1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "common/common-target.h"
55 #include "gimplify.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "params.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63
64 /* This file should be included last. */
65 #include "target-def.h"
66
67 /* Processor costs */
68
69 struct processor_costs {
70 /* Integer load */
71 const int int_load;
72
73 /* Integer signed load */
74 const int int_sload;
75
76 /* Integer zeroed load */
77 const int int_zload;
78
79 /* Float load */
80 const int float_load;
81
82 /* fmov, fneg, fabs */
83 const int float_move;
84
85 /* fadd, fsub */
86 const int float_plusminus;
87
88 /* fcmp */
89 const int float_cmp;
90
91 /* fmov, fmovr */
92 const int float_cmove;
93
94 /* fmul */
95 const int float_mul;
96
97 /* fdivs */
98 const int float_div_sf;
99
100 /* fdivd */
101 const int float_div_df;
102
103 /* fsqrts */
104 const int float_sqrt_sf;
105
106 /* fsqrtd */
107 const int float_sqrt_df;
108
109 /* umul/smul */
110 const int int_mul;
111
112 /* mulX */
113 const int int_mulX;
114
115 /* integer multiply cost for each bit set past the most
116 significant 3, so the formula for multiply cost becomes:
117
118 if (rs1 < 0)
119 highest_bit = highest_clear_bit(rs1);
120 else
121 highest_bit = highest_set_bit(rs1);
122 if (highest_bit < 3)
123 highest_bit = 3;
124 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
125
126        A value of zero indicates that the multiply cost is fixed,
127        and not variable.  */
128 const int int_mul_bit_factor;
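  /* Editorial note (not part of the original source): a worked example of the
     formula above, using illustrative values int_mul = COSTS_N_INSNS (4) and
     int_mul_bit_factor = 2 (as in ultrasparc_costs below).  For an operand
     rs1 whose highest set bit is bit 9, the multiply cost is roughly
     COSTS_N_INSNS (4) + ((9 - 3) / 2) = COSTS_N_INSNS (4) + 3 cost units.  */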
129
130 /* udiv/sdiv */
131 const int int_div;
132
133 /* divX */
134 const int int_divX;
135
136 /* movcc, movr */
137 const int int_cmove;
138
139 /* penalty for shifts, due to scheduling rules etc. */
140 const int shift_penalty;
141 };
142
143 static const
144 struct processor_costs cypress_costs = {
145 COSTS_N_INSNS (2), /* int load */
146 COSTS_N_INSNS (2), /* int signed load */
147 COSTS_N_INSNS (2), /* int zeroed load */
148 COSTS_N_INSNS (2), /* float load */
149 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
150 COSTS_N_INSNS (5), /* fadd, fsub */
151 COSTS_N_INSNS (1), /* fcmp */
152 COSTS_N_INSNS (1), /* fmov, fmovr */
153 COSTS_N_INSNS (7), /* fmul */
154 COSTS_N_INSNS (37), /* fdivs */
155 COSTS_N_INSNS (37), /* fdivd */
156 COSTS_N_INSNS (63), /* fsqrts */
157 COSTS_N_INSNS (63), /* fsqrtd */
158 COSTS_N_INSNS (1), /* imul */
159 COSTS_N_INSNS (1), /* imulX */
160 0, /* imul bit factor */
161 COSTS_N_INSNS (1), /* idiv */
162 COSTS_N_INSNS (1), /* idivX */
163 COSTS_N_INSNS (1), /* movcc/movr */
164 0, /* shift penalty */
165 };
166
167 static const
168 struct processor_costs supersparc_costs = {
169 COSTS_N_INSNS (1), /* int load */
170 COSTS_N_INSNS (1), /* int signed load */
171 COSTS_N_INSNS (1), /* int zeroed load */
172 COSTS_N_INSNS (0), /* float load */
173 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
174 COSTS_N_INSNS (3), /* fadd, fsub */
175 COSTS_N_INSNS (3), /* fcmp */
176 COSTS_N_INSNS (1), /* fmov, fmovr */
177 COSTS_N_INSNS (3), /* fmul */
178 COSTS_N_INSNS (6), /* fdivs */
179 COSTS_N_INSNS (9), /* fdivd */
180 COSTS_N_INSNS (12), /* fsqrts */
181 COSTS_N_INSNS (12), /* fsqrtd */
182 COSTS_N_INSNS (4), /* imul */
183 COSTS_N_INSNS (4), /* imulX */
184 0, /* imul bit factor */
185 COSTS_N_INSNS (4), /* idiv */
186 COSTS_N_INSNS (4), /* idivX */
187 COSTS_N_INSNS (1), /* movcc/movr */
188 1, /* shift penalty */
189 };
190
191 static const
192 struct processor_costs hypersparc_costs = {
193 COSTS_N_INSNS (1), /* int load */
194 COSTS_N_INSNS (1), /* int signed load */
195 COSTS_N_INSNS (1), /* int zeroed load */
196 COSTS_N_INSNS (1), /* float load */
197 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
198 COSTS_N_INSNS (1), /* fadd, fsub */
199 COSTS_N_INSNS (1), /* fcmp */
200 COSTS_N_INSNS (1), /* fmov, fmovr */
201 COSTS_N_INSNS (1), /* fmul */
202 COSTS_N_INSNS (8), /* fdivs */
203 COSTS_N_INSNS (12), /* fdivd */
204 COSTS_N_INSNS (17), /* fsqrts */
205 COSTS_N_INSNS (17), /* fsqrtd */
206 COSTS_N_INSNS (17), /* imul */
207 COSTS_N_INSNS (17), /* imulX */
208 0, /* imul bit factor */
209 COSTS_N_INSNS (17), /* idiv */
210 COSTS_N_INSNS (17), /* idivX */
211 COSTS_N_INSNS (1), /* movcc/movr */
212 0, /* shift penalty */
213 };
214
215 static const
216 struct processor_costs leon_costs = {
217 COSTS_N_INSNS (1), /* int load */
218 COSTS_N_INSNS (1), /* int signed load */
219 COSTS_N_INSNS (1), /* int zeroed load */
220 COSTS_N_INSNS (1), /* float load */
221 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
222 COSTS_N_INSNS (1), /* fadd, fsub */
223 COSTS_N_INSNS (1), /* fcmp */
224 COSTS_N_INSNS (1), /* fmov, fmovr */
225 COSTS_N_INSNS (1), /* fmul */
226 COSTS_N_INSNS (15), /* fdivs */
227 COSTS_N_INSNS (15), /* fdivd */
228 COSTS_N_INSNS (23), /* fsqrts */
229 COSTS_N_INSNS (23), /* fsqrtd */
230 COSTS_N_INSNS (5), /* imul */
231 COSTS_N_INSNS (5), /* imulX */
232 0, /* imul bit factor */
233 COSTS_N_INSNS (5), /* idiv */
234 COSTS_N_INSNS (5), /* idivX */
235 COSTS_N_INSNS (1), /* movcc/movr */
236 0, /* shift penalty */
237 };
238
239 static const
240 struct processor_costs leon3_costs = {
241 COSTS_N_INSNS (1), /* int load */
242 COSTS_N_INSNS (1), /* int signed load */
243 COSTS_N_INSNS (1), /* int zeroed load */
244 COSTS_N_INSNS (1), /* float load */
245 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
246 COSTS_N_INSNS (1), /* fadd, fsub */
247 COSTS_N_INSNS (1), /* fcmp */
248 COSTS_N_INSNS (1), /* fmov, fmovr */
249 COSTS_N_INSNS (1), /* fmul */
250 COSTS_N_INSNS (14), /* fdivs */
251 COSTS_N_INSNS (15), /* fdivd */
252 COSTS_N_INSNS (22), /* fsqrts */
253 COSTS_N_INSNS (23), /* fsqrtd */
254 COSTS_N_INSNS (5), /* imul */
255 COSTS_N_INSNS (5), /* imulX */
256 0, /* imul bit factor */
257 COSTS_N_INSNS (35), /* idiv */
258 COSTS_N_INSNS (35), /* idivX */
259 COSTS_N_INSNS (1), /* movcc/movr */
260 0, /* shift penalty */
261 };
262
263 static const
264 struct processor_costs sparclet_costs = {
265 COSTS_N_INSNS (3), /* int load */
266 COSTS_N_INSNS (3), /* int signed load */
267 COSTS_N_INSNS (1), /* int zeroed load */
268 COSTS_N_INSNS (1), /* float load */
269 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
270 COSTS_N_INSNS (1), /* fadd, fsub */
271 COSTS_N_INSNS (1), /* fcmp */
272 COSTS_N_INSNS (1), /* fmov, fmovr */
273 COSTS_N_INSNS (1), /* fmul */
274 COSTS_N_INSNS (1), /* fdivs */
275 COSTS_N_INSNS (1), /* fdivd */
276 COSTS_N_INSNS (1), /* fsqrts */
277 COSTS_N_INSNS (1), /* fsqrtd */
278 COSTS_N_INSNS (5), /* imul */
279 COSTS_N_INSNS (5), /* imulX */
280 0, /* imul bit factor */
281 COSTS_N_INSNS (5), /* idiv */
282 COSTS_N_INSNS (5), /* idivX */
283 COSTS_N_INSNS (1), /* movcc/movr */
284 0, /* shift penalty */
285 };
286
287 static const
288 struct processor_costs ultrasparc_costs = {
289 COSTS_N_INSNS (2), /* int load */
290 COSTS_N_INSNS (3), /* int signed load */
291 COSTS_N_INSNS (2), /* int zeroed load */
292 COSTS_N_INSNS (2), /* float load */
293 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
294 COSTS_N_INSNS (4), /* fadd, fsub */
295 COSTS_N_INSNS (1), /* fcmp */
296 COSTS_N_INSNS (2), /* fmov, fmovr */
297 COSTS_N_INSNS (4), /* fmul */
298 COSTS_N_INSNS (13), /* fdivs */
299 COSTS_N_INSNS (23), /* fdivd */
300 COSTS_N_INSNS (13), /* fsqrts */
301 COSTS_N_INSNS (23), /* fsqrtd */
302 COSTS_N_INSNS (4), /* imul */
303 COSTS_N_INSNS (4), /* imulX */
304 2, /* imul bit factor */
305 COSTS_N_INSNS (37), /* idiv */
306 COSTS_N_INSNS (68), /* idivX */
307 COSTS_N_INSNS (2), /* movcc/movr */
308 2, /* shift penalty */
309 };
310
311 static const
312 struct processor_costs ultrasparc3_costs = {
313 COSTS_N_INSNS (2), /* int load */
314 COSTS_N_INSNS (3), /* int signed load */
315 COSTS_N_INSNS (3), /* int zeroed load */
316 COSTS_N_INSNS (2), /* float load */
317 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
318 COSTS_N_INSNS (4), /* fadd, fsub */
319 COSTS_N_INSNS (5), /* fcmp */
320 COSTS_N_INSNS (3), /* fmov, fmovr */
321 COSTS_N_INSNS (4), /* fmul */
322 COSTS_N_INSNS (17), /* fdivs */
323 COSTS_N_INSNS (20), /* fdivd */
324 COSTS_N_INSNS (20), /* fsqrts */
325 COSTS_N_INSNS (29), /* fsqrtd */
326 COSTS_N_INSNS (6), /* imul */
327 COSTS_N_INSNS (6), /* imulX */
328 0, /* imul bit factor */
329 COSTS_N_INSNS (40), /* idiv */
330 COSTS_N_INSNS (71), /* idivX */
331 COSTS_N_INSNS (2), /* movcc/movr */
332 0, /* shift penalty */
333 };
334
335 static const
336 struct processor_costs niagara_costs = {
337 COSTS_N_INSNS (3), /* int load */
338 COSTS_N_INSNS (3), /* int signed load */
339 COSTS_N_INSNS (3), /* int zeroed load */
340 COSTS_N_INSNS (9), /* float load */
341 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
342 COSTS_N_INSNS (8), /* fadd, fsub */
343 COSTS_N_INSNS (26), /* fcmp */
344 COSTS_N_INSNS (8), /* fmov, fmovr */
345 COSTS_N_INSNS (29), /* fmul */
346 COSTS_N_INSNS (54), /* fdivs */
347 COSTS_N_INSNS (83), /* fdivd */
348 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
349 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
350 COSTS_N_INSNS (11), /* imul */
351 COSTS_N_INSNS (11), /* imulX */
352 0, /* imul bit factor */
353 COSTS_N_INSNS (72), /* idiv */
354 COSTS_N_INSNS (72), /* idivX */
355 COSTS_N_INSNS (1), /* movcc/movr */
356 0, /* shift penalty */
357 };
358
359 static const
360 struct processor_costs niagara2_costs = {
361 COSTS_N_INSNS (3), /* int load */
362 COSTS_N_INSNS (3), /* int signed load */
363 COSTS_N_INSNS (3), /* int zeroed load */
364 COSTS_N_INSNS (3), /* float load */
365 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
366 COSTS_N_INSNS (6), /* fadd, fsub */
367 COSTS_N_INSNS (6), /* fcmp */
368 COSTS_N_INSNS (6), /* fmov, fmovr */
369 COSTS_N_INSNS (6), /* fmul */
370 COSTS_N_INSNS (19), /* fdivs */
371 COSTS_N_INSNS (33), /* fdivd */
372 COSTS_N_INSNS (19), /* fsqrts */
373 COSTS_N_INSNS (33), /* fsqrtd */
374 COSTS_N_INSNS (5), /* imul */
375 COSTS_N_INSNS (5), /* imulX */
376 0, /* imul bit factor */
377 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
378 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
379 COSTS_N_INSNS (1), /* movcc/movr */
380 0, /* shift penalty */
381 };
382
383 static const
384 struct processor_costs niagara3_costs = {
385 COSTS_N_INSNS (3), /* int load */
386 COSTS_N_INSNS (3), /* int signed load */
387 COSTS_N_INSNS (3), /* int zeroed load */
388 COSTS_N_INSNS (3), /* float load */
389 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
390 COSTS_N_INSNS (9), /* fadd, fsub */
391 COSTS_N_INSNS (9), /* fcmp */
392 COSTS_N_INSNS (9), /* fmov, fmovr */
393 COSTS_N_INSNS (9), /* fmul */
394 COSTS_N_INSNS (23), /* fdivs */
395 COSTS_N_INSNS (37), /* fdivd */
396 COSTS_N_INSNS (23), /* fsqrts */
397 COSTS_N_INSNS (37), /* fsqrtd */
398 COSTS_N_INSNS (9), /* imul */
399 COSTS_N_INSNS (9), /* imulX */
400 0, /* imul bit factor */
401 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
402 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
403 COSTS_N_INSNS (1), /* movcc/movr */
404 0, /* shift penalty */
405 };
406
407 static const
408 struct processor_costs niagara4_costs = {
409 COSTS_N_INSNS (5), /* int load */
410 COSTS_N_INSNS (5), /* int signed load */
411 COSTS_N_INSNS (5), /* int zeroed load */
412 COSTS_N_INSNS (5), /* float load */
413 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
414 COSTS_N_INSNS (11), /* fadd, fsub */
415 COSTS_N_INSNS (11), /* fcmp */
416 COSTS_N_INSNS (11), /* fmov, fmovr */
417 COSTS_N_INSNS (11), /* fmul */
418 COSTS_N_INSNS (24), /* fdivs */
419 COSTS_N_INSNS (37), /* fdivd */
420 COSTS_N_INSNS (24), /* fsqrts */
421 COSTS_N_INSNS (37), /* fsqrtd */
422 COSTS_N_INSNS (12), /* imul */
423 COSTS_N_INSNS (12), /* imulX */
424 0, /* imul bit factor */
425 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
426 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
427 COSTS_N_INSNS (1), /* movcc/movr */
428 0, /* shift penalty */
429 };
430
431 static const
432 struct processor_costs niagara7_costs = {
433 COSTS_N_INSNS (5), /* int load */
434 COSTS_N_INSNS (5), /* int signed load */
435 COSTS_N_INSNS (5), /* int zeroed load */
436 COSTS_N_INSNS (5), /* float load */
437 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
438 COSTS_N_INSNS (11), /* fadd, fsub */
439 COSTS_N_INSNS (11), /* fcmp */
440 COSTS_N_INSNS (11), /* fmov, fmovr */
441 COSTS_N_INSNS (11), /* fmul */
442 COSTS_N_INSNS (24), /* fdivs */
443 COSTS_N_INSNS (37), /* fdivd */
444 COSTS_N_INSNS (24), /* fsqrts */
445 COSTS_N_INSNS (37), /* fsqrtd */
446 COSTS_N_INSNS (12), /* imul */
447 COSTS_N_INSNS (12), /* imulX */
448 0, /* imul bit factor */
449 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
450 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
451 COSTS_N_INSNS (1), /* movcc/movr */
452 0, /* shift penalty */
453 };
454
455 static const
456 struct processor_costs m8_costs = {
457 COSTS_N_INSNS (3), /* int load */
458 COSTS_N_INSNS (3), /* int signed load */
459 COSTS_N_INSNS (3), /* int zeroed load */
460 COSTS_N_INSNS (3), /* float load */
461 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
462 COSTS_N_INSNS (9), /* fadd, fsub */
463 COSTS_N_INSNS (9), /* fcmp */
464 COSTS_N_INSNS (9), /* fmov, fmovr */
465 COSTS_N_INSNS (9), /* fmul */
466 COSTS_N_INSNS (26), /* fdivs */
467 COSTS_N_INSNS (30), /* fdivd */
468 COSTS_N_INSNS (33), /* fsqrts */
469 COSTS_N_INSNS (41), /* fsqrtd */
470 COSTS_N_INSNS (12), /* imul */
471 COSTS_N_INSNS (10), /* imulX */
472 0, /* imul bit factor */
473 COSTS_N_INSNS (57), /* udiv/sdiv */
474 COSTS_N_INSNS (30), /* udivx/sdivx */
475 COSTS_N_INSNS (1), /* movcc/movr */
476 0, /* shift penalty */
477 };
478
479 static const struct processor_costs *sparc_costs = &cypress_costs;
480
481 #ifdef HAVE_AS_RELAX_OPTION
482 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
483 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
484    With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
485    somebody branches between the sethi and jmp.  */
486 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
487 #else
488 #define LEAF_SIBCALL_SLOT_RESERVED_P \
489 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
490 #endif
491
492 /* Vector to say how input registers are mapped to output registers.
493 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
494 eliminate it. You must use -fomit-frame-pointer to get that. */
495 char leaf_reg_remap[] =
496 { 0, 1, 2, 3, 4, 5, 6, 7,
497 -1, -1, -1, -1, -1, -1, 14, -1,
498 -1, -1, -1, -1, -1, -1, -1, -1,
499 8, 9, 10, 11, 12, 13, -1, 15,
500
501 32, 33, 34, 35, 36, 37, 38, 39,
502 40, 41, 42, 43, 44, 45, 46, 47,
503 48, 49, 50, 51, 52, 53, 54, 55,
504 56, 57, 58, 59, 60, 61, 62, 63,
505 64, 65, 66, 67, 68, 69, 70, 71,
506 72, 73, 74, 75, 76, 77, 78, 79,
507 80, 81, 82, 83, 84, 85, 86, 87,
508 88, 89, 90, 91, 92, 93, 94, 95,
509 96, 97, 98, 99, 100, 101, 102};
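/* Editorial note (not part of the original source): an illustrative reading of
   the table above.  The global registers 0-7 and the FP registers 32 and up
   map to themselves; the %in registers 24-29 (%i0-%i5) are remapped to the
   %out registers 8-13 (%o0-%o5); %i7 (31) is remapped to %o7 (15); %sp (14)
   stays put.  Entries of -1 (%o0-%o5, %o7, the locals and %fp) mark registers
   that may not appear in a leaf-function candidate, cf. sparc_leaf_regs
   below.  */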
510
511 /* Vector, indexed by hard register number, which contains 1
512 for a register that is allowable in a candidate for leaf
513 function treatment. */
514 char sparc_leaf_regs[] =
515 { 1, 1, 1, 1, 1, 1, 1, 1,
516 0, 0, 0, 0, 0, 0, 1, 0,
517 0, 0, 0, 0, 0, 0, 0, 0,
518 1, 1, 1, 1, 1, 1, 0, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1, 1,
525 1, 1, 1, 1, 1, 1, 1, 1,
526 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1};
528
529 struct GTY(()) machine_function
530 {
531 /* Size of the frame of the function. */
532 HOST_WIDE_INT frame_size;
533
534 /* Size of the frame of the function minus the register window save area
535 and the outgoing argument area. */
536 HOST_WIDE_INT apparent_frame_size;
537
538 /* Register we pretend the frame pointer is allocated to. Normally, this
539 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
540 record "offset" separately as it may be too big for (reg + disp). */
541 rtx frame_base_reg;
542 HOST_WIDE_INT frame_base_offset;
543
544 /* Number of global or FP registers to be saved (as 4-byte quantities). */
545 int n_global_fp_regs;
546
547 /* True if the current function is leaf and uses only leaf regs,
548 so that the SPARC leaf function optimization can be applied.
549 Private version of crtl->uses_only_leaf_regs, see
550 sparc_expand_prologue for the rationale. */
551 int leaf_function_p;
552
553 /* True if the prologue saves local or in registers. */
554 bool save_local_in_regs_p;
555
556 /* True if the data calculated by sparc_expand_prologue are valid. */
557 bool prologue_data_valid_p;
558 };
559
560 #define sparc_frame_size cfun->machine->frame_size
561 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
562 #define sparc_frame_base_reg cfun->machine->frame_base_reg
563 #define sparc_frame_base_offset cfun->machine->frame_base_offset
564 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
565 #define sparc_leaf_function_p cfun->machine->leaf_function_p
566 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
567 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
568
569 /* 1 if the next opcode is to be specially indented. */
570 int sparc_indent_opcode = 0;
571
572 static void sparc_option_override (void);
573 static void sparc_init_modes (void);
574 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
575 const_tree, bool, bool, int *, int *);
576
577 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
578 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
579
580 static void sparc_emit_set_const32 (rtx, rtx);
581 static void sparc_emit_set_const64 (rtx, rtx);
582 static void sparc_output_addr_vec (rtx);
583 static void sparc_output_addr_diff_vec (rtx);
584 static void sparc_output_deferred_case_vectors (void);
585 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
586 static bool sparc_legitimate_constant_p (machine_mode, rtx);
587 static rtx sparc_builtin_saveregs (void);
588 static int epilogue_renumber (rtx *, int);
589 static bool sparc_assemble_integer (rtx, unsigned int, int);
590 static int set_extends (rtx_insn *);
591 static void sparc_asm_function_prologue (FILE *);
592 static void sparc_asm_function_epilogue (FILE *);
593 #ifdef TARGET_SOLARIS
594 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
595 tree) ATTRIBUTE_UNUSED;
596 #endif
597 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
598 static int sparc_issue_rate (void);
599 static void sparc_sched_init (FILE *, int, int);
600 static int sparc_use_sched_lookahead (void);
601
602 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
603 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
604 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
605 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
606 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
607
608 static bool sparc_function_ok_for_sibcall (tree, tree);
609 static void sparc_init_libfuncs (void);
610 static void sparc_init_builtins (void);
611 static void sparc_fpu_init_builtins (void);
612 static void sparc_vis_init_builtins (void);
613 static tree sparc_builtin_decl (unsigned, bool);
614 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
615 static tree sparc_fold_builtin (tree, int, tree *, bool);
616 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
617 HOST_WIDE_INT, tree);
618 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
619 HOST_WIDE_INT, const_tree);
620 static struct machine_function * sparc_init_machine_status (void);
621 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
622 static rtx sparc_tls_get_addr (void);
623 static rtx sparc_tls_got (void);
624 static int sparc_register_move_cost (machine_mode,
625 reg_class_t, reg_class_t);
626 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
627 static rtx sparc_function_value (const_tree, const_tree, bool);
628 static rtx sparc_libcall_value (machine_mode, const_rtx);
629 static bool sparc_function_value_regno_p (const unsigned int);
630 static rtx sparc_struct_value_rtx (tree, int);
631 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
632 int *, const_tree, int);
633 static bool sparc_return_in_memory (const_tree, const_tree);
634 static bool sparc_strict_argument_naming (cumulative_args_t);
635 static void sparc_va_start (tree, rtx);
636 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
637 static bool sparc_vector_mode_supported_p (machine_mode);
638 static bool sparc_tls_referenced_p (rtx);
639 static rtx sparc_legitimize_tls_address (rtx);
640 static rtx sparc_legitimize_pic_address (rtx, rtx);
641 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
642 static rtx sparc_delegitimize_address (rtx);
643 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
644 static bool sparc_pass_by_reference (cumulative_args_t,
645 machine_mode, const_tree, bool);
646 static void sparc_function_arg_advance (cumulative_args_t,
647 machine_mode, const_tree, bool);
648 static rtx sparc_function_arg_1 (cumulative_args_t,
649 machine_mode, const_tree, bool, bool);
650 static rtx sparc_function_arg (cumulative_args_t,
651 machine_mode, const_tree, bool);
652 static rtx sparc_function_incoming_arg (cumulative_args_t,
653 machine_mode, const_tree, bool);
654 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
655 static unsigned int sparc_function_arg_boundary (machine_mode,
656 const_tree);
657 static int sparc_arg_partial_bytes (cumulative_args_t,
658 machine_mode, tree, bool);
659 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
660 static void sparc_file_end (void);
661 static bool sparc_frame_pointer_required (void);
662 static bool sparc_can_eliminate (const int, const int);
663 static rtx sparc_builtin_setjmp_frame_value (void);
664 static void sparc_conditional_register_usage (void);
665 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
666 static const char *sparc_mangle_type (const_tree);
667 #endif
668 static void sparc_trampoline_init (rtx, tree, rtx);
669 static machine_mode sparc_preferred_simd_mode (scalar_mode);
670 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
671 static bool sparc_lra_p (void);
672 static bool sparc_print_operand_punct_valid_p (unsigned char);
673 static void sparc_print_operand (FILE *, rtx, int);
674 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
675 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
676 machine_mode,
677 secondary_reload_info *);
678 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
679 reg_class_t);
680 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
681 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
682 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
683 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
684 static unsigned int sparc_min_arithmetic_precision (void);
685 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
686 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
687 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
688 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
689 reg_class_t);
690 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
691 \f
692 #ifdef SUBTARGET_ATTRIBUTE_TABLE
693 /* Table of valid machine attributes. */
694 static const struct attribute_spec sparc_attribute_table[] =
695 {
696 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
697 do_diagnostic, handler, exclude } */
698 SUBTARGET_ATTRIBUTE_TABLE,
699 { NULL, 0, 0, false, false, false, false, NULL, NULL }
700 };
701 #endif
702 \f
703 /* Option handling. */
704
705 /* Parsed value. */
706 enum cmodel sparc_cmodel;
707
708 char sparc_hard_reg_printed[8];
709
710 /* Initialize the GCC target structure. */
711
712 /* The default is to use .half rather than .short for aligned HI objects. */
713 #undef TARGET_ASM_ALIGNED_HI_OP
714 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
715
716 #undef TARGET_ASM_UNALIGNED_HI_OP
717 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
718 #undef TARGET_ASM_UNALIGNED_SI_OP
719 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
720 #undef TARGET_ASM_UNALIGNED_DI_OP
721 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
722
723 /* The target hook has to handle DI-mode values. */
724 #undef TARGET_ASM_INTEGER
725 #define TARGET_ASM_INTEGER sparc_assemble_integer
726
727 #undef TARGET_ASM_FUNCTION_PROLOGUE
728 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
729 #undef TARGET_ASM_FUNCTION_EPILOGUE
730 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
731
732 #undef TARGET_SCHED_ADJUST_COST
733 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
734 #undef TARGET_SCHED_ISSUE_RATE
735 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
736 #undef TARGET_SCHED_INIT
737 #define TARGET_SCHED_INIT sparc_sched_init
738 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
739 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
740
741 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
742 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
743
744 #undef TARGET_INIT_LIBFUNCS
745 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
746
747 #undef TARGET_LEGITIMIZE_ADDRESS
748 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
749 #undef TARGET_DELEGITIMIZE_ADDRESS
750 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
751 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
752 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
753
754 #undef TARGET_INIT_BUILTINS
755 #define TARGET_INIT_BUILTINS sparc_init_builtins
756 #undef TARGET_BUILTIN_DECL
757 #define TARGET_BUILTIN_DECL sparc_builtin_decl
758 #undef TARGET_EXPAND_BUILTIN
759 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
760 #undef TARGET_FOLD_BUILTIN
761 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
762
763 #if TARGET_TLS
764 #undef TARGET_HAVE_TLS
765 #define TARGET_HAVE_TLS true
766 #endif
767
768 #undef TARGET_CANNOT_FORCE_CONST_MEM
769 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
770
771 #undef TARGET_ASM_OUTPUT_MI_THUNK
772 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
773 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
774 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
775
776 #undef TARGET_RTX_COSTS
777 #define TARGET_RTX_COSTS sparc_rtx_costs
778 #undef TARGET_ADDRESS_COST
779 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
780 #undef TARGET_REGISTER_MOVE_COST
781 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
782
783 #undef TARGET_PROMOTE_FUNCTION_MODE
784 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
785
786 #undef TARGET_FUNCTION_VALUE
787 #define TARGET_FUNCTION_VALUE sparc_function_value
788 #undef TARGET_LIBCALL_VALUE
789 #define TARGET_LIBCALL_VALUE sparc_libcall_value
790 #undef TARGET_FUNCTION_VALUE_REGNO_P
791 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
792
793 #undef TARGET_STRUCT_VALUE_RTX
794 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
795 #undef TARGET_RETURN_IN_MEMORY
796 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
797 #undef TARGET_MUST_PASS_IN_STACK
798 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
799 #undef TARGET_PASS_BY_REFERENCE
800 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
801 #undef TARGET_ARG_PARTIAL_BYTES
802 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
803 #undef TARGET_FUNCTION_ARG_ADVANCE
804 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
805 #undef TARGET_FUNCTION_ARG
806 #define TARGET_FUNCTION_ARG sparc_function_arg
807 #undef TARGET_FUNCTION_INCOMING_ARG
808 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
809 #undef TARGET_FUNCTION_ARG_PADDING
810 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
811 #undef TARGET_FUNCTION_ARG_BOUNDARY
812 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
813
814 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
815 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
816 #undef TARGET_STRICT_ARGUMENT_NAMING
817 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
818
819 #undef TARGET_EXPAND_BUILTIN_VA_START
820 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
821 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
822 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
823
824 #undef TARGET_VECTOR_MODE_SUPPORTED_P
825 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
826
827 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
828 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
829
830 #ifdef SUBTARGET_INSERT_ATTRIBUTES
831 #undef TARGET_INSERT_ATTRIBUTES
832 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
833 #endif
834
835 #ifdef SUBTARGET_ATTRIBUTE_TABLE
836 #undef TARGET_ATTRIBUTE_TABLE
837 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
838 #endif
839
840 #undef TARGET_OPTION_OVERRIDE
841 #define TARGET_OPTION_OVERRIDE sparc_option_override
842
843 #ifdef TARGET_THREAD_SSP_OFFSET
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
846 #endif
847
848 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
849 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
850 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
851 #endif
852
853 #undef TARGET_ASM_FILE_END
854 #define TARGET_ASM_FILE_END sparc_file_end
855
856 #undef TARGET_FRAME_POINTER_REQUIRED
857 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
858
859 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
860 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
861
862 #undef TARGET_CAN_ELIMINATE
863 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
864
865 #undef TARGET_PREFERRED_RELOAD_CLASS
866 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
867
868 #undef TARGET_SECONDARY_RELOAD
869 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
870 #undef TARGET_SECONDARY_MEMORY_NEEDED
871 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
872 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
873 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
874
875 #undef TARGET_CONDITIONAL_REGISTER_USAGE
876 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
877
878 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
879 #undef TARGET_MANGLE_TYPE
880 #define TARGET_MANGLE_TYPE sparc_mangle_type
881 #endif
882
883 #undef TARGET_LRA_P
884 #define TARGET_LRA_P sparc_lra_p
885
886 #undef TARGET_LEGITIMATE_ADDRESS_P
887 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
888
889 #undef TARGET_LEGITIMATE_CONSTANT_P
890 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
891
892 #undef TARGET_TRAMPOLINE_INIT
893 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
894
895 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
896 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
897 #undef TARGET_PRINT_OPERAND
898 #define TARGET_PRINT_OPERAND sparc_print_operand
899 #undef TARGET_PRINT_OPERAND_ADDRESS
900 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
901
902 /* The value stored by LDSTUB. */
903 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
904 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
905
906 #undef TARGET_CSTORE_MODE
907 #define TARGET_CSTORE_MODE sparc_cstore_mode
908
909 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
910 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
911
912 #undef TARGET_FIXED_CONDITION_CODE_REGS
913 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
914
915 #undef TARGET_MIN_ARITHMETIC_PRECISION
916 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
917
918 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
919 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
920
921 #undef TARGET_HARD_REGNO_NREGS
922 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
923 #undef TARGET_HARD_REGNO_MODE_OK
924 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
925
926 #undef TARGET_MODES_TIEABLE_P
927 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
928
929 #undef TARGET_CAN_CHANGE_MODE_CLASS
930 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
931
932 #undef TARGET_CONSTANT_ALIGNMENT
933 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
934
935 struct gcc_target targetm = TARGET_INITIALIZER;
936
937 /* Return the memory reference contained in X if any, zero otherwise. */
938
939 static rtx
940 mem_ref (rtx x)
941 {
942 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
943 x = XEXP (x, 0);
944
945 if (MEM_P (x))
946 return x;
947
948 return NULL_RTX;
949 }
950
951 /* True if any of INSN's source register(s) is REG. */
952
953 static bool
954 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
955 {
956 extract_insn (insn);
957 return ((REG_P (recog_data.operand[1])
958 && REGNO (recog_data.operand[1]) == reg)
959 || (recog_data.n_operands == 3
960 && REG_P (recog_data.operand[2])
961 && REGNO (recog_data.operand[2]) == reg));
962 }
963
964 /* True if INSN is a floating-point division or square-root. */
965
966 static bool
967 div_sqrt_insn_p (rtx_insn *insn)
968 {
969 if (GET_CODE (PATTERN (insn)) != SET)
970 return false;
971
972 switch (get_attr_type (insn))
973 {
974 case TYPE_FPDIVS:
975 case TYPE_FPSQRTS:
976 case TYPE_FPDIVD:
977 case TYPE_FPSQRTD:
978 return true;
979 default:
980 return false;
981 }
982 }
983
984 /* True if INSN is a floating-point instruction. */
985
986 static bool
987 fpop_insn_p (rtx_insn *insn)
988 {
989 if (GET_CODE (PATTERN (insn)) != SET)
990 return false;
991
992 switch (get_attr_type (insn))
993 {
994 case TYPE_FPMOVE:
995 case TYPE_FPCMOVE:
996 case TYPE_FP:
997 case TYPE_FPCMP:
998 case TYPE_FPMUL:
999 case TYPE_FPDIVS:
1000 case TYPE_FPSQRTS:
1001 case TYPE_FPDIVD:
1002 case TYPE_FPSQRTD:
1003 return true;
1004 default:
1005 return false;
1006 }
1007 }
1008
1009 /* True if INSN is an atomic instruction. */
1010
1011 static bool
1012 atomic_insn_for_leon3_p (rtx_insn *insn)
1013 {
1014 switch (INSN_CODE (insn))
1015 {
1016 case CODE_FOR_swapsi:
1017 case CODE_FOR_ldstub:
1018 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1019 return true;
1020 default:
1021 return false;
1022 }
1023 }
1024
1025 /* We use a machine specific pass to enable workarounds for errata.
1026
1027 We need to have the (essentially) final form of the insn stream in order
1028 to properly detect the various hazards. Therefore, this machine specific
1029 pass runs as late as possible. */
1030
1031 /* True if INSN is an md pattern or asm statement.  */
1032 #define USEFUL_INSN_P(INSN) \
1033 (NONDEBUG_INSN_P (INSN) \
1034 && GET_CODE (PATTERN (INSN)) != USE \
1035 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1036
1037 static unsigned int
1038 sparc_do_work_around_errata (void)
1039 {
1040 rtx_insn *insn, *next;
1041
1042 /* Force all instructions to be split into their final form. */
1043 split_all_insns_noflow ();
1044
1045 /* Now look for specific patterns in the insn stream. */
1046 for (insn = get_insns (); insn; insn = next)
1047 {
1048 bool insert_nop = false;
1049 rtx set;
1050 rtx_insn *jump;
1051 rtx_sequence *seq;
1052
1053 /* Look into the instruction in a delay slot. */
1054 if (NONJUMP_INSN_P (insn)
1055 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1056 {
1057 jump = seq->insn (0);
1058 insn = seq->insn (1);
1059 }
1060 else if (JUMP_P (insn))
1061 jump = insn;
1062 else
1063 jump = NULL;
1064
1065 /* Place a NOP at the branch target of an integer branch if it is a
1066 floating-point operation or a floating-point branch. */
1067 if (sparc_fix_gr712rc
1068 && jump
1069 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1070 {
1071 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1072 if (target
1073 && (fpop_insn_p (target)
1074 || (JUMP_P (target)
1075 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1076 emit_insn_before (gen_nop (), target);
1077 }
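      /* Editorial illustration (not part of the original source), assuming
	 typical SPARC assembly: for the GR712RC fix above, given

	     ble   .L1               ! integer (ICC) branch
	      nop
	     ...
	   .L1:
	     faddd %f0, %f2, %f4     ! FP operation at the branch target

	 the pass emits a nop immediately before the faddd at .L1.  */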
1078
1079       /* Insert a NOP between a load instruction and an atomic instruction.  Insert
1080 	 a NOP at the branch target if there is a load in the delay slot and an atomic
1081 	 instruction at the branch target.  */
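      /* Editorial illustration (not part of the original source), assuming
	 typical SPARC assembly: for the UT700 fix described above,

	     ld     [%o0], %g1       ! load
	     ldstub [%o1], %g2       ! atomic instruction

	 becomes

	     ld     [%o0], %g1
	     nop
	     ldstub [%o1], %g2  */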
1082 if (sparc_fix_ut700
1083 && NONJUMP_INSN_P (insn)
1084 && (set = single_set (insn)) != NULL_RTX
1085 && mem_ref (SET_SRC (set))
1086 && REG_P (SET_DEST (set)))
1087 {
1088 if (jump)
1089 {
1090 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1091 if (target && atomic_insn_for_leon3_p (target))
1092 emit_insn_before (gen_nop (), target);
1093 }
1094
1095 next = next_active_insn (insn);
1096 if (!next)
1097 break;
1098
1099 if (atomic_insn_for_leon3_p (next))
1100 insert_nop = true;
1101 }
1102
1103 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1104 ends with another fdiv or fsqrt instruction with no dependencies on
1105 the former, along with an appropriate pattern in between. */
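      /* Editorial illustration (not part of the original source), assuming
	 typical SPARC assembly: a problematic sequence for this erratum could
	 look like

	     fdivd  %f0, %f2, %f4
	     faddd  %f6, %f8, %f10       ! FP op, independent of %f4
	     fmuld  %f12, %f14, %f16     ! FP op, independent of %f4
	     fsqrtd %f18, %f20           ! second div/sqrt

	 in which case the pass inserts two nops immediately after the fdivd
	 (only one nop for the longer form with three intervening insns).  */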
1106 if (sparc_fix_lost_divsqrt
1107 && NONJUMP_INSN_P (insn)
1108 && div_sqrt_insn_p (insn))
1109 {
1110 int i;
1111 int fp_found = 0;
1112 rtx_insn *after;
1113
1114 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1115
1116 next = next_active_insn (insn);
1117 if (!next)
1118 break;
1119
1120 for (after = next, i = 0; i < 4; i++)
1121 {
1122 /* Count floating-point operations. */
1123 if (i != 3 && fpop_insn_p (after))
1124 {
1125 /* If the insn uses the destination register of
1126 the div/sqrt, then it cannot be problematic. */
1127 if (insn_uses_reg_p (after, dest_reg))
1128 break;
1129 fp_found++;
1130 }
1131
1132 /* Count floating-point loads. */
1133 if (i != 3
1134 && (set = single_set (after)) != NULL_RTX
1135 && REG_P (SET_DEST (set))
1136 && REGNO (SET_DEST (set)) > 31)
1137 {
1138 /* If the insn uses the destination register of
1139 the div/sqrt, then it cannot be problematic. */
1140 if (REGNO (SET_DEST (set)) == dest_reg)
1141 break;
1142 fp_found++;
1143 }
1144
1145 /* Check if this is a problematic sequence. */
1146 if (i > 1
1147 && fp_found >= 2
1148 && div_sqrt_insn_p (after))
1149 {
1150 /* If this is the short version of the problematic
1151 sequence we add two NOPs in a row to also prevent
1152 the long version. */
1153 if (i == 2)
1154 emit_insn_before (gen_nop (), next);
1155 insert_nop = true;
1156 break;
1157 }
1158
1159 /* No need to scan past a second div/sqrt. */
1160 if (div_sqrt_insn_p (after))
1161 break;
1162
1163 /* Insert NOP before branch. */
1164 if (i < 3
1165 && (!NONJUMP_INSN_P (after)
1166 || GET_CODE (PATTERN (after)) == SEQUENCE))
1167 {
1168 insert_nop = true;
1169 break;
1170 }
1171
1172 after = next_active_insn (after);
1173 if (!after)
1174 break;
1175 }
1176 }
1177
1178 /* Look for either of these two sequences:
1179
1180 Sequence A:
1181 1. store of word size or less (e.g. st / stb / sth / stf)
1182 2. any single instruction that is not a load or store
1183 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1184
1185 Sequence B:
1186 1. store of double word size (e.g. std / stdf)
1187 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
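      /* Editorial illustration (not part of the original source), assuming
	 typical SPARC assembly.  Sequence A:

	     st  %g1, [%o0]          ! word (or smaller) store
	     add %o1, 1, %o1         ! any single non-load/store instruction
	     st  %g2, [%o0+4]        ! another store

	 is rewritten with a nop immediately after the first store; likewise
	 for Sequence B:

	     std %g2, [%o0]          ! double-word store
	     st  %g1, [%o0+8]        ! another store  */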
1188 if (sparc_fix_b2bst
1189 && NONJUMP_INSN_P (insn)
1190 && (set = single_set (insn)) != NULL_RTX
1191 && MEM_P (SET_DEST (set)))
1192 {
1193 /* Sequence B begins with a double-word store. */
1194 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1195 rtx_insn *after;
1196 int i;
1197
1198 next = next_active_insn (insn);
1199 if (!next)
1200 break;
1201
1202 for (after = next, i = 0; i < 2; i++)
1203 {
1204 /* Skip empty assembly statements. */
1205 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1206 || (USEFUL_INSN_P (after)
1207 && (asm_noperands (PATTERN (after))>=0)
1208 && !strcmp (decode_asm_operands (PATTERN (after),
1209 NULL, NULL, NULL,
1210 NULL, NULL), "")))
1211 after = next_active_insn (after);
1212 if (!after)
1213 break;
1214
1215 /* If the insn is a branch, then it cannot be problematic. */
1216 if (!NONJUMP_INSN_P (after)
1217 || GET_CODE (PATTERN (after)) == SEQUENCE)
1218 break;
1219
1220 /* Sequence B is only two instructions long. */
1221 if (seq_b)
1222 {
1223 /* Add NOP if followed by a store. */
1224 if ((set = single_set (after)) != NULL_RTX
1225 && MEM_P (SET_DEST (set)))
1226 insert_nop = true;
1227
1228 /* Otherwise it is ok. */
1229 break;
1230 }
1231
1232 /* If the second instruction is a load or a store,
1233 then the sequence cannot be problematic. */
1234 if (i == 0)
1235 {
1236 if ((set = single_set (after)) != NULL_RTX
1237 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1238 break;
1239
1240 after = next_active_insn (after);
1241 if (!after)
1242 break;
1243 }
1244
1245 /* Add NOP if third instruction is a store. */
1246 if (i == 1
1247 && (set = single_set (after)) != NULL_RTX
1248 && MEM_P (SET_DEST (set)))
1249 insert_nop = true;
1250 }
1251 }
1252
1253 /* Look for a single-word load into an odd-numbered FP register. */
1254 else if (sparc_fix_at697f
1255 && NONJUMP_INSN_P (insn)
1256 && (set = single_set (insn)) != NULL_RTX
1257 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1258 && mem_ref (SET_SRC (set))
1259 && REG_P (SET_DEST (set))
1260 && REGNO (SET_DEST (set)) > 31
1261 && REGNO (SET_DEST (set)) % 2 != 0)
1262 {
1263 /* The wrong dependency is on the enclosing double register. */
1264 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1265 unsigned int src1, src2, dest;
1266 int code;
1267
1268 next = next_active_insn (insn);
1269 if (!next)
1270 break;
1271 /* If the insn is a branch, then it cannot be problematic. */
1272 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1273 continue;
1274
1275 extract_insn (next);
1276 code = INSN_CODE (next);
1277
1278 switch (code)
1279 {
1280 case CODE_FOR_adddf3:
1281 case CODE_FOR_subdf3:
1282 case CODE_FOR_muldf3:
1283 case CODE_FOR_divdf3:
1284 dest = REGNO (recog_data.operand[0]);
1285 src1 = REGNO (recog_data.operand[1]);
1286 src2 = REGNO (recog_data.operand[2]);
1287 if (src1 != src2)
1288 {
1289 /* Case [1-4]:
1290 ld [address], %fx+1
1291 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1292 if ((src1 == x || src2 == x)
1293 && (dest == src1 || dest == src2))
1294 insert_nop = true;
1295 }
1296 else
1297 {
1298 /* Case 5:
1299 ld [address], %fx+1
1300 FPOPd %fx, %fx, %fx */
1301 if (src1 == x
1302 && dest == src1
1303 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1304 insert_nop = true;
1305 }
1306 break;
1307
1308 case CODE_FOR_sqrtdf2:
1309 dest = REGNO (recog_data.operand[0]);
1310 src1 = REGNO (recog_data.operand[1]);
1311 /* Case 6:
1312 ld [address], %fx+1
1313 fsqrtd %fx, %fx */
1314 if (src1 == x && dest == src1)
1315 insert_nop = true;
1316 break;
1317
1318 default:
1319 break;
1320 }
1321 }
1322
1323 /* Look for a single-word load into an integer register. */
1324 else if (sparc_fix_ut699
1325 && NONJUMP_INSN_P (insn)
1326 && (set = single_set (insn)) != NULL_RTX
1327 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1328 && (mem_ref (SET_SRC (set)) != NULL_RTX
1329 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1330 && REG_P (SET_DEST (set))
1331 && REGNO (SET_DEST (set)) < 32)
1332 {
1333 /* There is no problem if the second memory access has a data
1334 dependency on the first single-cycle load. */
1335 rtx x = SET_DEST (set);
1336
1337 next = next_active_insn (insn);
1338 if (!next)
1339 break;
1340 /* If the insn is a branch, then it cannot be problematic. */
1341 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1342 continue;
1343
1344 /* Look for a second memory access to/from an integer register. */
1345 if ((set = single_set (next)) != NULL_RTX)
1346 {
1347 rtx src = SET_SRC (set);
1348 rtx dest = SET_DEST (set);
1349 rtx mem;
1350
1351 /* LDD is affected. */
1352 if ((mem = mem_ref (src)) != NULL_RTX
1353 && REG_P (dest)
1354 && REGNO (dest) < 32
1355 && !reg_mentioned_p (x, XEXP (mem, 0)))
1356 insert_nop = true;
1357
1358 /* STD is *not* affected. */
1359 else if (MEM_P (dest)
1360 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1361 && (src == CONST0_RTX (GET_MODE (dest))
1362 || (REG_P (src)
1363 && REGNO (src) < 32
1364 && REGNO (src) != REGNO (x)))
1365 && !reg_mentioned_p (x, XEXP (dest, 0)))
1366 insert_nop = true;
1367
1368 	    /* GOT accesses use LD.  */
1369 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1370 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1371 insert_nop = true;
1372 }
1373 }
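      /* Editorial illustration (not part of the original source), assuming
	 typical SPARC assembly: for the UT699 fix above,

	     ld [%o0], %g1
	     ld [%o2], %g2           ! second integer load, address not using %g1

	 gets a nop inserted between the two loads, whereas

	     ld [%o0], %g1
	     ld [%g1], %g2           ! address depends on %g1: not affected

	 is left alone.  */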
1374
1375 /* Look for a single-word load/operation into an FP register. */
1376 else if (sparc_fix_ut699
1377 && NONJUMP_INSN_P (insn)
1378 && (set = single_set (insn)) != NULL_RTX
1379 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1380 && REG_P (SET_DEST (set))
1381 && REGNO (SET_DEST (set)) > 31)
1382 {
1383 /* Number of instructions in the problematic window. */
1384 const int n_insns = 4;
1385 /* The problematic combination is with the sibling FP register. */
1386 const unsigned int x = REGNO (SET_DEST (set));
1387 const unsigned int y = x ^ 1;
1388 rtx_insn *after;
1389 int i;
1390
1391 next = next_active_insn (insn);
1392 if (!next)
1393 break;
1394 /* If the insn is a branch, then it cannot be problematic. */
1395 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1396 continue;
1397
1398 /* Look for a second load/operation into the sibling FP register. */
1399 if (!((set = single_set (next)) != NULL_RTX
1400 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1401 && REG_P (SET_DEST (set))
1402 && REGNO (SET_DEST (set)) == y))
1403 continue;
1404
1405 /* Look for a (possible) store from the FP register in the next N
1406 instructions, but bail out if it is again modified or if there
1407 is a store from the sibling FP register before this store. */
1408 for (after = next, i = 0; i < n_insns; i++)
1409 {
1410 bool branch_p;
1411
1412 after = next_active_insn (after);
1413 if (!after)
1414 break;
1415
1416 /* This is a branch with an empty delay slot. */
1417 if (!NONJUMP_INSN_P (after))
1418 {
1419 if (++i == n_insns)
1420 break;
1421 branch_p = true;
1422 after = NULL;
1423 }
1424 /* This is a branch with a filled delay slot. */
1425 else if (rtx_sequence *seq =
1426 dyn_cast <rtx_sequence *> (PATTERN (after)))
1427 {
1428 if (++i == n_insns)
1429 break;
1430 branch_p = true;
1431 after = seq->insn (1);
1432 }
1433 /* This is a regular instruction. */
1434 else
1435 branch_p = false;
1436
1437 if (after && (set = single_set (after)) != NULL_RTX)
1438 {
1439 const rtx src = SET_SRC (set);
1440 const rtx dest = SET_DEST (set);
1441 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1442
1443 /* If the FP register is again modified before the store,
1444 then the store isn't affected. */
1445 if (REG_P (dest)
1446 && (REGNO (dest) == x
1447 || (REGNO (dest) == y && size == 8)))
1448 break;
1449
1450 if (MEM_P (dest) && REG_P (src))
1451 {
1452 /* If there is a store from the sibling FP register
1453 before the store, then the store is not affected. */
1454 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1455 break;
1456
1457 /* Otherwise, the store is affected. */
1458 if (REGNO (src) == x && size == 4)
1459 {
1460 insert_nop = true;
1461 break;
1462 }
1463 }
1464 }
1465
1466 /* If we have a branch in the first M instructions, then we
1467 cannot see the (M+2)th instruction so we play safe. */
1468 if (branch_p && i <= (n_insns - 2))
1469 {
1470 insert_nop = true;
1471 break;
1472 }
1473 }
1474 }
1475
1476 else
1477 next = NEXT_INSN (insn);
1478
1479 if (insert_nop)
1480 emit_insn_before (gen_nop (), next);
1481 }
1482
1483 return 0;
1484 }
1485
1486 namespace {
1487
1488 const pass_data pass_data_work_around_errata =
1489 {
1490 RTL_PASS, /* type */
1491 "errata", /* name */
1492 OPTGROUP_NONE, /* optinfo_flags */
1493 TV_MACH_DEP, /* tv_id */
1494 0, /* properties_required */
1495 0, /* properties_provided */
1496 0, /* properties_destroyed */
1497 0, /* todo_flags_start */
1498 0, /* todo_flags_finish */
1499 };
1500
1501 class pass_work_around_errata : public rtl_opt_pass
1502 {
1503 public:
1504 pass_work_around_errata(gcc::context *ctxt)
1505 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1506 {}
1507
1508 /* opt_pass methods: */
1509 virtual bool gate (function *)
1510 {
1511 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
1512 || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
1513 }
1514
1515 virtual unsigned int execute (function *)
1516 {
1517 return sparc_do_work_around_errata ();
1518 }
1519
1520 }; // class pass_work_around_errata
1521
1522 } // anon namespace
1523
1524 rtl_opt_pass *
1525 make_pass_work_around_errata (gcc::context *ctxt)
1526 {
1527 return new pass_work_around_errata (ctxt);
1528 }
1529
1530 /* Helpers for TARGET_DEBUG_OPTIONS. */
1531 static void
1532 dump_target_flag_bits (const int flags)
1533 {
1534 if (flags & MASK_64BIT)
1535 fprintf (stderr, "64BIT ");
1536 if (flags & MASK_APP_REGS)
1537 fprintf (stderr, "APP_REGS ");
1538 if (flags & MASK_FASTER_STRUCTS)
1539 fprintf (stderr, "FASTER_STRUCTS ");
1540 if (flags & MASK_FLAT)
1541 fprintf (stderr, "FLAT ");
1542 if (flags & MASK_FMAF)
1543 fprintf (stderr, "FMAF ");
1544 if (flags & MASK_FSMULD)
1545 fprintf (stderr, "FSMULD ");
1546 if (flags & MASK_FPU)
1547 fprintf (stderr, "FPU ");
1548 if (flags & MASK_HARD_QUAD)
1549 fprintf (stderr, "HARD_QUAD ");
1550 if (flags & MASK_POPC)
1551 fprintf (stderr, "POPC ");
1552 if (flags & MASK_PTR64)
1553 fprintf (stderr, "PTR64 ");
1554 if (flags & MASK_STACK_BIAS)
1555 fprintf (stderr, "STACK_BIAS ");
1556 if (flags & MASK_UNALIGNED_DOUBLES)
1557 fprintf (stderr, "UNALIGNED_DOUBLES ");
1558 if (flags & MASK_V8PLUS)
1559 fprintf (stderr, "V8PLUS ");
1560 if (flags & MASK_VIS)
1561 fprintf (stderr, "VIS ");
1562 if (flags & MASK_VIS2)
1563 fprintf (stderr, "VIS2 ");
1564 if (flags & MASK_VIS3)
1565 fprintf (stderr, "VIS3 ");
1566 if (flags & MASK_VIS4)
1567 fprintf (stderr, "VIS4 ");
1568 if (flags & MASK_VIS4B)
1569 fprintf (stderr, "VIS4B ");
1570 if (flags & MASK_CBCOND)
1571 fprintf (stderr, "CBCOND ");
1572 if (flags & MASK_DEPRECATED_V8_INSNS)
1573 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1574 if (flags & MASK_SPARCLET)
1575 fprintf (stderr, "SPARCLET ");
1576 if (flags & MASK_SPARCLITE)
1577 fprintf (stderr, "SPARCLITE ");
1578 if (flags & MASK_V8)
1579 fprintf (stderr, "V8 ");
1580 if (flags & MASK_V9)
1581 fprintf (stderr, "V9 ");
1582 }
1583
1584 static void
1585 dump_target_flags (const char *prefix, const int flags)
1586 {
1587 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1588 dump_target_flag_bits (flags);
1589   fprintf (stderr, "]\n");
1590 }
1591
1592 /* Validate and override various options, and do some machine dependent
1593 initialization. */
1594
1595 static void
1596 sparc_option_override (void)
1597 {
1598 static struct code_model {
1599 const char *const name;
1600 const enum cmodel value;
1601 } const cmodels[] = {
1602 { "32", CM_32 },
1603 { "medlow", CM_MEDLOW },
1604 { "medmid", CM_MEDMID },
1605 { "medany", CM_MEDANY },
1606 { "embmedany", CM_EMBMEDANY },
1607 { NULL, (enum cmodel) 0 }
1608 };
1609 const struct code_model *cmodel;
1610 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1611 static struct cpu_default {
1612 const int cpu;
1613 const enum processor_type processor;
1614 } const cpu_default[] = {
1615 /* There must be one entry here for each TARGET_CPU value. */
1616 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1617 { TARGET_CPU_v8, PROCESSOR_V8 },
1618 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1619 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1620 { TARGET_CPU_leon, PROCESSOR_LEON },
1621 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1622 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1623 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1624 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1625 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1626 { TARGET_CPU_v9, PROCESSOR_V9 },
1627 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1628 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1629 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1630 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1631 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1632 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1633 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1634 { TARGET_CPU_m8, PROCESSOR_M8 },
1635 { -1, PROCESSOR_V7 }
1636 };
1637 const struct cpu_default *def;
1638 /* Table of values for -m{cpu,tune}=. This must match the order of
1639 the enum processor_type in sparc-opts.h. */
1640 static struct cpu_table {
1641 const char *const name;
1642 const int disable;
1643 const int enable;
1644 } const cpu_table[] = {
1645 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1646 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1647 { "v8", MASK_ISA, MASK_V8 },
1648 /* TI TMS390Z55 supersparc */
1649 { "supersparc", MASK_ISA, MASK_V8 },
1650 { "hypersparc", MASK_ISA, MASK_V8 },
1651 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1652 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1653 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1654 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1655 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1656 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1657 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1658 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1659 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1660 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1661 /* TEMIC sparclet */
1662 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1663 { "v9", MASK_ISA, MASK_V9 },
1664 /* UltraSPARC I, II, IIi */
1665 { "ultrasparc", MASK_ISA,
1666 /* Although insns using %y are deprecated, it is a clear win. */
1667 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1668 /* UltraSPARC III */
1669 /* ??? Check if %y issue still holds true. */
1670 { "ultrasparc3", MASK_ISA,
1671 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1672 /* UltraSPARC T1 */
1673 { "niagara", MASK_ISA,
1674 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1675 /* UltraSPARC T2 */
1676 { "niagara2", MASK_ISA,
1677 MASK_V9|MASK_POPC|MASK_VIS2 },
1678 /* UltraSPARC T3 */
1679 { "niagara3", MASK_ISA,
1680 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1681 /* UltraSPARC T4 */
1682 { "niagara4", MASK_ISA,
1683 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1684 /* UltraSPARC M7 */
1685 { "niagara7", MASK_ISA,
1686 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1687 /* UltraSPARC M8 */
1688 { "m8", MASK_ISA,
1689 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1690 };
1691 const struct cpu_table *cpu;
1692 unsigned int i;
1693
1694 if (sparc_debug_string != NULL)
1695 {
1696 const char *q;
1697 char *p;
1698
1699 p = ASTRDUP (sparc_debug_string);
1700 while ((q = strtok (p, ",")) != NULL)
1701 {
1702 bool invert;
1703 int mask;
1704
1705 p = NULL;
1706 if (*q == '!')
1707 {
1708 invert = true;
1709 q++;
1710 }
1711 else
1712 invert = false;
1713
1714 if (! strcmp (q, "all"))
1715 mask = MASK_DEBUG_ALL;
1716 else if (! strcmp (q, "options"))
1717 mask = MASK_DEBUG_OPTIONS;
1718 else
1719 error ("unknown -mdebug-%s switch", q);
1720
1721 if (invert)
1722 sparc_debug &= ~mask;
1723 else
1724 sparc_debug |= mask;
1725 }
1726 }
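
/* As an illustration of the parsing loop above (a sketch, not from the
   original sources, and assuming the option is spelled -mdebug= like the
   other -m<flag>= options): a string such as "all,!options" first sets
   every bit in MASK_DEBUG_ALL and then clears MASK_DEBUG_OPTIONS, since
   each comma-separated token is applied in order and a leading '!'
   inverts the corresponding mask.  */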
1727
1728 /* Enable the FsMULd instruction by default if not explicitly specified by
1729 the user. It may later be disabled by the CPU (explicitly or not). */
1730 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1731 target_flags |= MASK_FSMULD;
1732
1733 if (TARGET_DEBUG_OPTIONS)
1734 {
1735 dump_target_flags ("Initial target_flags", target_flags);
1736 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1737 }
1738
1739 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1740 SUBTARGET_OVERRIDE_OPTIONS;
1741 #endif
1742
1743 #ifndef SPARC_BI_ARCH
1744 /* Check for unsupported architecture size. */
1745 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1746 error ("%s is not supported by this configuration",
1747 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1748 #endif
1749
1750 /* We force all 64-bit archs to use a 128-bit long double. */
1751 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1752 {
1753 error ("-mlong-double-64 not allowed with -m64");
1754 target_flags |= MASK_LONG_DOUBLE_128;
1755 }
1756
1757 /* Code model selection. */
1758 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1759
1760 #ifdef SPARC_BI_ARCH
1761 if (TARGET_ARCH32)
1762 sparc_cmodel = CM_32;
1763 #endif
1764
1765 if (sparc_cmodel_string != NULL)
1766 {
1767 if (TARGET_ARCH64)
1768 {
1769 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1770 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1771 break;
1772 if (cmodel->name == NULL)
1773 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1774 else
1775 sparc_cmodel = cmodel->value;
1776 }
1777 else
1778 error ("-mcmodel= is not supported on 32-bit systems");
1779 }
1780
1781 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1782 for (i = 8; i < 16; i++)
1783 if (!call_used_regs [i])
1784 {
1785 error ("-fcall-saved-REG is not supported for out registers");
1786 call_used_regs [i] = 1;
1787 }
1788
1789 /* Set the default CPU if no -mcpu option was specified. */
1790 if (!global_options_set.x_sparc_cpu_and_features)
1791 {
1792 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1793 if (def->cpu == TARGET_CPU_DEFAULT)
1794 break;
1795 gcc_assert (def->cpu != -1);
1796 sparc_cpu_and_features = def->processor;
1797 }
1798
1799 /* Set the CPU to tune for if no -mtune option was specified. */
1800 if (!global_options_set.x_sparc_cpu)
1801 sparc_cpu = sparc_cpu_and_features;
1802
1803 cpu = &cpu_table[(int) sparc_cpu_and_features];
1804
1805 if (TARGET_DEBUG_OPTIONS)
1806 {
1807 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1808 dump_target_flags ("cpu->disable", cpu->disable);
1809 dump_target_flags ("cpu->enable", cpu->enable);
1810 }
1811
1812 target_flags &= ~cpu->disable;
1813 target_flags |= (cpu->enable
1814 #ifndef HAVE_AS_FMAF_HPC_VIS3
1815 & ~(MASK_FMAF | MASK_VIS3)
1816 #endif
1817 #ifndef HAVE_AS_SPARC4
1818 & ~MASK_CBCOND
1819 #endif
1820 #ifndef HAVE_AS_SPARC5_VIS4
1821 & ~(MASK_VIS4 | MASK_SUBXC)
1822 #endif
1823 #ifndef HAVE_AS_SPARC6
1824 & ~(MASK_VIS4B)
1825 #endif
1826 #ifndef HAVE_AS_LEON
1827 & ~(MASK_LEON | MASK_LEON3)
1828 #endif
1829 & ~(target_flags_explicit & MASK_FEATURES)
1830 );
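
/* As a concrete illustration (not part of the original comments): with
   -mcpu=niagara2 the table entry above first clears every bit in MASK_ISA
   and then sets MASK_V9|MASK_POPC|MASK_VIS2, except for any ISA bit the
   user set or cleared explicitly on the command line (target_flags_explicit)
   and any feature the assembler was not configured to support
   (the HAVE_AS_* guards).  */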
1831
1832 /* -mvis2 implies -mvis. */
1833 if (TARGET_VIS2)
1834 target_flags |= MASK_VIS;
1835
1836 /* -mvis3 implies -mvis2 and -mvis. */
1837 if (TARGET_VIS3)
1838 target_flags |= MASK_VIS2 | MASK_VIS;
1839
1840 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1841 if (TARGET_VIS4)
1842 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1843
1844 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1845 if (TARGET_VIS4B)
1846 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1847
1848 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1849 FPU is disabled. */
1850 if (!TARGET_FPU)
1851 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1852 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1853
1854 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1855 are available; -m64 also implies v9. */
1856 if (TARGET_VIS || TARGET_ARCH64)
1857 {
1858 target_flags |= MASK_V9;
1859 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1860 }
1861
1862 /* -mvis also implies -mv8plus on 32-bit. */
1863 if (TARGET_VIS && !TARGET_ARCH64)
1864 target_flags |= MASK_V8PLUS;
1865
1866 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1867 if (TARGET_V9 && TARGET_ARCH32)
1868 target_flags |= MASK_DEPRECATED_V8_INSNS;
1869
1870 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1871 if (!TARGET_V9 || TARGET_ARCH64)
1872 target_flags &= ~MASK_V8PLUS;
1873
1874 /* Don't use stack biasing in 32-bit mode. */
1875 if (TARGET_ARCH32)
1876 target_flags &= ~MASK_STACK_BIAS;
1877
1878 /* Use LRA instead of reload, unless otherwise instructed. */
1879 if (!(target_flags_explicit & MASK_LRA))
1880 target_flags |= MASK_LRA;
1881
1882 /* Enable applicable errata workarounds for LEON3FT. */
1883 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1884 {
1885 sparc_fix_b2bst = 1;
1886 sparc_fix_lost_divsqrt = 1;
1887 }
1888
1889 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1890 if (sparc_fix_ut699)
1891 target_flags &= ~MASK_FSMULD;
1892
1893 /* Supply a default value for align_functions. */
1894 if (align_functions == 0)
1895 {
1896 if (sparc_cpu == PROCESSOR_ULTRASPARC
1897 || sparc_cpu == PROCESSOR_ULTRASPARC3
1898 || sparc_cpu == PROCESSOR_NIAGARA
1899 || sparc_cpu == PROCESSOR_NIAGARA2
1900 || sparc_cpu == PROCESSOR_NIAGARA3
1901 || sparc_cpu == PROCESSOR_NIAGARA4)
1902 align_functions = 32;
1903 else if (sparc_cpu == PROCESSOR_NIAGARA7
1904 || sparc_cpu == PROCESSOR_M8)
1905 align_functions = 64;
1906 }
1907
1908 /* Validate PCC_STRUCT_RETURN. */
1909 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1910 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1911
1912 /* Only use .uaxword when compiling for a 64-bit target. */
1913 if (!TARGET_ARCH64)
1914 targetm.asm_out.unaligned_op.di = NULL;
1915
1916 /* Do various machine dependent initializations. */
1917 sparc_init_modes ();
1918
1919 /* Set up function hooks. */
1920 init_machine_status = sparc_init_machine_status;
1921
1922 switch (sparc_cpu)
1923 {
1924 case PROCESSOR_V7:
1925 case PROCESSOR_CYPRESS:
1926 sparc_costs = &cypress_costs;
1927 break;
1928 case PROCESSOR_V8:
1929 case PROCESSOR_SPARCLITE:
1930 case PROCESSOR_SUPERSPARC:
1931 sparc_costs = &supersparc_costs;
1932 break;
1933 case PROCESSOR_F930:
1934 case PROCESSOR_F934:
1935 case PROCESSOR_HYPERSPARC:
1936 case PROCESSOR_SPARCLITE86X:
1937 sparc_costs = &hypersparc_costs;
1938 break;
1939 case PROCESSOR_LEON:
1940 sparc_costs = &leon_costs;
1941 break;
1942 case PROCESSOR_LEON3:
1943 case PROCESSOR_LEON3V7:
1944 sparc_costs = &leon3_costs;
1945 break;
1946 case PROCESSOR_SPARCLET:
1947 case PROCESSOR_TSC701:
1948 sparc_costs = &sparclet_costs;
1949 break;
1950 case PROCESSOR_V9:
1951 case PROCESSOR_ULTRASPARC:
1952 sparc_costs = &ultrasparc_costs;
1953 break;
1954 case PROCESSOR_ULTRASPARC3:
1955 sparc_costs = &ultrasparc3_costs;
1956 break;
1957 case PROCESSOR_NIAGARA:
1958 sparc_costs = &niagara_costs;
1959 break;
1960 case PROCESSOR_NIAGARA2:
1961 sparc_costs = &niagara2_costs;
1962 break;
1963 case PROCESSOR_NIAGARA3:
1964 sparc_costs = &niagara3_costs;
1965 break;
1966 case PROCESSOR_NIAGARA4:
1967 sparc_costs = &niagara4_costs;
1968 break;
1969 case PROCESSOR_NIAGARA7:
1970 sparc_costs = &niagara7_costs;
1971 break;
1972 case PROCESSOR_M8:
1973 sparc_costs = &m8_costs;
1974 break;
1975 case PROCESSOR_NATIVE:
1976 gcc_unreachable ();
1977 };
1978
1979 if (sparc_memory_model == SMM_DEFAULT)
1980 {
1981 /* Choose the memory model required by the operating system. */
1982 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1983 if (os_default != SMM_DEFAULT)
1984 sparc_memory_model = os_default;
1985 /* Choose the most relaxed model for the processor. */
1986 else if (TARGET_V9)
1987 sparc_memory_model = SMM_RMO;
1988 else if (TARGET_LEON3)
1989 sparc_memory_model = SMM_TSO;
1990 else if (TARGET_LEON)
1991 sparc_memory_model = SMM_SC;
1992 else if (TARGET_V8)
1993 sparc_memory_model = SMM_PSO;
1994 else
1995 sparc_memory_model = SMM_SC;
1996 }
1997
1998 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1999 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
2000 target_flags |= MASK_LONG_DOUBLE_128;
2001 #endif
2002
2003 if (TARGET_DEBUG_OPTIONS)
2004 dump_target_flags ("Final target_flags", target_flags);
2005
2006 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2007 can run at the same time. More important, it is the threshold
2008 defining when additional prefetches will be dropped by the
2009 hardware.
2010
2011 The UltraSPARC-III features a documented prefetch queue with a
2012 size of 8. Additional prefetches issued in the cpu are
2013 dropped.
2014
2015 Niagara processors are different. In these processors prefetches
2016 are handled much like regular loads. The L1 miss buffer is 32
2017 entries, but prefetches start getting affected when 30 entries
2018 become occupied. That occupation could be a mix of regular loads
2019 and prefetches though. And that buffer is shared by all threads.
2020 Once the threshold is reached, if the core is running a single
2021 thread the prefetch will retry. If more than one thread is
2022 running, the prefetch will be dropped.
2023
2024 All this makes it very difficult to determine how many
2025 prefetches can be in flight simultaneously, even in a
2026 single-threaded program. Experimental results show that setting
2027 this parameter to 32 works well when the number of threads is not
2028 high. */
2029 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2030 ((sparc_cpu == PROCESSOR_ULTRASPARC
2031 || sparc_cpu == PROCESSOR_NIAGARA
2032 || sparc_cpu == PROCESSOR_NIAGARA2
2033 || sparc_cpu == PROCESSOR_NIAGARA3
2034 || sparc_cpu == PROCESSOR_NIAGARA4)
2035 ? 2
2036 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2037 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2038 || sparc_cpu == PROCESSOR_M8)
2039 ? 32 : 3))),
2040 global_options.x_param_values,
2041 global_options_set.x_param_values);
2042
2043 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2044 bytes.
2045
2046 The Oracle SPARC Architecture (previously the UltraSPARC
2047 Architecture) specification states that when a PREFETCH[A]
2048 instruction is executed an implementation-specific amount of data
2049 is prefetched, and that it is at least 64 bytes long (aligned to
2050 at least 64 bytes).
2051
2052 However, this is not correct. The M7 (and implementations prior
2053 to that) does not guarantee a 64B prefetch into a cache if the
2054 line size is smaller. A single cache line is all that is ever
2055 prefetched. So for the M7, where the L1D$ has 32B lines and the
2056 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2057 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2058 is a read_n prefetch, which is the only type which allocates to
2059 the L1.) */
2060 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2061 (sparc_cpu == PROCESSOR_M8
2062 ? 64 : 32),
2063 global_options.x_param_values,
2064 global_options_set.x_param_values);
2065
2066 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2067 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2068 Niagara processors feature a L1D$ of 16KB. */
2069 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2070 ((sparc_cpu == PROCESSOR_ULTRASPARC
2071 || sparc_cpu == PROCESSOR_ULTRASPARC3
2072 || sparc_cpu == PROCESSOR_NIAGARA
2073 || sparc_cpu == PROCESSOR_NIAGARA2
2074 || sparc_cpu == PROCESSOR_NIAGARA3
2075 || sparc_cpu == PROCESSOR_NIAGARA4
2076 || sparc_cpu == PROCESSOR_NIAGARA7
2077 || sparc_cpu == PROCESSOR_M8)
2078 ? 16 : 64),
2079 global_options.x_param_values,
2080 global_options_set.x_param_values);
2081
2082
2083 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
2084 that 512 is the default in params.def. */
2085 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2086 ((sparc_cpu == PROCESSOR_NIAGARA4
2087 || sparc_cpu == PROCESSOR_M8)
2088 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2089 ? 256 : 512)),
2090 global_options.x_param_values,
2091 global_options_set.x_param_values);
2092
2093
2094 /* Disable save slot sharing for call-clobbered registers by default.
2095 The IRA sharing algorithm works on single registers only and this
2096 pessimizes for double floating-point registers. */
2097 if (!global_options_set.x_flag_ira_share_save_slots)
2098 flag_ira_share_save_slots = 0;
2099
2100 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2101 redundant 32-to-64-bit extensions. */
2102 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2103 flag_ree = 0;
2104 }
2105 \f
2106 /* Miscellaneous utilities. */
2107
2108 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2109 or branch on register contents instructions. */
2110
2111 int
2112 v9_regcmp_p (enum rtx_code code)
2113 {
2114 return (code == EQ || code == NE || code == GE || code == LT
2115 || code == LE || code == GT);
2116 }
2117
2118 /* Nonzero if OP is a floating point constant which can
2119 be loaded into an integer register using a single
2120 sethi instruction. */
2121
2122 int
2123 fp_sethi_p (rtx op)
2124 {
2125 if (GET_CODE (op) == CONST_DOUBLE)
2126 {
2127 long i;
2128
2129 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2130 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2131 }
2132
2133 return 0;
2134 }
2135
2136 /* Nonzero if OP is a floating point constant which can
2137 be loaded into an integer register using a single
2138 mov instruction. */
2139
2140 int
2141 fp_mov_p (rtx op)
2142 {
2143 if (GET_CODE (op) == CONST_DOUBLE)
2144 {
2145 long i;
2146
2147 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2148 return SPARC_SIMM13_P (i);
2149 }
2150
2151 return 0;
2152 }
2153
2154 /* Nonzero if OP is a floating point constant which can
2155 be loaded into an integer register using a high/losum
2156 instruction sequence. */
2157
2158 int
2159 fp_high_losum_p (rtx op)
2160 {
2161 /* The constraints calling this should only be in
2162 SFmode move insns, so any constant which cannot
2163 be moved using a single insn will do. */
2164 if (GET_CODE (op) == CONST_DOUBLE)
2165 {
2166 long i;
2167
2168 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2169 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2170 }
2171
2172 return 0;
2173 }
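
/* Illustrative summary of the three predicates above (a sketch, not part
   of the original comments): the 32-bit image of the SFmode constant
   decides the loading strategy.  0.0f has image 0x00000000, which
   satisfies SPARC_SIMM13_P, so fp_mov_p holds and a single mov suffices.
   1.0f has image 0x3f800000, whose low 10 bits are clear, so fp_sethi_p
   holds and a single sethi suffices.  An image such as 0x3f800001
   satisfies neither check, so fp_high_losum_p holds and a sethi/or pair
   is needed.  */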
2174
2175 /* Return true if the address of LABEL can be loaded by means of the
2176 mov{si,di}_pic_label_ref patterns in PIC mode. */
2177
2178 static bool
2179 can_use_mov_pic_label_ref (rtx label)
2180 {
2181 /* VxWorks does not impose a fixed gap between segments; the run-time
2182 gap can be different from the object-file gap. We therefore can't
2183 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2184 are absolutely sure that X is in the same segment as the GOT.
2185 Unfortunately, the flexibility of linker scripts means that we
2186 can't be sure of that in general, so assume that GOT-relative
2187 accesses are never valid on VxWorks. */
2188 if (TARGET_VXWORKS_RTP)
2189 return false;
2190
2191 /* Similarly, if the label is non-local, it might end up being placed
2192 in a different section from the current one, whereas mov_pic_label_ref
2193 requires the label and the code to be in the same section. */
2194 if (LABEL_REF_NONLOCAL_P (label))
2195 return false;
2196
2197 /* Finally, if we are reordering basic blocks and partitioning them
2198 into hot and cold sections, this might happen for any label. */
2199 if (flag_reorder_blocks_and_partition)
2200 return false;
2201
2202 return true;
2203 }
2204
2205 /* Expand a move instruction. Return true if all work is done. */
2206
2207 bool
2208 sparc_expand_move (machine_mode mode, rtx *operands)
2209 {
2210 /* Handle sets of MEM first. */
2211 if (GET_CODE (operands[0]) == MEM)
2212 {
2213 /* 0 is a register (or a pair of registers) on SPARC. */
2214 if (register_or_zero_operand (operands[1], mode))
2215 return false;
2216
2217 if (!reload_in_progress)
2218 {
2219 operands[0] = validize_mem (operands[0]);
2220 operands[1] = force_reg (mode, operands[1]);
2221 }
2222 }
2223
2224 /* Fixup TLS cases. */
2225 if (TARGET_HAVE_TLS
2226 && CONSTANT_P (operands[1])
2227 && sparc_tls_referenced_p (operands [1]))
2228 {
2229 operands[1] = sparc_legitimize_tls_address (operands[1]);
2230 return false;
2231 }
2232
2233 /* Fixup PIC cases. */
2234 if (flag_pic && CONSTANT_P (operands[1]))
2235 {
2236 if (pic_address_needs_scratch (operands[1]))
2237 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2238
2239 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2240 if (GET_CODE (operands[1]) == LABEL_REF
2241 && can_use_mov_pic_label_ref (operands[1]))
2242 {
2243 if (mode == SImode)
2244 {
2245 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2246 return true;
2247 }
2248
2249 if (mode == DImode)
2250 {
2251 gcc_assert (TARGET_ARCH64);
2252 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2253 return true;
2254 }
2255 }
2256
2257 if (symbolic_operand (operands[1], mode))
2258 {
2259 operands[1]
2260 = sparc_legitimize_pic_address (operands[1],
2261 reload_in_progress
2262 ? operands[0] : NULL_RTX);
2263 return false;
2264 }
2265 }
2266
2267 /* If we are trying to toss an integer constant into FP registers,
2268 or loading a FP or vector constant, force it into memory. */
2269 if (CONSTANT_P (operands[1])
2270 && REG_P (operands[0])
2271 && (SPARC_FP_REG_P (REGNO (operands[0]))
2272 || SCALAR_FLOAT_MODE_P (mode)
2273 || VECTOR_MODE_P (mode)))
2274 {
2275 /* emit_group_store will send such bogosity to us when it is
2276 not storing directly into memory. So fix this up to avoid
2277 crashes in output_constant_pool. */
2278 if (operands [1] == const0_rtx)
2279 operands[1] = CONST0_RTX (mode);
2280
2281 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2282 always other regs. */
2283 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2284 && (const_zero_operand (operands[1], mode)
2285 || const_all_ones_operand (operands[1], mode)))
2286 return false;
2287
2288 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2289 /* We are able to build any SF constant in integer registers
2290 with at most 2 instructions. */
2291 && (mode == SFmode
2292 /* And any DF constant in integer registers if needed. */
2293 || (mode == DFmode && !can_create_pseudo_p ())))
2294 return false;
2295
2296 operands[1] = force_const_mem (mode, operands[1]);
2297 if (!reload_in_progress)
2298 operands[1] = validize_mem (operands[1]);
2299 return false;
2300 }
2301
2302 /* Accept non-constants and valid constants unmodified. */
2303 if (!CONSTANT_P (operands[1])
2304 || GET_CODE (operands[1]) == HIGH
2305 || input_operand (operands[1], mode))
2306 return false;
2307
2308 switch (mode)
2309 {
2310 case E_QImode:
2311 /* All QImode constants require only one insn, so proceed. */
2312 break;
2313
2314 case E_HImode:
2315 case E_SImode:
2316 sparc_emit_set_const32 (operands[0], operands[1]);
2317 return true;
2318
2319 case E_DImode:
2320 /* input_operand should have filtered out 32-bit mode. */
2321 sparc_emit_set_const64 (operands[0], operands[1]);
2322 return true;
2323
2324 case E_TImode:
2325 {
2326 rtx high, low;
2327 /* TImode isn't available in 32-bit mode. */
2328 split_double (operands[1], &high, &low);
2329 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2330 high));
2331 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2332 low));
2333 }
2334 return true;
2335
2336 default:
2337 gcc_unreachable ();
2338 }
2339
2340 return false;
2341 }
2342
2343 /* Load OP1, a 32-bit constant, into OP0, a register.
2344 We know it can't be done in one insn when we get
2345 here; the move expander guarantees this. */
2346
2347 static void
2348 sparc_emit_set_const32 (rtx op0, rtx op1)
2349 {
2350 machine_mode mode = GET_MODE (op0);
2351 rtx temp = op0;
2352
2353 if (can_create_pseudo_p ())
2354 temp = gen_reg_rtx (mode);
2355
2356 if (GET_CODE (op1) == CONST_INT)
2357 {
2358 gcc_assert (!small_int_operand (op1, mode)
2359 && !const_high_operand (op1, mode));
2360
2361 /* Emit them as real moves instead of a HIGH/LO_SUM, so that
2362 CSE can see everything and reuse intermediate
2363 values if it wants. */
2364 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2365 & ~(HOST_WIDE_INT) 0x3ff)));
2366
2367 emit_insn (gen_rtx_SET (op0,
2368 gen_rtx_IOR (mode, temp,
2369 GEN_INT (INTVAL (op1) & 0x3ff))));
2370 }
2371 else
2372 {
2373 /* A symbol, emit in the traditional way. */
2374 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2375 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2376 }
2377 }
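
/* A worked example for the CONST_INT path above (illustrative only; the
   register names are placeholders): for op1 = 0x12345678 the first SET
   materializes 0x12345678 & ~0x3ff = 0x12345400, which the movsi pattern
   can emit as a single sethi, and the second SET ORs in the low 10 bits
   0x12345678 & 0x3ff = 0x278, giving the usual sethi/or pair:

     sethi  %hi(0x12345678), %temp
     or     %temp, 0x278, %op0  */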
2378
2379 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2380 If TEMP is nonzero, we are forbidden to use any other scratch
2381 registers. Otherwise, we are allowed to generate them as needed.
2382
2383 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2384 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2385
2386 void
2387 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2388 {
2389 rtx cst, temp1, temp2, temp3, temp4, temp5;
2390 rtx ti_temp = 0;
2391
2392 /* Deal with offsets that are too large. */
2393 if (GET_CODE (op1) == CONST
2394 && GET_CODE (XEXP (op1, 0)) == PLUS
2395 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2396 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2397 {
2398 gcc_assert (!temp);
2399 temp1 = gen_reg_rtx (DImode);
2400 temp2 = gen_reg_rtx (DImode);
2401 sparc_emit_set_const64 (temp2, cst);
2402 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2403 NULL_RTX);
2404 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2405 return;
2406 }
2407
2408 if (temp && GET_MODE (temp) == TImode)
2409 {
2410 ti_temp = temp;
2411 temp = gen_rtx_REG (DImode, REGNO (temp));
2412 }
2413
2414 /* SPARC-V9 code-model support. */
2415 switch (sparc_cmodel)
2416 {
2417 case CM_MEDLOW:
2418 /* The range spanned by all instructions in the object is less
2419 than 2^31 bytes (2GB) and the distance from any instruction
2420 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2421 than 2^31 bytes (2GB).
2422
2423 The executable must be in the low 4TB of the virtual address
2424 space.
2425
2426 sethi %hi(symbol), %temp1
2427 or %temp1, %lo(symbol), %reg */
2428 if (temp)
2429 temp1 = temp; /* op0 is allowed. */
2430 else
2431 temp1 = gen_reg_rtx (DImode);
2432
2433 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2434 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2435 break;
2436
2437 case CM_MEDMID:
2438 /* The range spanned by all instructions in the object is less
2439 than 2^31 bytes (2GB) and the distance from any instruction
2440 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2441 than 2^31 bytes (2GB).
2442
2443 The executable must be in the low 16TB of the virtual address
2444 space.
2445
2446 sethi %h44(symbol), %temp1
2447 or %temp1, %m44(symbol), %temp2
2448 sllx %temp2, 12, %temp3
2449 or %temp3, %l44(symbol), %reg */
2450 if (temp)
2451 {
2452 temp1 = op0;
2453 temp2 = op0;
2454 temp3 = temp; /* op0 is allowed. */
2455 }
2456 else
2457 {
2458 temp1 = gen_reg_rtx (DImode);
2459 temp2 = gen_reg_rtx (DImode);
2460 temp3 = gen_reg_rtx (DImode);
2461 }
2462
2463 emit_insn (gen_seth44 (temp1, op1));
2464 emit_insn (gen_setm44 (temp2, temp1, op1));
2465 emit_insn (gen_rtx_SET (temp3,
2466 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2467 emit_insn (gen_setl44 (op0, temp3, op1));
2468 break;
2469
2470 case CM_MEDANY:
2471 /* The range spanned by all instructions in the object is less
2472 than 2^31 bytes (2GB) and the distance from any instruction
2473 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2474 than 2^31 bytes (2GB).
2475
2476 The executable can be placed anywhere in the virtual address
2477 space.
2478
2479 sethi %hh(symbol), %temp1
2480 sethi %lm(symbol), %temp2
2481 or %temp1, %hm(symbol), %temp3
2482 sllx %temp3, 32, %temp4
2483 or %temp4, %temp2, %temp5
2484 or %temp5, %lo(symbol), %reg */
2485 if (temp)
2486 {
2487 /* It is possible that one of the registers we got for operands[2]
2488 might coincide with that of operands[0] (which is why we made
2489 it TImode). Pick the other one to use as our scratch. */
2490 if (rtx_equal_p (temp, op0))
2491 {
2492 gcc_assert (ti_temp);
2493 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2494 }
2495 temp1 = op0;
2496 temp2 = temp; /* op0 is _not_ allowed, see above. */
2497 temp3 = op0;
2498 temp4 = op0;
2499 temp5 = op0;
2500 }
2501 else
2502 {
2503 temp1 = gen_reg_rtx (DImode);
2504 temp2 = gen_reg_rtx (DImode);
2505 temp3 = gen_reg_rtx (DImode);
2506 temp4 = gen_reg_rtx (DImode);
2507 temp5 = gen_reg_rtx (DImode);
2508 }
2509
2510 emit_insn (gen_sethh (temp1, op1));
2511 emit_insn (gen_setlm (temp2, op1));
2512 emit_insn (gen_sethm (temp3, temp1, op1));
2513 emit_insn (gen_rtx_SET (temp4,
2514 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2515 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2516 emit_insn (gen_setlo (op0, temp5, op1));
2517 break;
2518
2519 case CM_EMBMEDANY:
2520 /* Old old old backwards compatibility cruft here.
2521 Essentially it is MEDLOW with a fixed 64-bit
2522 virtual base added to all data segment addresses.
2523 Text-segment stuff is computed like MEDANY, we can't
2524 reuse the code above because the relocation knobs
2525 look different.
2526
2527 Data segment: sethi %hi(symbol), %temp1
2528 add %temp1, EMBMEDANY_BASE_REG, %temp2
2529 or %temp2, %lo(symbol), %reg */
2530 if (data_segment_operand (op1, GET_MODE (op1)))
2531 {
2532 if (temp)
2533 {
2534 temp1 = temp; /* op0 is allowed. */
2535 temp2 = op0;
2536 }
2537 else
2538 {
2539 temp1 = gen_reg_rtx (DImode);
2540 temp2 = gen_reg_rtx (DImode);
2541 }
2542
2543 emit_insn (gen_embmedany_sethi (temp1, op1));
2544 emit_insn (gen_embmedany_brsum (temp2, temp1));
2545 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2546 }
2547
2548 /* Text segment: sethi %uhi(symbol), %temp1
2549 sethi %hi(symbol), %temp2
2550 or %temp1, %ulo(symbol), %temp3
2551 sllx %temp3, 32, %temp4
2552 or %temp4, %temp2, %temp5
2553 or %temp5, %lo(symbol), %reg */
2554 else
2555 {
2556 if (temp)
2557 {
2558 /* It is possible that one of the registers we got for operands[2]
2559 might coincide with that of operands[0] (which is why we made
2560 it TImode). Pick the other one to use as our scratch. */
2561 if (rtx_equal_p (temp, op0))
2562 {
2563 gcc_assert (ti_temp);
2564 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2565 }
2566 temp1 = op0;
2567 temp2 = temp; /* op0 is _not_ allowed, see above. */
2568 temp3 = op0;
2569 temp4 = op0;
2570 temp5 = op0;
2571 }
2572 else
2573 {
2574 temp1 = gen_reg_rtx (DImode);
2575 temp2 = gen_reg_rtx (DImode);
2576 temp3 = gen_reg_rtx (DImode);
2577 temp4 = gen_reg_rtx (DImode);
2578 temp5 = gen_reg_rtx (DImode);
2579 }
2580
2581 emit_insn (gen_embmedany_textuhi (temp1, op1));
2582 emit_insn (gen_embmedany_texthi (temp2, op1));
2583 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2584 emit_insn (gen_rtx_SET (temp4,
2585 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2586 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2587 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2588 }
2589 break;
2590
2591 default:
2592 gcc_unreachable ();
2593 }
2594 }
2595
2596 /* These avoid problems when cross compiling. If we do not
2597 go through all this hair then the optimizer will see
2598 invalid REG_EQUAL notes or in some cases none at all. */
2599 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2600 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2601 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2602 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2603
2604 /* The optimizer is not to assume anything about exactly
2605 which bits are set for a HIGH; they are unspecified.
2606 Unfortunately this leads to many missed optimizations
2607 during CSE. We mask out the non-HIGH bits so that the result
2608 matches a plain movdi, to alleviate this problem. */
2609 static rtx
2610 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2611 {
2612 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2613 }
2614
2615 static rtx
2616 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2617 {
2618 return gen_rtx_SET (dest, GEN_INT (val));
2619 }
2620
2621 static rtx
2622 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2623 {
2624 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2625 }
2626
2627 static rtx
2628 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2629 {
2630 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2631 }
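
/* For instance (a sketch using an arbitrary value):
   gen_safe_HIGH64 (reg, 0x12345678) yields (set reg (const_int 0x12345400)),
   i.e. the low 10 bits are already masked off, so the resulting insn is a
   plain DImode move that the sethi alternative of movdi can match, and CSE
   sees the exact value instead of an underspecified HIGH.  */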
2632
2633 /* Worker routines for 64-bit constant formation on arch64.
2634 One of the key things to do in these emissions is
2635 to create as many temp REGs as possible. This makes it
2636 possible for half-built constants to be reused later when
2637 they are similar to values required elsewhere.
2638 Without doing this, the optimizer cannot see such
2639 opportunities. */
2640
2641 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2642 unsigned HOST_WIDE_INT, int);
2643
2644 static void
2645 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2646 unsigned HOST_WIDE_INT low_bits, int is_neg)
2647 {
2648 unsigned HOST_WIDE_INT high_bits;
2649
2650 if (is_neg)
2651 high_bits = (~low_bits) & 0xffffffff;
2652 else
2653 high_bits = low_bits;
2654
2655 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2656 if (!is_neg)
2657 {
2658 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2659 }
2660 else
2661 {
2662 /* If we are XOR'ing with -1, then we should emit a one's complement
2663 instead. This way the combiner will notice logical operations
2664 such as ANDN later on and substitute. */
2665 if ((low_bits & 0x3ff) == 0x3ff)
2666 {
2667 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2668 }
2669 else
2670 {
2671 emit_insn (gen_rtx_SET (op0,
2672 gen_safe_XOR64 (temp,
2673 (-(HOST_WIDE_INT)0x400
2674 | (low_bits & 0x3ff)))));
2675 }
2676 }
2677 }
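
/* A worked example for the is_neg path above (illustrative only): to load
   the sign-extended constant 0xffffffff80000001, this routine is called
   with low_bits = 0x80000001 and is_neg = 1.  Then high_bits = ~low_bits
   & 0xffffffff = 0x7ffffffe, the HIGH64 step puts 0x7ffffc00 into temp,
   and XORing with (-0x400 | 0x1) = 0xfffffffffffffc01 flips the upper
   32 bits and the masked-off low bits back, giving exactly
   0xffffffff80000001 in op0.  */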
2678
2679 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2680 unsigned HOST_WIDE_INT, int);
2681
2682 static void
2683 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2684 unsigned HOST_WIDE_INT high_bits,
2685 unsigned HOST_WIDE_INT low_immediate,
2686 int shift_count)
2687 {
2688 rtx temp2 = op0;
2689
2690 if ((high_bits & 0xfffffc00) != 0)
2691 {
2692 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2693 if ((high_bits & ~0xfffffc00) != 0)
2694 emit_insn (gen_rtx_SET (op0,
2695 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2696 else
2697 temp2 = temp;
2698 }
2699 else
2700 {
2701 emit_insn (gen_safe_SET64 (temp, high_bits));
2702 temp2 = temp;
2703 }
2704
2705 /* Now shift it up into place. */
2706 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2707 GEN_INT (shift_count))));
2708
2709 /* If there is a low immediate part, finish up by
2710 putting that in as well. */
2711 if (low_immediate != 0)
2712 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2713 }
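
/* A worked example for this routine (illustrative only): for the constant
   0x1234567800000123, sparc_emit_set_const64 calls it with
   high_bits = 0x12345678, low_immediate = 0x123 and shift_count = 32,
   producing the sequence

     sethi  %hi(0x12345678), %temp   ! temp = 0x12345400
     or     %temp, 0x278, %op0       ! op0  = 0x12345678
     sllx   %op0, 32, %op0           ! op0  = 0x1234567800000000
     or     %op0, 0x123, %op0        ! op0  = 0x1234567800000123  */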
2714
2715 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2716 unsigned HOST_WIDE_INT);
2717
2718 /* Full 64-bit constant decomposition. Even though this is the
2719 'worst' case, we still optimize a few things away. */
2720 static void
2721 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2722 unsigned HOST_WIDE_INT high_bits,
2723 unsigned HOST_WIDE_INT low_bits)
2724 {
2725 rtx sub_temp = op0;
2726
2727 if (can_create_pseudo_p ())
2728 sub_temp = gen_reg_rtx (DImode);
2729
2730 if ((high_bits & 0xfffffc00) != 0)
2731 {
2732 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2733 if ((high_bits & ~0xfffffc00) != 0)
2734 emit_insn (gen_rtx_SET (sub_temp,
2735 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2736 else
2737 sub_temp = temp;
2738 }
2739 else
2740 {
2741 emit_insn (gen_safe_SET64 (temp, high_bits));
2742 sub_temp = temp;
2743 }
2744
2745 if (can_create_pseudo_p ())
2746 {
2747 rtx temp2 = gen_reg_rtx (DImode);
2748 rtx temp3 = gen_reg_rtx (DImode);
2749 rtx temp4 = gen_reg_rtx (DImode);
2750
2751 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2752 GEN_INT (32))));
2753
2754 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2755 if ((low_bits & ~0xfffffc00) != 0)
2756 {
2757 emit_insn (gen_rtx_SET (temp3,
2758 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2759 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2760 }
2761 else
2762 {
2763 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2764 }
2765 }
2766 else
2767 {
2768 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2769 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2770 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2771 int to_shift = 12;
2772
2773 /* We are in the middle of reload, so this is really
2774 painful. However, we still make an attempt to
2775 avoid emitting truly stupid code. */
2776 if (low1 != const0_rtx)
2777 {
2778 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2779 GEN_INT (to_shift))));
2780 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2781 sub_temp = op0;
2782 to_shift = 12;
2783 }
2784 else
2785 {
2786 to_shift += 12;
2787 }
2788 if (low2 != const0_rtx)
2789 {
2790 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2791 GEN_INT (to_shift))));
2792 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2793 sub_temp = op0;
2794 to_shift = 8;
2795 }
2796 else
2797 {
2798 to_shift += 8;
2799 }
2800 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2801 GEN_INT (to_shift))));
2802 if (low3 != const0_rtx)
2803 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2804 /* phew... */
2805 }
2806 }
2807
2808 /* Analyze a 64-bit constant for certain properties. */
2809 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2810 unsigned HOST_WIDE_INT,
2811 int *, int *, int *);
2812
2813 static void
2814 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2815 unsigned HOST_WIDE_INT low_bits,
2816 int *hbsp, int *lbsp, int *abbasp)
2817 {
2818 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2819 int i;
2820
2821 lowest_bit_set = highest_bit_set = -1;
2822 i = 0;
2823 do
2824 {
2825 if ((lowest_bit_set == -1)
2826 && ((low_bits >> i) & 1))
2827 lowest_bit_set = i;
2828 if ((highest_bit_set == -1)
2829 && ((high_bits >> (32 - i - 1)) & 1))
2830 highest_bit_set = (64 - i - 1);
2831 }
2832 while (++i < 32
2833 && ((highest_bit_set == -1)
2834 || (lowest_bit_set == -1)));
2835 if (i == 32)
2836 {
2837 i = 0;
2838 do
2839 {
2840 if ((lowest_bit_set == -1)
2841 && ((high_bits >> i) & 1))
2842 lowest_bit_set = i + 32;
2843 if ((highest_bit_set == -1)
2844 && ((low_bits >> (32 - i - 1)) & 1))
2845 highest_bit_set = 32 - i - 1;
2846 }
2847 while (++i < 32
2848 && ((highest_bit_set == -1)
2849 || (lowest_bit_set == -1)));
2850 }
2851 /* If there are no bits set, this should have gone out
2852 as one instruction! */
2853 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2854 all_bits_between_are_set = 1;
2855 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2856 {
2857 if (i < 32)
2858 {
2859 if ((low_bits & (1 << i)) != 0)
2860 continue;
2861 }
2862 else
2863 {
2864 if ((high_bits & (1 << (i - 32))) != 0)
2865 continue;
2866 }
2867 all_bits_between_are_set = 0;
2868 break;
2869 }
2870 *hbsp = highest_bit_set;
2871 *lbsp = lowest_bit_set;
2872 *abbasp = all_bits_between_are_set;
2873 }
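
/* For example (illustrative only): for the constant 0x000000ffff000000 the
   loops above find lowest_bit_set = 24 (from low_bits = 0xff000000),
   highest_bit_set = 39 (bit 7 of high_bits = 0x000000ff, plus 32), and
   since bits 24..39 are all set, *abbasp is 1.  Such a value is a
   candidate for a short sethi/sllx sequence rather than full
   decomposition.  */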
2874
2875 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2876
2877 static int
2878 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2879 unsigned HOST_WIDE_INT low_bits)
2880 {
2881 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2882
2883 if (high_bits == 0
2884 || high_bits == 0xffffffff)
2885 return 1;
2886
2887 analyze_64bit_constant (high_bits, low_bits,
2888 &highest_bit_set, &lowest_bit_set,
2889 &all_bits_between_are_set);
2890
2891 if ((highest_bit_set == 63
2892 || lowest_bit_set == 0)
2893 && all_bits_between_are_set != 0)
2894 return 1;
2895
2896 if ((highest_bit_set - lowest_bit_set) < 21)
2897 return 1;
2898
2899 return 0;
2900 }
2901
2902 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2903 unsigned HOST_WIDE_INT,
2904 int, int);
2905
2906 static unsigned HOST_WIDE_INT
2907 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2908 unsigned HOST_WIDE_INT low_bits,
2909 int lowest_bit_set, int shift)
2910 {
2911 HOST_WIDE_INT hi, lo;
2912
2913 if (lowest_bit_set < 32)
2914 {
2915 lo = (low_bits >> lowest_bit_set) << shift;
2916 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2917 }
2918 else
2919 {
2920 lo = 0;
2921 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2922 }
2923 gcc_assert (! (hi & lo));
2924 return (hi | lo);
2925 }
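
/* Continuing the example above (a sketch): with high_bits = 0x000000ff,
   low_bits = 0xff000000, lowest_bit_set = 24 and shift = 10, this returns
   lo = (0xff000000 >> 24) << 10 = 0x0003fc00 and
   hi = (0x000000ff << 8) << 10 = 0x03fc0000, i.e. focus_bits = 0x03fffc00.
   The low 10 bits are clear, so a single sethi can materialize it, and a
   following sllx by lowest_bit_set - 10 = 14 restores 0x000000ffff000000.  */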
2926
2927 /* Here we are sure to be arch64 and this is an integer constant
2928 being loaded into a register. Emit the most efficient
2929 insn sequence possible. Detection of all the 1-insn cases
2930 has been done already. */
2931 static void
2932 sparc_emit_set_const64 (rtx op0, rtx op1)
2933 {
2934 unsigned HOST_WIDE_INT high_bits, low_bits;
2935 int lowest_bit_set, highest_bit_set;
2936 int all_bits_between_are_set;
2937 rtx temp = 0;
2938
2939 /* Sanity check that we know what we are working with. */
2940 gcc_assert (TARGET_ARCH64
2941 && (GET_CODE (op0) == SUBREG
2942 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2943
2944 if (! can_create_pseudo_p ())
2945 temp = op0;
2946
2947 if (GET_CODE (op1) != CONST_INT)
2948 {
2949 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2950 return;
2951 }
2952
2953 if (! temp)
2954 temp = gen_reg_rtx (DImode);
2955
2956 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2957 low_bits = (INTVAL (op1) & 0xffffffff);
2958
2959 /* low_bits bits 0 --> 31
2960 high_bits bits 32 --> 63 */
2961
2962 analyze_64bit_constant (high_bits, low_bits,
2963 &highest_bit_set, &lowest_bit_set,
2964 &all_bits_between_are_set);
2965
2966 /* First try for a 2-insn sequence. */
2967
2968 /* These situations are preferred because the optimizer can
2969 * do more things with them:
2970 * 1) mov -1, %reg
2971 * sllx %reg, shift, %reg
2972 * 2) mov -1, %reg
2973 * srlx %reg, shift, %reg
2974 * 3) mov some_small_const, %reg
2975 * sllx %reg, shift, %reg
2976 */
2977 if (((highest_bit_set == 63
2978 || lowest_bit_set == 0)
2979 && all_bits_between_are_set != 0)
2980 || ((highest_bit_set - lowest_bit_set) < 12))
2981 {
2982 HOST_WIDE_INT the_const = -1;
2983 int shift = lowest_bit_set;
2984
2985 if ((highest_bit_set != 63
2986 && lowest_bit_set != 0)
2987 || all_bits_between_are_set == 0)
2988 {
2989 the_const =
2990 create_simple_focus_bits (high_bits, low_bits,
2991 lowest_bit_set, 0);
2992 }
2993 else if (lowest_bit_set == 0)
2994 shift = -(63 - highest_bit_set);
2995
2996 gcc_assert (SPARC_SIMM13_P (the_const));
2997 gcc_assert (shift != 0);
2998
2999 emit_insn (gen_safe_SET64 (temp, the_const));
3000 if (shift > 0)
3001 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3002 GEN_INT (shift))));
3003 else if (shift < 0)
3004 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3005 GEN_INT (-shift))));
3006 return;
3007 }
3008
3009 /* Now a range of 22 or fewer bits set somewhere.
3010 * 1) sethi %hi(focus_bits), %reg
3011 * sllx %reg, shift, %reg
3012 * 2) sethi %hi(focus_bits), %reg
3013 * srlx %reg, shift, %reg
3014 */
3015 if ((highest_bit_set - lowest_bit_set) < 21)
3016 {
3017 unsigned HOST_WIDE_INT focus_bits =
3018 create_simple_focus_bits (high_bits, low_bits,
3019 lowest_bit_set, 10);
3020
3021 gcc_assert (SPARC_SETHI_P (focus_bits));
3022 gcc_assert (lowest_bit_set != 10);
3023
3024 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3025
3026 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3027 if (lowest_bit_set < 10)
3028 emit_insn (gen_rtx_SET (op0,
3029 gen_rtx_LSHIFTRT (DImode, temp,
3030 GEN_INT (10 - lowest_bit_set))));
3031 else if (lowest_bit_set > 10)
3032 emit_insn (gen_rtx_SET (op0,
3033 gen_rtx_ASHIFT (DImode, temp,
3034 GEN_INT (lowest_bit_set - 10))));
3035 return;
3036 }
3037
3038 /* 1) sethi %hi(low_bits), %reg
3039 * or %reg, %lo(low_bits), %reg
3040 * 2) sethi %hi(~low_bits), %reg
3041 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3042 */
3043 if (high_bits == 0
3044 || high_bits == 0xffffffff)
3045 {
3046 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3047 (high_bits == 0xffffffff));
3048 return;
3049 }
3050
3051 /* Now, try 3-insn sequences. */
3052
3053 /* 1) sethi %hi(high_bits), %reg
3054 * or %reg, %lo(high_bits), %reg
3055 * sllx %reg, 32, %reg
3056 */
3057 if (low_bits == 0)
3058 {
3059 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3060 return;
3061 }
3062
3063 /* We may be able to do something quick
3064 when the constant is negated, so try that. */
3065 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3066 (~low_bits) & 0xfffffc00))
3067 {
3068 /* NOTE: The trailing bits get XOR'd so we need the
3069 non-negated bits, not the negated ones. */
3070 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3071
3072 if ((((~high_bits) & 0xffffffff) == 0
3073 && ((~low_bits) & 0x80000000) == 0)
3074 || (((~high_bits) & 0xffffffff) == 0xffffffff
3075 && ((~low_bits) & 0x80000000) != 0))
3076 {
3077 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3078
3079 if ((SPARC_SETHI_P (fast_int)
3080 && (~high_bits & 0xffffffff) == 0)
3081 || SPARC_SIMM13_P (fast_int))
3082 emit_insn (gen_safe_SET64 (temp, fast_int));
3083 else
3084 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3085 }
3086 else
3087 {
3088 rtx negated_const;
3089 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3090 (((HOST_WIDE_INT) ((~high_bits) & 0xffffffff)) << 32));
3091 sparc_emit_set_const64 (temp, negated_const);
3092 }
3093
3094 /* If we are XOR'ing with -1, then we should emit a one's complement
3095 instead. This way the combiner will notice logical operations
3096 such as ANDN later on and substitute. */
3097 if (trailing_bits == 0x3ff)
3098 {
3099 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3100 }
3101 else
3102 {
3103 emit_insn (gen_rtx_SET (op0,
3104 gen_safe_XOR64 (temp,
3105 (-0x400 | trailing_bits))));
3106 }
3107 return;
3108 }
3109
3110 /* 1) sethi %hi(xxx), %reg
3111 * or %reg, %lo(xxx), %reg
3112 * sllx %reg, yyy, %reg
3113 *
3114 * ??? This is just a generalized version of the low_bits==0
3115 * thing above, FIXME...
3116 */
3117 if ((highest_bit_set - lowest_bit_set) < 32)
3118 {
3119 unsigned HOST_WIDE_INT focus_bits =
3120 create_simple_focus_bits (high_bits, low_bits,
3121 lowest_bit_set, 0);
3122
3123 /* We can't get here in this state. */
3124 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3125
3126 /* So what we know is that the set bits straddle the
3127 middle of the 64-bit word. */
3128 sparc_emit_set_const64_quick2 (op0, temp,
3129 focus_bits, 0,
3130 lowest_bit_set);
3131 return;
3132 }
3133
3134 /* 1) sethi %hi(high_bits), %reg
3135 * or %reg, %lo(high_bits), %reg
3136 * sllx %reg, 32, %reg
3137 * or %reg, low_bits, %reg
3138 */
3139 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3140 {
3141 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3142 return;
3143 }
3144
3145 /* The easiest way when all else fails is full decomposition. */
3146 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3147 }
3148
3149 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3150
3151 static bool
3152 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3153 {
3154 *p1 = SPARC_ICC_REG;
3155 *p2 = SPARC_FCC_REG;
3156 return true;
3157 }
3158
3159 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3160
3161 static unsigned int
3162 sparc_min_arithmetic_precision (void)
3163 {
3164 return 32;
3165 }
3166
3167 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3168 return the mode to be used for the comparison. For floating-point,
3169 CCFP[E]mode is used. CCNZmode should be used when the first operand
3170 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3171 processing is needed. */
3172
3173 machine_mode
3174 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3175 {
3176 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3177 {
3178 switch (op)
3179 {
3180 case EQ:
3181 case NE:
3182 case UNORDERED:
3183 case ORDERED:
3184 case UNLT:
3185 case UNLE:
3186 case UNGT:
3187 case UNGE:
3188 case UNEQ:
3189 case LTGT:
3190 return CCFPmode;
3191
3192 case LT:
3193 case LE:
3194 case GT:
3195 case GE:
3196 return CCFPEmode;
3197
3198 default:
3199 gcc_unreachable ();
3200 }
3201 }
3202 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3203 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3204 && y == const0_rtx)
3205 {
3206 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3207 return CCXNZmode;
3208 else
3209 return CCNZmode;
3210 }
3211 else
3212 {
3213 /* This is for the cmp<mode>_sne pattern. */
3214 if (GET_CODE (x) == NOT && y == constm1_rtx)
3215 {
3216 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3217 return CCXCmode;
3218 else
3219 return CCCmode;
3220 }
3221
3222 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3223 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3224 {
3225 if (GET_CODE (y) == UNSPEC
3226 && (XINT (y, 1) == UNSPEC_ADDV
3227 || XINT (y, 1) == UNSPEC_SUBV
3228 || XINT (y, 1) == UNSPEC_NEGV))
3229 return CCVmode;
3230 else
3231 return CCCmode;
3232 }
3233
3234 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3235 return CCXmode;
3236 else
3237 return CCmode;
3238 }
3239 }
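
/* For instance (an illustrative sketch): a comparison of the form
   (compare (plus:SI a b) (const_int 0)) falls into the PLUS/MINUS/NEG/ASHIFT
   case above and gets CCNZmode (CCXNZmode for DImode on 64-bit), the NZ
   presumably indicating that only the negative and zero condition codes of
   the flag-setting arithmetic insn are relied upon; a plain comparison of
   two registers gets CCmode.  */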
3240
3241 /* Emit the compare insn and return the CC reg for a CODE comparison
3242 with operands X and Y. */
3243
3244 static rtx
3245 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3246 {
3247 machine_mode mode;
3248 rtx cc_reg;
3249
3250 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3251 return x;
3252
3253 mode = SELECT_CC_MODE (code, x, y);
3254
3255 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3256 fcc regs (cse can't tell they're really call clobbered regs and will
3257 remove a duplicate comparison even if there is an intervening function
3258 call - it will then try to reload the cc reg via an int reg which is why
3259 we need the movcc patterns). It is possible to provide the movcc
3260 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3261 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3262 to tell cse that CCFPE mode registers (even pseudos) are call
3263 clobbered. */
3264
3265 /* ??? This is an experiment. Rather than making changes to cse which may
3266 or may not be easy/clean, we do our own cse. This is possible because
3267 we will generate hard registers. Cse knows they're call clobbered (it
3268 doesn't know the same thing about pseudos). If we guess wrong, no big
3269 deal, but if we win, great! */
3270
3271 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3272 #if 1 /* experiment */
3273 {
3274 int reg;
3275 /* We cycle through the registers to ensure they're all exercised. */
3276 static int next_fcc_reg = 0;
3277 /* Previous x,y for each fcc reg. */
3278 static rtx prev_args[4][2];
3279
3280 /* Scan prev_args for x,y. */
3281 for (reg = 0; reg < 4; reg++)
3282 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3283 break;
3284 if (reg == 4)
3285 {
3286 reg = next_fcc_reg;
3287 prev_args[reg][0] = x;
3288 prev_args[reg][1] = y;
3289 next_fcc_reg = (next_fcc_reg + 1) & 3;
3290 }
3291 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3292 }
3293 #else
3294 cc_reg = gen_reg_rtx (mode);
3295 #endif /* ! experiment */
3296 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3297 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3298 else
3299 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3300
3301 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3302 will only result in an unrecognizable insn so no point in asserting. */
3303 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3304
3305 return cc_reg;
3306 }
3307
3308
3309 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3310
3311 rtx
3312 gen_compare_reg (rtx cmp)
3313 {
3314 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3315 }
3316
3317 /* This function is used for v9 only.
3318 DEST is the target of the Scc insn.
3319 CODE is the code for an Scc's comparison.
3320 X and Y are the values we compare.
3321
3322 This function is needed to turn
3323
3324 (set (reg:SI 110)
3325 (gt (reg:CCX 100 %icc)
3326 (const_int 0)))
3327 into
3328 (set (reg:SI 110)
3329 (gt:DI (reg:CCX 100 %icc)
3330 (const_int 0)))
3331
3332 I.e. the instruction recognizer needs to see the mode of the comparison to
3333 find the right instruction. We could use "gt:DI" right in the
3334 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3335
3336 static int
3337 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3338 {
3339 if (! TARGET_ARCH64
3340 && (GET_MODE (x) == DImode
3341 || GET_MODE (dest) == DImode))
3342 return 0;
3343
3344 /* Try to use the movrCC insns. */
3345 if (TARGET_ARCH64
3346 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3347 && y == const0_rtx
3348 && v9_regcmp_p (compare_code))
3349 {
3350 rtx op0 = x;
3351 rtx temp;
3352
3353 /* Special case for op0 != 0. This can be done with one instruction if
3354 dest == x. */
3355
3356 if (compare_code == NE
3357 && GET_MODE (dest) == DImode
3358 && rtx_equal_p (op0, dest))
3359 {
3360 emit_insn (gen_rtx_SET (dest,
3361 gen_rtx_IF_THEN_ELSE (DImode,
3362 gen_rtx_fmt_ee (compare_code, DImode,
3363 op0, const0_rtx),
3364 const1_rtx,
3365 dest)));
3366 return 1;
3367 }
3368
3369 if (reg_overlap_mentioned_p (dest, op0))
3370 {
3371 /* Handle the case where dest == x.
3372 We "early clobber" the result. */
3373 op0 = gen_reg_rtx (GET_MODE (x));
3374 emit_move_insn (op0, x);
3375 }
3376
3377 emit_insn (gen_rtx_SET (dest, const0_rtx));
3378 if (GET_MODE (op0) != DImode)
3379 {
3380 temp = gen_reg_rtx (DImode);
3381 convert_move (temp, op0, 0);
3382 }
3383 else
3384 temp = op0;
3385 emit_insn (gen_rtx_SET (dest,
3386 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3387 gen_rtx_fmt_ee (compare_code, DImode,
3388 temp, const0_rtx),
3389 const1_rtx,
3390 dest)));
3391 return 1;
3392 }
3393 else
3394 {
3395 x = gen_compare_reg_1 (compare_code, x, y);
3396 y = const0_rtx;
3397
3398 emit_insn (gen_rtx_SET (dest, const0_rtx));
3399 emit_insn (gen_rtx_SET (dest,
3400 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3401 gen_rtx_fmt_ee (compare_code,
3402 GET_MODE (x), x, y),
3403 const1_rtx, dest)));
3404 return 1;
3405 }
3406 }
3407
3408
3409 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3410 without jumps using the addx/subx instructions. */
3411
3412 bool
3413 emit_scc_insn (rtx operands[])
3414 {
3415 rtx tem, x, y;
3416 enum rtx_code code;
3417 machine_mode mode;
3418
3419 /* The quad-word fp compare library routines all return nonzero to indicate
3420 true, which is different from the equivalent libgcc routines, so we must
3421 handle them specially here. */
3422 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3423 {
3424 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3425 GET_CODE (operands[1]));
3426 operands[2] = XEXP (operands[1], 0);
3427 operands[3] = XEXP (operands[1], 1);
3428 }
3429
3430 code = GET_CODE (operands[1]);
3431 x = operands[2];
3432 y = operands[3];
3433 mode = GET_MODE (x);
3434
3435 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3436 more applications). The exception to this is "reg != 0" which can
3437 be done in one instruction on v9 (so we do it). */
3438 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3439 {
3440 if (y != const0_rtx)
3441 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3442
3443 rtx pat = gen_rtx_SET (operands[0],
3444 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3445 x, const0_rtx));
3446
3447 /* If we can use addx/subx or addxc, add a clobber for CC. */
3448 if (mode == SImode || (code == NE && TARGET_VIS3))
3449 {
3450 rtx clobber
3451 = gen_rtx_CLOBBER (VOIDmode,
3452 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3453 SPARC_ICC_REG));
3454 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3455 }
3456
3457 emit_insn (pat);
3458 return true;
3459 }
3460
3461 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3462 if (TARGET_ARCH64
3463 && mode == DImode
3464 && !((code == LTU || code == GTU) && TARGET_VIS3)
3465 && gen_v9_scc (operands[0], code, x, y))
3466 return true;
3467
3468 /* We can do LTU and GEU using the addx/subx instructions too. And
3469 for GTU/LEU, if both operands are registers, swap them and fall
3470 back to the easy case. */
3471 if (code == GTU || code == LEU)
3472 {
3473 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3474 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3475 {
3476 tem = x;
3477 x = y;
3478 y = tem;
3479 code = swap_condition (code);
3480 }
3481 }
3482
3483 if (code == LTU || code == GEU)
3484 {
3485 emit_insn (gen_rtx_SET (operands[0],
3486 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3487 gen_compare_reg_1 (code, x, y),
3488 const0_rtx)));
3489 return true;
3490 }
3491
3492 /* All the possibilities to use addx/subx-based sequences have been
3493 exhausted; try for a 3-instruction sequence using v9 conditional
3494 moves. */
3495 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3496 return true;
3497
3498 /* Nope, do branches. */
3499 return false;
3500 }
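
/* An illustrative sketch of the addx/subx technique used above (the exact
   patterns live in sparc.md; register names here are placeholders): for
   "dest = (x < y)" with unsigned operands, the pair

     subcc  %x, %y, %g0     ! sets the carry bit iff x < y (unsigned)
     addx   %g0, 0, %dest   ! dest = 0 + 0 + C

   yields the scc result without a branch, which is why LTU/GEU (and
   EQ/NE after an XOR of the operands) are handled here directly.  */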
3501
3502 /* Emit a conditional jump insn for the v9 architecture using comparison code
3503 CODE and jump target LABEL.
3504 This function exists to take advantage of the v9 brxx insns. */
3505
3506 static void
3507 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3508 {
3509 emit_jump_insn (gen_rtx_SET (pc_rtx,
3510 gen_rtx_IF_THEN_ELSE (VOIDmode,
3511 gen_rtx_fmt_ee (code, GET_MODE (op0),
3512 op0, const0_rtx),
3513 gen_rtx_LABEL_REF (VOIDmode, label),
3514 pc_rtx)));
3515 }
3516
3517 /* Emit a conditional jump insn for the UA2011 architecture using
3518 comparison code CODE and jump target LABEL. This function exists
3519 to take advantage of the UA2011 Compare and Branch insns. */
3520
3521 static void
3522 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3523 {
3524 rtx if_then_else;
3525
3526 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3527 gen_rtx_fmt_ee (code, GET_MODE (op0),
3528 op0, op1),
3529 gen_rtx_LABEL_REF (VOIDmode, label),
3530 pc_rtx);
3531
3532 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3533 }
3534
3535 void
3536 emit_conditional_branch_insn (rtx operands[])
3537 {
3538 /* The quad-word fp compare library routines all return nonzero to indicate
3539 true, which is different from the equivalent libgcc routines, so we must
3540 handle them specially here. */
3541 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3542 {
3543 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3544 GET_CODE (operands[0]));
3545 operands[1] = XEXP (operands[0], 0);
3546 operands[2] = XEXP (operands[0], 1);
3547 }
3548
3549 /* If we can tell early on that the comparison is against a constant
3550 that won't fit in the 5-bit signed immediate field of a cbcond,
3551 use one of the other v9 conditional branch sequences. */
3552 if (TARGET_CBCOND
3553 && GET_CODE (operands[1]) == REG
3554 && (GET_MODE (operands[1]) == SImode
3555 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3556 && (GET_CODE (operands[2]) != CONST_INT
3557 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3558 {
3559 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3560 return;
3561 }
3562
3563 if (TARGET_ARCH64 && operands[2] == const0_rtx
3564 && GET_CODE (operands[1]) == REG
3565 && GET_MODE (operands[1]) == DImode)
3566 {
3567 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3568 return;
3569 }
3570
3571 operands[1] = gen_compare_reg (operands[0]);
3572 operands[2] = const0_rtx;
3573 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3574 operands[1], operands[2]);
3575 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3576 operands[3]));
3577 }
3578
3579
3580 /* Generate a DFmode part of a hard TFmode register.
3581 REG is the TFmode hard register, LOW is 1 for the
3582 low 64 bits of the register and 0 otherwise. */
3584 rtx
3585 gen_df_reg (rtx reg, int low)
3586 {
3587 int regno = REGNO (reg);
3588
3589 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3590 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3591 return gen_rtx_REG (DFmode, regno);
3592 }
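
/* Example (SPARC is big-endian, so WORDS_BIG_ENDIAN is nonzero): for a
   TFmode value living in %f0-%f3, gen_df_reg (reg, 0) yields the high
   half %f0-%f1 while gen_df_reg (reg, 1) yields the low half %f2-%f3.  */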
3593 \f
3594 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3595 Unlike normal calls, TFmode operands are passed by reference. It is
3596 assumed that no more than 3 operands are required. */
3597
3598 static void
3599 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3600 {
3601 rtx ret_slot = NULL, arg[3], func_sym;
3602 int i;
3603
3604 /* We only expect to be called for conversions, unary, and binary ops. */
3605 gcc_assert (nargs == 2 || nargs == 3);
3606
3607 for (i = 0; i < nargs; ++i)
3608 {
3609 rtx this_arg = operands[i];
3610 rtx this_slot;
3611
3612 /* TFmode arguments and return values are passed by reference. */
3613 if (GET_MODE (this_arg) == TFmode)
3614 {
3615 int force_stack_temp;
3616
3617 force_stack_temp = 0;
3618 if (TARGET_BUGGY_QP_LIB && i == 0)
3619 force_stack_temp = 1;
3620
3621 if (GET_CODE (this_arg) == MEM
3622 && ! force_stack_temp)
3623 {
3624 tree expr = MEM_EXPR (this_arg);
3625 if (expr)
3626 mark_addressable (expr);
3627 this_arg = XEXP (this_arg, 0);
3628 }
3629 else if (CONSTANT_P (this_arg)
3630 && ! force_stack_temp)
3631 {
3632 this_slot = force_const_mem (TFmode, this_arg);
3633 this_arg = XEXP (this_slot, 0);
3634 }
3635 else
3636 {
3637 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3638
3639 /* Operand 0 is the return value. We'll copy it out later. */
3640 if (i > 0)
3641 emit_move_insn (this_slot, this_arg);
3642 else
3643 ret_slot = this_slot;
3644
3645 this_arg = XEXP (this_slot, 0);
3646 }
3647 }
3648
3649 arg[i] = this_arg;
3650 }
3651
3652 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3653
3654 if (GET_MODE (operands[0]) == TFmode)
3655 {
3656 if (nargs == 2)
3657 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3658 arg[0], GET_MODE (arg[0]),
3659 arg[1], GET_MODE (arg[1]));
3660 else
3661 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3662 arg[0], GET_MODE (arg[0]),
3663 arg[1], GET_MODE (arg[1]),
3664 arg[2], GET_MODE (arg[2]));
3665
3666 if (ret_slot)
3667 emit_move_insn (operands[0], ret_slot);
3668 }
3669 else
3670 {
3671 rtx ret;
3672
3673 gcc_assert (nargs == 2);
3674
3675 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3676 GET_MODE (operands[0]),
3677 arg[1], GET_MODE (arg[1]));
3678
3679 if (ret != operands[0])
3680 emit_move_insn (operands[0], ret);
3681 }
3682 }
3683
3684 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3685
3686 static void
3687 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3688 {
3689 const char *func;
3690
3691 switch (code)
3692 {
3693 case PLUS:
3694 func = "_Qp_add";
3695 break;
3696 case MINUS:
3697 func = "_Qp_sub";
3698 break;
3699 case MULT:
3700 func = "_Qp_mul";
3701 break;
3702 case DIV:
3703 func = "_Qp_div";
3704 break;
3705 default:
3706 gcc_unreachable ();
3707 }
3708
3709 emit_soft_tfmode_libcall (func, 3, operands);
3710 }
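
/* For instance, without hard quad-float support a source-level operation
   like

       long double c = a + b;

   is expanded into roughly the equivalent of _Qp_add (&c, &a, &b): the
   result slot and both operands are passed by reference, as arranged by
   emit_soft_tfmode_libcall above.  */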
3711
3712 static void
3713 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3714 {
3715 const char *func;
3716
3717 gcc_assert (code == SQRT);
3718 func = "_Qp_sqrt";
3719
3720 emit_soft_tfmode_libcall (func, 2, operands);
3721 }
3722
3723 static void
3724 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3725 {
3726 const char *func;
3727
3728 switch (code)
3729 {
3730 case FLOAT_EXTEND:
3731 switch (GET_MODE (operands[1]))
3732 {
3733 case E_SFmode:
3734 func = "_Qp_stoq";
3735 break;
3736 case E_DFmode:
3737 func = "_Qp_dtoq";
3738 break;
3739 default:
3740 gcc_unreachable ();
3741 }
3742 break;
3743
3744 case FLOAT_TRUNCATE:
3745 switch (GET_MODE (operands[0]))
3746 {
3747 case E_SFmode:
3748 func = "_Qp_qtos";
3749 break;
3750 case E_DFmode:
3751 func = "_Qp_qtod";
3752 break;
3753 default:
3754 gcc_unreachable ();
3755 }
3756 break;
3757
3758 case FLOAT:
3759 switch (GET_MODE (operands[1]))
3760 {
3761 case E_SImode:
3762 func = "_Qp_itoq";
3763 if (TARGET_ARCH64)
3764 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3765 break;
3766 case E_DImode:
3767 func = "_Qp_xtoq";
3768 break;
3769 default:
3770 gcc_unreachable ();
3771 }
3772 break;
3773
3774 case UNSIGNED_FLOAT:
3775 switch (GET_MODE (operands[1]))
3776 {
3777 case E_SImode:
3778 func = "_Qp_uitoq";
3779 if (TARGET_ARCH64)
3780 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3781 break;
3782 case E_DImode:
3783 func = "_Qp_uxtoq";
3784 break;
3785 default:
3786 gcc_unreachable ();
3787 }
3788 break;
3789
3790 case FIX:
3791 switch (GET_MODE (operands[0]))
3792 {
3793 case E_SImode:
3794 func = "_Qp_qtoi";
3795 break;
3796 case E_DImode:
3797 func = "_Qp_qtox";
3798 break;
3799 default:
3800 gcc_unreachable ();
3801 }
3802 break;
3803
3804 case UNSIGNED_FIX:
3805 switch (GET_MODE (operands[0]))
3806 {
3807 case E_SImode:
3808 func = "_Qp_qtoui";
3809 break;
3810 case E_DImode:
3811 func = "_Qp_qtoux";
3812 break;
3813 default:
3814 gcc_unreachable ();
3815 }
3816 break;
3817
3818 default:
3819 gcc_unreachable ();
3820 }
3821
3822 emit_soft_tfmode_libcall (func, 2, operands);
3823 }
3824
3825 /* Expand a hard-float TFmode operation. All arguments must be in
3826 registers. */
3827
3828 static void
3829 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3830 {
3831 rtx op, dest;
3832
3833 if (GET_RTX_CLASS (code) == RTX_UNARY)
3834 {
3835 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3836 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3837 }
3838 else
3839 {
3840 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3841 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3842 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3843 operands[1], operands[2]);
3844 }
3845
3846 if (register_operand (operands[0], VOIDmode))
3847 dest = operands[0];
3848 else
3849 dest = gen_reg_rtx (GET_MODE (operands[0]));
3850
3851 emit_insn (gen_rtx_SET (dest, op));
3852
3853 if (dest != operands[0])
3854 emit_move_insn (operands[0], dest);
3855 }
3856
3857 void
3858 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3859 {
3860 if (TARGET_HARD_QUAD)
3861 emit_hard_tfmode_operation (code, operands);
3862 else
3863 emit_soft_tfmode_binop (code, operands);
3864 }
3865
3866 void
3867 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3868 {
3869 if (TARGET_HARD_QUAD)
3870 emit_hard_tfmode_operation (code, operands);
3871 else
3872 emit_soft_tfmode_unop (code, operands);
3873 }
3874
3875 void
3876 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3877 {
3878 if (TARGET_HARD_QUAD)
3879 emit_hard_tfmode_operation (code, operands);
3880 else
3881 emit_soft_tfmode_cvt (code, operands);
3882 }
3883 \f
3884 /* Return nonzero if a branch/jump/call instruction will be emitting
3885 a nop into its delay slot. */
3886
3887 int
3888 empty_delay_slot (rtx_insn *insn)
3889 {
3890 rtx seq;
3891
3892 /* If there is no previous instruction (should not happen), return true. */
3893 if (PREV_INSN (insn) == NULL)
3894 return 1;
3895
3896 seq = NEXT_INSN (PREV_INSN (insn));
3897 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3898 return 0;
3899
3900 return 1;
3901 }
3902
3903 /* Return nonzero if we should emit a nop after a cbcond instruction.
3904 The cbcond instruction does not have a delay slot; however, there is
3905 a severe performance penalty if a control transfer appears right
3906 after a cbcond. Therefore we emit a nop when we detect this
3907 situation. */
3908
3909 int
3910 emit_cbcond_nop (rtx_insn *insn)
3911 {
3912 rtx next = next_active_insn (insn);
3913
3914 if (!next)
3915 return 1;
3916
3917 if (NONJUMP_INSN_P (next)
3918 && GET_CODE (PATTERN (next)) == SEQUENCE)
3919 next = XVECEXP (PATTERN (next), 0, 0);
3920 else if (CALL_P (next)
3921 && GET_CODE (PATTERN (next)) == PARALLEL)
3922 {
3923 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3924
3925 if (GET_CODE (delay) == RETURN)
3926 {
3927 /* It's a sibling call. Do not emit the nop if we're going
3928 to emit something other than the jump itself as the first
3929 instruction of the sibcall sequence. */
3930 if (sparc_leaf_function_p || TARGET_FLAT)
3931 return 0;
3932 }
3933 }
3934
3935 if (NONJUMP_INSN_P (next))
3936 return 0;
3937
3938 return 1;
3939 }
3940
3941 /* Return nonzero if TRIAL can go into the call delay slot. */
3942
3943 int
3944 eligible_for_call_delay (rtx_insn *trial)
3945 {
3946 rtx pat;
3947
3948 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3949 return 0;
3950
3951 /* Binutils allows
3952 call __tls_get_addr, %tgd_call (foo)
3953 add %l7, %o0, %o0, %tgd_add (foo)
3954 while Sun as/ld does not. */
3955 if (TARGET_GNU_TLS || !TARGET_TLS)
3956 return 1;
3957
3958 pat = PATTERN (trial);
3959
3960 /* We must reject tgd_add{32|64}, i.e.
3961 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3962 and tldm_add{32|64}, i.e.
3963 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3964 for Sun as/ld. */
3965 if (GET_CODE (pat) == SET
3966 && GET_CODE (SET_SRC (pat)) == PLUS)
3967 {
3968 rtx unspec = XEXP (SET_SRC (pat), 1);
3969
3970 if (GET_CODE (unspec) == UNSPEC
3971 && (XINT (unspec, 1) == UNSPEC_TLSGD
3972 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3973 return 0;
3974 }
3975
3976 return 1;
3977 }
3978
3979 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3980 instruction. RETURN_P is true if the v9 variant 'return' is to be
3981 considered in the test too.
3982
3983 TRIAL must be a SET whose destination is a REG appropriate for the
3984 'restore' instruction or, if RETURN_P is true, for the 'return'
3985 instruction. */
3986
3987 static int
3988 eligible_for_restore_insn (rtx trial, bool return_p)
3989 {
3990 rtx pat = PATTERN (trial);
3991 rtx src = SET_SRC (pat);
3992 bool src_is_freg = false;
3993 rtx src_reg;
3994
3995 /* Since we can now do moves between float and integer registers when
3996 VIS3 is enabled, we have to catch this case. We can allow such
3997 moves when doing a 'return', however. */
3998 src_reg = src;
3999 if (GET_CODE (src_reg) == SUBREG)
4000 src_reg = SUBREG_REG (src_reg);
4001 if (GET_CODE (src_reg) == REG
4002 && SPARC_FP_REG_P (REGNO (src_reg)))
4003 src_is_freg = true;
4004
4005 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4006 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4007 && arith_operand (src, GET_MODE (src))
4008 && ! src_is_freg)
4009 {
4010 if (TARGET_ARCH64)
4011 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4012 else
4013 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4014 }
4015
4016 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4017 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4018 && arith_double_operand (src, GET_MODE (src))
4019 && ! src_is_freg)
4020 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4021
4022 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4023 else if (! TARGET_FPU && register_operand (src, SFmode))
4024 return 1;
4025
4026 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4027 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4028 return 1;
4029
4030 /* If we have the 'return' instruction, anything that does not use
4031 local or output registers and can go into a delay slot wins. */
4032 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4033 return 1;
4034
4035 /* The 'restore src1,src2,dest' pattern for SImode. */
4036 else if (GET_CODE (src) == PLUS
4037 && register_operand (XEXP (src, 0), SImode)
4038 && arith_operand (XEXP (src, 1), SImode))
4039 return 1;
4040
4041 /* The 'restore src1,src2,dest' pattern for DImode. */
4042 else if (GET_CODE (src) == PLUS
4043 && register_operand (XEXP (src, 0), DImode)
4044 && arith_double_operand (XEXP (src, 1), DImode))
4045 return 1;
4046
4047 /* The 'restore src1,%lo(src2),dest' pattern. */
4048 else if (GET_CODE (src) == LO_SUM
4049 && ! TARGET_CM_MEDMID
4050 && ((register_operand (XEXP (src, 0), SImode)
4051 && immediate_operand (XEXP (src, 1), SImode))
4052 || (TARGET_ARCH64
4053 && register_operand (XEXP (src, 0), DImode)
4054 && immediate_operand (XEXP (src, 1), DImode))))
4055 return 1;
4056
4057 /* The 'restore src,src,dest' pattern. */
4058 else if (GET_CODE (src) == ASHIFT
4059 && (register_operand (XEXP (src, 0), SImode)
4060 || register_operand (XEXP (src, 0), DImode))
4061 && XEXP (src, 1) == const1_rtx)
4062 return 1;
4063
4064 return 0;
4065 }
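
/* The classic example of an eligible insn is the final addition of a
   function that simply returns the sum of its arguments: it can be
   folded into the register window restore, giving something like
   "ret ; restore %i0, %i1, %o0" instead of a separate add.  */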
4066
4067 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4068
4069 int
4070 eligible_for_return_delay (rtx_insn *trial)
4071 {
4072 int regno;
4073 rtx pat;
4074
4075 /* If the function uses __builtin_eh_return, the eh_return machinery
4076 occupies the delay slot. */
4077 if (crtl->calls_eh_return)
4078 return 0;
4079
4080 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4081 return 0;
4082
4083 /* In the case of a leaf or flat function, anything can go into the slot. */
4084 if (sparc_leaf_function_p || TARGET_FLAT)
4085 return 1;
4086
4087 if (!NONJUMP_INSN_P (trial))
4088 return 0;
4089
4090 pat = PATTERN (trial);
4091 if (GET_CODE (pat) == PARALLEL)
4092 {
4093 int i;
4094
4095 if (! TARGET_V9)
4096 return 0;
4097 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4098 {
4099 rtx expr = XVECEXP (pat, 0, i);
4100 if (GET_CODE (expr) != SET)
4101 return 0;
4102 if (GET_CODE (SET_DEST (expr)) != REG)
4103 return 0;
4104 regno = REGNO (SET_DEST (expr));
4105 if (regno >= 8 && regno < 24)
4106 return 0;
4107 }
4108 return !epilogue_renumber (&pat, 1);
4109 }
4110
4111 if (GET_CODE (pat) != SET)
4112 return 0;
4113
4114 if (GET_CODE (SET_DEST (pat)) != REG)
4115 return 0;
4116
4117 regno = REGNO (SET_DEST (pat));
4118
4119 /* Otherwise, only operations which can be done in tandem with
4120 a `restore' or `return' insn can go into the delay slot. */
4121 if (regno >= 8 && regno < 24)
4122 return 0;
4123
4124 /* If this instruction sets up a floating-point register and we have a return
4125 instruction, it can probably go in. But 'restore' will not work
4126 with FP_REGS. */
4127 if (! SPARC_INT_REG_P (regno))
4128 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4129
4130 return eligible_for_restore_insn (trial, true);
4131 }
4132
4133 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4134
4135 int
4136 eligible_for_sibcall_delay (rtx_insn *trial)
4137 {
4138 rtx pat;
4139
4140 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4141 return 0;
4142
4143 if (!NONJUMP_INSN_P (trial))
4144 return 0;
4145
4146 pat = PATTERN (trial);
4147
4148 if (sparc_leaf_function_p || TARGET_FLAT)
4149 {
4150 /* If the tail call is done using the call instruction,
4151 we have to restore %o7 in the delay slot. */
4152 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4153 return 0;
4154
4155 /* %g1 is used to build the function address. */
4156 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4157 return 0;
4158
4159 return 1;
4160 }
4161
4162 if (GET_CODE (pat) != SET)
4163 return 0;
4164
4165 /* Otherwise, only operations which can be done in tandem with
4166 a `restore' insn can go into the delay slot. */
4167 if (GET_CODE (SET_DEST (pat)) != REG
4168 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4169 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4170 return 0;
4171
4172 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4173 in most cases. */
4174 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4175 return 0;
4176
4177 return eligible_for_restore_insn (trial, false);
4178 }
4179 \f
4180 /* Determine if it's legal to put X into the constant pool. This
4181 is not possible if X contains the address of a symbol that is
4182 not constant (TLS) or not known at final link time (PIC). */
4183
4184 static bool
4185 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4186 {
4187 switch (GET_CODE (x))
4188 {
4189 case CONST_INT:
4190 case CONST_WIDE_INT:
4191 case CONST_DOUBLE:
4192 case CONST_VECTOR:
4193 /* Accept all non-symbolic constants. */
4194 return false;
4195
4196 case LABEL_REF:
4197 /* Labels are OK iff we are non-PIC. */
4198 return flag_pic != 0;
4199
4200 case SYMBOL_REF:
4201 /* 'Naked' TLS symbol references are never OK;
4202 non-TLS symbols are OK iff we are non-PIC. */
4203 if (SYMBOL_REF_TLS_MODEL (x))
4204 return true;
4205 else
4206 return flag_pic != 0;
4207
4208 case CONST:
4209 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4210 case PLUS:
4211 case MINUS:
4212 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4213 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4214 case UNSPEC:
4215 return true;
4216 default:
4217 gcc_unreachable ();
4218 }
4219 }
4220 \f
4221 /* Global Offset Table support. */
4222 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4223 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4224
4225 /* Return the SYMBOL_REF for the Global Offset Table. */
4226
4227 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4228
4229 static rtx
4230 sparc_got (void)
4231 {
4232 if (!sparc_got_symbol)
4233 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4234
4235 return sparc_got_symbol;
4236 }
4237
4238 /* Ensure that we are not using patterns that are not OK with PIC. */
4239
4240 int
4241 check_pic (int i)
4242 {
4243 rtx op;
4244
4245 switch (flag_pic)
4246 {
4247 case 1:
4248 op = recog_data.operand[i];
4249 gcc_assert (GET_CODE (op) != SYMBOL_REF
4250 && (GET_CODE (op) != CONST
4251 || (GET_CODE (XEXP (op, 0)) == MINUS
4252 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4253 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4254 /* fallthrough */
4255 case 2:
4256 default:
4257 return 1;
4258 }
4259 }
4260
4261 /* Return true if X is an address which needs a temporary register when
4262 reloaded while generating PIC code. */
4263
4264 int
4265 pic_address_needs_scratch (rtx x)
4266 {
4267 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
4268 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4269 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4270 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4271 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4272 return 1;
4273
4274 return 0;
4275 }
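
/* Example: (const (plus (symbol_ref "foo") (const_int 8))) needs no
   scratch register because 8 satisfies SMALL_INT, whereas an offset such
   as 0x2000 is outside the 13-bit signed immediate range and therefore
   requires a temporary register to build the address.  */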
4276
4277 /* Determine if a given RTX is a valid constant. We already know this
4278 satisfies CONSTANT_P. */
4279
4280 static bool
4281 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4282 {
4283 switch (GET_CODE (x))
4284 {
4285 case CONST:
4286 case SYMBOL_REF:
4287 if (sparc_tls_referenced_p (x))
4288 return false;
4289 break;
4290
4291 case CONST_DOUBLE:
4292 /* Floating point constants are generally not ok.
4293 The only exceptions are 0.0 and all-ones in VIS. */
4294 if (TARGET_VIS
4295 && SCALAR_FLOAT_MODE_P (mode)
4296 && (const_zero_operand (x, mode)
4297 || const_all_ones_operand (x, mode)))
4298 return true;
4299
4300 return false;
4301
4302 case CONST_VECTOR:
4303 /* Vector constants are generally not ok.
4304 The only exceptions are 0 and -1 in VIS. */
4305 if (TARGET_VIS
4306 && (const_zero_operand (x, mode)
4307 || const_all_ones_operand (x, mode)))
4308 return true;
4309
4310 return false;
4311
4312 default:
4313 break;
4314 }
4315
4316 return true;
4317 }
4318
4319 /* Determine if a given RTX is a valid constant address. */
4320
4321 bool
4322 constant_address_p (rtx x)
4323 {
4324 switch (GET_CODE (x))
4325 {
4326 case LABEL_REF:
4327 case CONST_INT:
4328 case HIGH:
4329 return true;
4330
4331 case CONST:
4332 if (flag_pic && pic_address_needs_scratch (x))
4333 return false;
4334 return sparc_legitimate_constant_p (Pmode, x);
4335
4336 case SYMBOL_REF:
4337 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4338
4339 default:
4340 return false;
4341 }
4342 }
4343
4344 /* Nonzero if the constant value X is a legitimate general operand
4345 when generating PIC code. It is given that flag_pic is on and
4346 that X satisfies CONSTANT_P. */
4347
4348 bool
4349 legitimate_pic_operand_p (rtx x)
4350 {
4351 if (pic_address_needs_scratch (x))
4352 return false;
4353 if (sparc_tls_referenced_p (x))
4354 return false;
4355 return true;
4356 }
4357
4358 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4359 (CONST_INT_P (X) \
4360 && INTVAL (X) >= -0x1000 \
4361 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4362
4363 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4364 (CONST_INT_P (X) \
4365 && INTVAL (X) >= -0x1000 \
4366 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
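
/* Both macros model the 13-bit signed immediate of the memory
   instructions, adjusted so that the last byte of the access still lies
   in range: e.g. an 8-byte load is accepted for offsets up to
   0x1000 - 8 = 4088.  The OLO10 variant is further restricted to 0xc00
   because the %lo() part of the enclosing LO_SUM can itself contribute
   up to 0x3ff.  */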
4367
4368 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4369
4370 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4371 ordinarily. This changes a bit when generating PIC. */
4372
4373 static bool
4374 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4375 {
4376 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4377
4378 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4379 rs1 = addr;
4380 else if (GET_CODE (addr) == PLUS)
4381 {
4382 rs1 = XEXP (addr, 0);
4383 rs2 = XEXP (addr, 1);
4384
4385 /* Canonicalize. REG comes first; if there are no regs,
4386 LO_SUM comes first. */
4387 if (!REG_P (rs1)
4388 && GET_CODE (rs1) != SUBREG
4389 && (REG_P (rs2)
4390 || GET_CODE (rs2) == SUBREG
4391 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4392 {
4393 rs1 = XEXP (addr, 1);
4394 rs2 = XEXP (addr, 0);
4395 }
4396
4397 if ((flag_pic == 1
4398 && rs1 == pic_offset_table_rtx
4399 && !REG_P (rs2)
4400 && GET_CODE (rs2) != SUBREG
4401 && GET_CODE (rs2) != LO_SUM
4402 && GET_CODE (rs2) != MEM
4403 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4404 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4405 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4406 || ((REG_P (rs1)
4407 || GET_CODE (rs1) == SUBREG)
4408 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4409 {
4410 imm1 = rs2;
4411 rs2 = NULL;
4412 }
4413 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4414 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4415 {
4416 /* We prohibit REG + REG for TFmode when there are no quad move insns
4417 and we consequently need to split. We do this because REG+REG
4418 is not an offsettable address. If we get the situation in reload
4419 where source and destination of a movtf pattern are both MEMs with
4420 REG+REG address, then only one of them gets converted to an
4421 offsettable address. */
4422 if (mode == TFmode
4423 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4424 return 0;
4425
4426 /* Likewise for TImode, but in all cases. */
4427 if (mode == TImode)
4428 return 0;
4429
4430 /* We prohibit REG + REG on ARCH32 if not optimizing for
4431 DFmode/DImode because then mem_min_alignment is likely to be zero
4432 after reload and the forced split would lack a matching splitter
4433 pattern. */
4434 if (TARGET_ARCH32 && !optimize
4435 && (mode == DFmode || mode == DImode))
4436 return 0;
4437 }
4438 else if (USE_AS_OFFSETABLE_LO10
4439 && GET_CODE (rs1) == LO_SUM
4440 && TARGET_ARCH64
4441 && ! TARGET_CM_MEDMID
4442 && RTX_OK_FOR_OLO10_P (rs2, mode))
4443 {
4444 rs2 = NULL;
4445 imm1 = XEXP (rs1, 1);
4446 rs1 = XEXP (rs1, 0);
4447 if (!CONSTANT_P (imm1)
4448 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4449 return 0;
4450 }
4451 }
4452 else if (GET_CODE (addr) == LO_SUM)
4453 {
4454 rs1 = XEXP (addr, 0);
4455 imm1 = XEXP (addr, 1);
4456
4457 if (!CONSTANT_P (imm1)
4458 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4459 return 0;
4460
4461 /* We can't allow TFmode in 32-bit mode, because an offset greater
4462 than the alignment (8) may cause the LO_SUM to overflow. */
4463 if (mode == TFmode && TARGET_ARCH32)
4464 return 0;
4465
4466 /* During reload, accept the HIGH+LO_SUM construct generated by
4467 sparc_legitimize_reload_address. */
4468 if (reload_in_progress
4469 && GET_CODE (rs1) == HIGH
4470 && XEXP (rs1, 0) == imm1)
4471 return 1;
4472 }
4473 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4474 return 1;
4475 else
4476 return 0;
4477
4478 if (GET_CODE (rs1) == SUBREG)
4479 rs1 = SUBREG_REG (rs1);
4480 if (!REG_P (rs1))
4481 return 0;
4482
4483 if (rs2)
4484 {
4485 if (GET_CODE (rs2) == SUBREG)
4486 rs2 = SUBREG_REG (rs2);
4487 if (!REG_P (rs2))
4488 return 0;
4489 }
4490
4491 if (strict)
4492 {
4493 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4494 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4495 return 0;
4496 }
4497 else
4498 {
4499 if ((! SPARC_INT_REG_P (REGNO (rs1))
4500 && REGNO (rs1) != FRAME_POINTER_REGNUM
4501 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4502 || (rs2
4503 && (! SPARC_INT_REG_P (REGNO (rs2))
4504 && REGNO (rs2) != FRAME_POINTER_REGNUM
4505 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4506 return 0;
4507 }
4508 return 1;
4509 }
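
/* To illustrate the cases handled above: REG+REG ([%l1 + %l2]),
   REG+SIMM13 ([%l1 + 42]) and LO_SUM (%lo(sym) added to a register
   holding %hi(sym)) are accepted, while REG+REG is rejected for TFmode
   without hard quad support and for TImode, because such accesses must
   later be split into offsettable halves.  */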
4510
4511 /* Return the SYMBOL_REF for the tls_get_addr function. */
4512
4513 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4514
4515 static rtx
4516 sparc_tls_get_addr (void)
4517 {
4518 if (!sparc_tls_symbol)
4519 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4520
4521 return sparc_tls_symbol;
4522 }
4523
4524 /* Return the Global Offset Table to be used in TLS mode. */
4525
4526 static rtx
4527 sparc_tls_got (void)
4528 {
4529 /* In PIC mode, this is just the PIC offset table. */
4530 if (flag_pic)
4531 {
4532 crtl->uses_pic_offset_table = 1;
4533 return pic_offset_table_rtx;
4534 }
4535
4536 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4537 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4538 if (TARGET_SUN_TLS && TARGET_ARCH32)
4539 {
4540 load_got_register ();
4541 return global_offset_table_rtx;
4542 }
4543
4544 /* In all other cases, we load a new pseudo with the GOT symbol. */
4545 return copy_to_reg (sparc_got ());
4546 }
4547
4548 /* Return true if X contains a thread-local symbol. */
4549
4550 static bool
4551 sparc_tls_referenced_p (rtx x)
4552 {
4553 if (!TARGET_HAVE_TLS)
4554 return false;
4555
4556 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4557 x = XEXP (XEXP (x, 0), 0);
4558
4559 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4560 return true;
4561
4562 /* That's all we handle in sparc_legitimize_tls_address for now. */
4563 return false;
4564 }
4565
4566 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4567 this (thread-local) address. */
4568
4569 static rtx
4570 sparc_legitimize_tls_address (rtx addr)
4571 {
4572 rtx temp1, temp2, temp3, ret, o0, got;
4573 rtx_insn *insn;
4574
4575 gcc_assert (can_create_pseudo_p ());
4576
4577 if (GET_CODE (addr) == SYMBOL_REF)
4578 switch (SYMBOL_REF_TLS_MODEL (addr))
4579 {
4580 case TLS_MODEL_GLOBAL_DYNAMIC:
4581 start_sequence ();
4582 temp1 = gen_reg_rtx (SImode);
4583 temp2 = gen_reg_rtx (SImode);
4584 ret = gen_reg_rtx (Pmode);
4585 o0 = gen_rtx_REG (Pmode, 8);
4586 got = sparc_tls_got ();
4587 emit_insn (gen_tgd_hi22 (temp1, addr));
4588 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4589 if (TARGET_ARCH32)
4590 {
4591 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4592 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4593 addr, const1_rtx));
4594 }
4595 else
4596 {
4597 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4598 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4599 addr, const1_rtx));
4600 }
4601 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4602 insn = get_insns ();
4603 end_sequence ();
4604 emit_libcall_block (insn, ret, o0, addr);
4605 break;
4606
4607 case TLS_MODEL_LOCAL_DYNAMIC:
4608 start_sequence ();
4609 temp1 = gen_reg_rtx (SImode);
4610 temp2 = gen_reg_rtx (SImode);
4611 temp3 = gen_reg_rtx (Pmode);
4612 ret = gen_reg_rtx (Pmode);
4613 o0 = gen_rtx_REG (Pmode, 8);
4614 got = sparc_tls_got ();
4615 emit_insn (gen_tldm_hi22 (temp1));
4616 emit_insn (gen_tldm_lo10 (temp2, temp1));
4617 if (TARGET_ARCH32)
4618 {
4619 emit_insn (gen_tldm_add32 (o0, got, temp2));
4620 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4621 const1_rtx));
4622 }
4623 else
4624 {
4625 emit_insn (gen_tldm_add64 (o0, got, temp2));
4626 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4627 const1_rtx));
4628 }
4629 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4630 insn = get_insns ();
4631 end_sequence ();
4632 emit_libcall_block (insn, temp3, o0,
4633 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4634 UNSPEC_TLSLD_BASE));
4635 temp1 = gen_reg_rtx (SImode);
4636 temp2 = gen_reg_rtx (SImode);
4637 emit_insn (gen_tldo_hix22 (temp1, addr));
4638 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4639 if (TARGET_ARCH32)
4640 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4641 else
4642 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4643 break;
4644
4645 case TLS_MODEL_INITIAL_EXEC:
4646 temp1 = gen_reg_rtx (SImode);
4647 temp2 = gen_reg_rtx (SImode);
4648 temp3 = gen_reg_rtx (Pmode);
4649 got = sparc_tls_got ();
4650 emit_insn (gen_tie_hi22 (temp1, addr));
4651 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4652 if (TARGET_ARCH32)
4653 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4654 else
4655 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4656 if (TARGET_SUN_TLS)
4657 {
4658 ret = gen_reg_rtx (Pmode);
4659 if (TARGET_ARCH32)
4660 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4661 temp3, addr));
4662 else
4663 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4664 temp3, addr));
4665 }
4666 else
4667 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4668 break;
4669
4670 case TLS_MODEL_LOCAL_EXEC:
4671 temp1 = gen_reg_rtx (Pmode);
4672 temp2 = gen_reg_rtx (Pmode);
4673 if (TARGET_ARCH32)
4674 {
4675 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4676 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4677 }
4678 else
4679 {
4680 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4681 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4682 }
4683 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4684 break;
4685
4686 default:
4687 gcc_unreachable ();
4688 }
4689
4690 else if (GET_CODE (addr) == CONST)
4691 {
4692 rtx base, offset;
4693
4694 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4695
4696 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4697 offset = XEXP (XEXP (addr, 0), 1);
4698
4699 base = force_operand (base, NULL_RTX);
4700 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4701 offset = force_reg (Pmode, offset);
4702 ret = gen_rtx_PLUS (Pmode, base, offset);
4703 }
4704
4705 else
4706 gcc_unreachable (); /* for now ... */
4707
4708 return ret;
4709 }
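
/* As a rough sketch, the TLS_MODEL_LOCAL_EXEC case above corresponds to
   an assembly sequence along the lines of

       sethi  %tle_hix22(sym), %reg
       xor    %reg, %tle_lox10(sym), %reg
       add    %g7, %reg, %dest      ! %g7 is the thread pointer

   while the other models additionally go through the GOT and, for the
   dynamic models, a call to __tls_get_addr.  */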
4710
4711 /* Legitimize PIC addresses. If the address is already position-independent,
4712 we return ORIG. Newly generated position-independent addresses go into a
4713 reg. This is REG if nonzero, otherwise we allocate register(s) as
4714 necessary. */
4715
4716 static rtx
4717 sparc_legitimize_pic_address (rtx orig, rtx reg)
4718 {
4719 bool gotdata_op = false;
4720
4721 if (GET_CODE (orig) == SYMBOL_REF
4722 /* See the comment in sparc_expand_move. */
4723 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4724 {
4725 rtx pic_ref, address;
4726 rtx_insn *insn;
4727
4728 if (reg == 0)
4729 {
4730 gcc_assert (can_create_pseudo_p ());
4731 reg = gen_reg_rtx (Pmode);
4732 }
4733
4734 if (flag_pic == 2)
4735 {
4736 /* If not during reload, allocate another temp reg here for loading
4737 in the address, so that these instructions can be optimized
4738 properly. */
4739 rtx temp_reg = (! can_create_pseudo_p ()
4740 ? reg : gen_reg_rtx (Pmode));
4741
4742 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4743 won't get confused into thinking that these two instructions
4744 are loading in the true address of the symbol. If in the
4745 future a PIC rtx exists, that should be used instead. */
4746 if (TARGET_ARCH64)
4747 {
4748 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4749 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4750 }
4751 else
4752 {
4753 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4754 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4755 }
4756 address = temp_reg;
4757 gotdata_op = true;
4758 }
4759 else
4760 address = orig;
4761
4762 crtl->uses_pic_offset_table = 1;
4763 if (gotdata_op)
4764 {
4765 if (TARGET_ARCH64)
4766 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4767 pic_offset_table_rtx,
4768 address, orig));
4769 else
4770 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4771 pic_offset_table_rtx,
4772 address, orig));
4773 }
4774 else
4775 {
4776 pic_ref
4777 = gen_const_mem (Pmode,
4778 gen_rtx_PLUS (Pmode,
4779 pic_offset_table_rtx, address));
4780 insn = emit_move_insn (reg, pic_ref);
4781 }
4782
4783 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4784 by the loop optimizer. */
4785 set_unique_reg_note (insn, REG_EQUAL, orig);
4786 return reg;
4787 }
4788 else if (GET_CODE (orig) == CONST)
4789 {
4790 rtx base, offset;
4791
4792 if (GET_CODE (XEXP (orig, 0)) == PLUS
4793 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4794 return orig;
4795
4796 if (reg == 0)
4797 {
4798 gcc_assert (can_create_pseudo_p ());
4799 reg = gen_reg_rtx (Pmode);
4800 }
4801
4802 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4803 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4804 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4805 base == reg ? NULL_RTX : reg);
4806
4807 if (GET_CODE (offset) == CONST_INT)
4808 {
4809 if (SMALL_INT (offset))
4810 return plus_constant (Pmode, base, INTVAL (offset));
4811 else if (can_create_pseudo_p ())
4812 offset = force_reg (Pmode, offset);
4813 else
4814 /* If we reach here, then something is seriously wrong. */
4815 gcc_unreachable ();
4816 }
4817 return gen_rtx_PLUS (Pmode, base, offset);
4818 }
4819 else if (GET_CODE (orig) == LABEL_REF)
4820 /* ??? We ought to be checking that the register is live instead, in case
4821 it is eliminated. */
4822 crtl->uses_pic_offset_table = 1;
4823
4824 return orig;
4825 }
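
/* Roughly speaking: with -fpic (flag_pic == 1) the symbol's GOT slot is
   reached as a small offset from %l7, so a single GOT load suffices,
   whereas -fPIC (flag_pic == 2) first builds a full 32-bit GOT offset
   with the high/lo_sum pair handled via temp_reg above and then performs
   the load.  */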
4826
4827 /* Try machine-dependent ways of modifying an illegitimate address X
4828 to be legitimate. If we find one, return the new, valid address.
4829
4830 OLDX is the address as it was before break_out_memory_refs was called.
4831 In some cases it is useful to look at this to decide what needs to be done.
4832
4833 MODE is the mode of the operand pointed to by X.
4834
4835 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4836
4837 static rtx
4838 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4839 machine_mode mode)
4840 {
4841 rtx orig_x = x;
4842
4843 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4844 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4845 force_operand (XEXP (x, 0), NULL_RTX));
4846 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4847 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4848 force_operand (XEXP (x, 1), NULL_RTX));
4849 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4850 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4851 XEXP (x, 1));
4852 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4853 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4854 force_operand (XEXP (x, 1), NULL_RTX));
4855
4856 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4857 return x;
4858
4859 if (sparc_tls_referenced_p (x))
4860 x = sparc_legitimize_tls_address (x);
4861 else if (flag_pic)
4862 x = sparc_legitimize_pic_address (x, NULL_RTX);
4863 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4864 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4865 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4866 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4867 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4868 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4869 else if (GET_CODE (x) == SYMBOL_REF
4870 || GET_CODE (x) == CONST
4871 || GET_CODE (x) == LABEL_REF)
4872 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4873
4874 return x;
4875 }
4876
4877 /* Delegitimize an address that was legitimized by the above function. */
4878
4879 static rtx
4880 sparc_delegitimize_address (rtx x)
4881 {
4882 x = delegitimize_mem_from_attrs (x);
4883
4884 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4885 switch (XINT (XEXP (x, 1), 1))
4886 {
4887 case UNSPEC_MOVE_PIC:
4888 case UNSPEC_TLSLE:
4889 x = XVECEXP (XEXP (x, 1), 0, 0);
4890 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4891 break;
4892 default:
4893 break;
4894 }
4895
4896 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4897 if (GET_CODE (x) == MINUS
4898 && REG_P (XEXP (x, 0))
4899 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4900 && GET_CODE (XEXP (x, 1)) == LO_SUM
4901 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4902 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4903 {
4904 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4905 gcc_assert (GET_CODE (x) == LABEL_REF);
4906 }
4907
4908 return x;
4909 }
4910
4911 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4912 replace the input X, or the original X if no replacement is called for.
4913 The output parameter *WIN is 1 if the calling macro should goto WIN,
4914 0 if it should not.
4915
4916 For SPARC, we wish to handle addresses by splitting them into
4917 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4918 This cuts the number of extra insns by one.
4919
4920 Do nothing when generating PIC code and the address is a symbolic
4921 operand or requires a scratch register. */
4922
4923 rtx
4924 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4925 int opnum, int type,
4926 int ind_levels ATTRIBUTE_UNUSED, int *win)
4927 {
4928 /* Decompose SImode constants into HIGH+LO_SUM. */
4929 if (CONSTANT_P (x)
4930 && (mode != TFmode || TARGET_ARCH64)
4931 && GET_MODE (x) == SImode
4932 && GET_CODE (x) != LO_SUM
4933 && GET_CODE (x) != HIGH
4934 && sparc_cmodel <= CM_MEDLOW
4935 && !(flag_pic
4936 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4937 {
4938 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4939 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4940 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4941 opnum, (enum reload_type)type);
4942 *win = 1;
4943 return x;
4944 }
4945
4946 /* We have to recognize what we have already generated above. */
4947 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4948 {
4949 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4950 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4951 opnum, (enum reload_type)type);
4952 *win = 1;
4953 return x;
4954 }
4955
4956 *win = 0;
4957 return x;
4958 }
4959
4960 /* Return true if ADDR (a legitimate address expression)
4961 has an effect that depends on the machine mode it is used for.
4962
4963 In PIC mode,
4964
4965 (mem:HI [%l7+a])
4966
4967 is not equivalent to
4968
4969 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4970
4971 because [%l7+a+1] is interpreted as the address of (a+1). */
4972
4973
4974 static bool
4975 sparc_mode_dependent_address_p (const_rtx addr,
4976 addr_space_t as ATTRIBUTE_UNUSED)
4977 {
4978 if (flag_pic && GET_CODE (addr) == PLUS)
4979 {
4980 rtx op0 = XEXP (addr, 0);
4981 rtx op1 = XEXP (addr, 1);
4982 if (op0 == pic_offset_table_rtx
4983 && symbolic_operand (op1, VOIDmode))
4984 return true;
4985 }
4986
4987 return false;
4988 }
4989
4990 #ifdef HAVE_GAS_HIDDEN
4991 # define USE_HIDDEN_LINKONCE 1
4992 #else
4993 # define USE_HIDDEN_LINKONCE 0
4994 #endif
4995
4996 static void
4997 get_pc_thunk_name (char name[32], unsigned int regno)
4998 {
4999 const char *reg_name = reg_names[regno];
5000
5001 /* Skip the leading '%' as that cannot be used in a
5002 symbol name. */
5003 reg_name += 1;
5004
5005 if (USE_HIDDEN_LINKONCE)
5006 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
5007 else
5008 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
5009 }
5010
5011 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
5012
5013 static rtx
5014 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
5015 {
5016 int orig_flag_pic = flag_pic;
5017 rtx insn;
5018
5019 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
5020 flag_pic = 0;
5021 if (TARGET_ARCH64)
5022 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
5023 else
5024 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
5025 flag_pic = orig_flag_pic;
5026
5027 return insn;
5028 }
5029
5030 /* Emit code to load the GOT register. */
5031
5032 void
5033 load_got_register (void)
5034 {
5035 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
5036 if (!global_offset_table_rtx)
5037 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
5038
5039 if (TARGET_VXWORKS_RTP)
5040 emit_insn (gen_vxworks_load_got ());
5041 else
5042 {
5043 /* The GOT symbol is subject to a PC-relative relocation so we need a
5044 helper function to add the PC value and thus get the final value. */
5045 if (!got_helper_rtx)
5046 {
5047 char name[32];
5048 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
5049 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5050 }
5051
5052 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
5053 got_helper_rtx,
5054 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
5055 }
5056
5057 /* Need to emit this whether or not we obey regdecls,
5058 since setjmp/longjmp can cause life info to screw up.
5059 ??? In the case where we don't obey regdecls, this is not sufficient
5060 since we may not fall out the bottom. */
5061 emit_use (global_offset_table_rtx);
5062 }
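
/* The non-VxWorks sequence emitted here is essentially (a sketch of the
   load_pcrel_sym pattern, modulo the exact thunk name):

       sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
       call   __sparc_get_pc_thunk.l7
        add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the value of %o7 (the address of the call) into
   %l7 and returns, leaving the GOT address in %l7.  */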
5063
5064 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5065 address of the call target. */
5066
5067 void
5068 sparc_emit_call_insn (rtx pat, rtx addr)
5069 {
5070 rtx_insn *insn;
5071
5072 insn = emit_call_insn (pat);
5073
5074 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5075 if (TARGET_VXWORKS_RTP
5076 && flag_pic
5077 && GET_CODE (addr) == SYMBOL_REF
5078 && (SYMBOL_REF_DECL (addr)
5079 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5080 : !SYMBOL_REF_LOCAL_P (addr)))
5081 {
5082 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5083 crtl->uses_pic_offset_table = 1;
5084 }
5085 }
5086 \f
5087 /* Return 1 if RTX is a MEM which is known to be aligned to at
5088 least a DESIRED byte boundary. */
5089
5090 int
5091 mem_min_alignment (rtx mem, int desired)
5092 {
5093 rtx addr, base, offset;
5094
5095 /* If it's not a MEM we can't accept it. */
5096 if (GET_CODE (mem) != MEM)
5097 return 0;
5098
5099 /* Obviously... */
5100 if (!TARGET_UNALIGNED_DOUBLES
5101 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5102 return 1;
5103
5104 /* ??? The rest of the function predates MEM_ALIGN so
5105 there is probably a bit of redundancy. */
5106 addr = XEXP (mem, 0);
5107 base = offset = NULL_RTX;
5108 if (GET_CODE (addr) == PLUS)
5109 {
5110 if (GET_CODE (XEXP (addr, 0)) == REG)
5111 {
5112 base = XEXP (addr, 0);
5113
5114 /* What we are saying here is that if the base
5115 REG is aligned properly, the compiler will make
5116 sure any REG-based index upon it will be so
5117 as well. */
5118 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5119 offset = XEXP (addr, 1);
5120 else
5121 offset = const0_rtx;
5122 }
5123 }
5124 else if (GET_CODE (addr) == REG)
5125 {
5126 base = addr;
5127 offset = const0_rtx;
5128 }
5129
5130 if (base != NULL_RTX)
5131 {
5132 int regno = REGNO (base);
5133
5134 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5135 {
5136 /* Check if the compiler has recorded some information
5137 about the alignment of the base REG. If reload has
5138 completed, we already matched with proper alignments.
5139 If not running global_alloc, reload might give us
5140 an unaligned pointer to the local stack, though. */
5141 if (((cfun != 0
5142 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5143 || (optimize && reload_completed))
5144 && (INTVAL (offset) & (desired - 1)) == 0)
5145 return 1;
5146 }
5147 else
5148 {
5149 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5150 return 1;
5151 }
5152 }
5153 else if (! TARGET_UNALIGNED_DOUBLES
5154 || CONSTANT_P (addr)
5155 || GET_CODE (addr) == LO_SUM)
5156 {
5157 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5158 is true, in which case we can only assume that an access is aligned if
5159 it is to a constant address, or the address involves a LO_SUM. */
5160 return 1;
5161 }
5162
5163 /* An obviously unaligned address. */
5164 return 0;
5165 }
5166
5167 \f
5168 /* Vectors to keep interesting information about registers where it can easily
5169 be got. We used to use the actual mode value as the bit number, but there
5170 are more than 32 modes now. Instead we use two tables: one indexed by
5171 hard register number, and one indexed by mode. */
5172
5173 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5174 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5175 mapped into one sparc_mode_class mode. */
5176
5177 enum sparc_mode_class {
5178 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5179 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5180 CC_MODE, CCFP_MODE
5181 };
5182
5183 /* Modes for single-word and smaller quantities. */
5184 #define S_MODES \
5185 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5186
5187 /* Modes for double-word and smaller quantities. */
5188 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5189
5190 /* Modes for quad-word and smaller quantities. */
5191 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5192
5193 /* Modes for 8-word and smaller quantities. */
5194 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5195
5196 /* Modes for single-float quantities. */
5197 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5198
5199 /* Modes for double-float and smaller quantities. */
5200 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5201
5202 /* Modes for quad-float and smaller quantities. */
5203 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5204
5205 /* Modes for quad-float pairs and smaller quantities. */
5206 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5207
5208 /* Modes for double-float only quantities. */
5209 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5210
5211 /* Modes for quad-float and double-float only quantities. */
5212 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5213
5214 /* Modes for quad-float pairs and double-float only quantities. */
5215 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5216
5217 /* Modes for condition codes. */
5218 #define CC_MODES (1 << (int) CC_MODE)
5219 #define CCFP_MODES (1 << (int) CCFP_MODE)
5220
5221 /* Value is 1 if register/mode pair is acceptable on sparc.
5222
5223 The funny mixture of D and T modes is because integer operations
5224 do not specially operate on tetra quantities, so non-quad-aligned
5225 registers can hold quadword quantities (except %o4 and %i4 because
5226 they cross fixed registers).
5227
5228 ??? Note that, despite the settings, non-double-aligned parameter
5229 registers can hold double-word quantities in 32-bit mode. */
5230
5231 /* This points to either the 32-bit or the 64-bit version. */
5232 static const int *hard_regno_mode_classes;
5233
5234 static const int hard_32bit_mode_classes[] = {
5235 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5236 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5237 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5238 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5239
5240 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5241 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5242 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5243 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5244
5245 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5246 and none can hold SFmode/SImode values. */
5247 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5248 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5249 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5250 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5251
5252 /* %fcc[0123] */
5253 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5254
5255 /* %icc, %sfp, %gsr */
5256 CC_MODES, 0, D_MODES
5257 };
5258
5259 static const int hard_64bit_mode_classes[] = {
5260 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5261 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5262 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5263 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5264
5265 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5266 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5267 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5268 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5269
5270 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5271 and none can hold SFmode/SImode values. */
5272 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5273 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5274 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5275 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5276
5277 /* %fcc[0123] */
5278 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5279
5280 /* %icc, %sfp, %gsr */
5281 CC_MODES, 0, D_MODES
5282 };
5283
5284 static int sparc_mode_class [NUM_MACHINE_MODES];
5285
5286 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5287
5288 static void
5289 sparc_init_modes (void)
5290 {
5291 int i;
5292
5293 for (i = 0; i < NUM_MACHINE_MODES; i++)
5294 {
5295 machine_mode m = (machine_mode) i;
5296 unsigned int size = GET_MODE_SIZE (m);
5297
5298 switch (GET_MODE_CLASS (m))
5299 {
5300 case MODE_INT:
5301 case MODE_PARTIAL_INT:
5302 case MODE_COMPLEX_INT:
5303 if (size < 4)
5304 sparc_mode_class[i] = 1 << (int) H_MODE;
5305 else if (size == 4)
5306 sparc_mode_class[i] = 1 << (int) S_MODE;
5307 else if (size == 8)
5308 sparc_mode_class[i] = 1 << (int) D_MODE;
5309 else if (size == 16)
5310 sparc_mode_class[i] = 1 << (int) T_MODE;
5311 else if (size == 32)
5312 sparc_mode_class[i] = 1 << (int) O_MODE;
5313 else
5314 sparc_mode_class[i] = 0;
5315 break;
5316 case MODE_VECTOR_INT:
5317 if (size == 4)
5318 sparc_mode_class[i] = 1 << (int) SF_MODE;
5319 else if (size == 8)
5320 sparc_mode_class[i] = 1 << (int) DF_MODE;
5321 else
5322 sparc_mode_class[i] = 0;
5323 break;
5324 case MODE_FLOAT:
5325 case MODE_COMPLEX_FLOAT:
5326 if (size == 4)
5327 sparc_mode_class[i] = 1 << (int) SF_MODE;
5328 else if (size == 8)
5329 sparc_mode_class[i] = 1 << (int) DF_MODE;
5330 else if (size == 16)
5331 sparc_mode_class[i] = 1 << (int) TF_MODE;
5332 else if (size == 32)
5333 sparc_mode_class[i] = 1 << (int) OF_MODE;
5334 else
5335 sparc_mode_class[i] = 0;
5336 break;
5337 case MODE_CC:
5338 if (m == CCFPmode || m == CCFPEmode)
5339 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5340 else
5341 sparc_mode_class[i] = 1 << (int) CC_MODE;
5342 break;
5343 default:
5344 sparc_mode_class[i] = 0;
5345 break;
5346 }
5347 }
5348
5349 if (TARGET_ARCH64)
5350 hard_regno_mode_classes = hard_64bit_mode_classes;
5351 else
5352 hard_regno_mode_classes = hard_32bit_mode_classes;
5353
5354 /* Initialize the array used by REGNO_REG_CLASS. */
5355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5356 {
5357 if (i < 16 && TARGET_V8PLUS)
5358 sparc_regno_reg_class[i] = I64_REGS;
5359 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5360 sparc_regno_reg_class[i] = GENERAL_REGS;
5361 else if (i < 64)
5362 sparc_regno_reg_class[i] = FP_REGS;
5363 else if (i < 96)
5364 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5365 else if (i < 100)
5366 sparc_regno_reg_class[i] = FPCC_REGS;
5367 else
5368 sparc_regno_reg_class[i] = NO_REGS;
5369 }
5370 }
5371 \f
5372 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5373
5374 static inline bool
5375 save_global_or_fp_reg_p (unsigned int regno,
5376 int leaf_function ATTRIBUTE_UNUSED)
5377 {
5378 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5379 }
5380
5381 /* Return whether the return address register (%i7) is needed. */
5382
5383 static inline bool
5384 return_addr_reg_needed_p (int leaf_function)
5385 {
5386 /* If it is live, for example because of __builtin_return_address (0). */
5387 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5388 return true;
5389
5390 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5391 if (!leaf_function
5392 /* Loading the GOT register clobbers %o7. */
5393 || crtl->uses_pic_offset_table
5394 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5395 return true;
5396
5397 return false;
5398 }
5399
5400 /* Return whether REGNO, a local or in register, must be saved/restored. */
5401
5402 static bool
5403 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5404 {
5405 /* General case: call-saved registers live at some point. */
5406 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5407 return true;
5408
5409 /* Frame pointer register (%fp) if needed. */
5410 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5411 return true;
5412
5413 /* Return address register (%i7) if needed. */
5414 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5415 return true;
5416
5417 /* GOT register (%l7) if needed. */
5418 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5419 return true;
5420
5421 /* If the function accesses prior frames, the frame pointer and the return
5422 address of the previous frame must be saved on the stack. */
5423 if (crtl->accesses_prior_frames
5424 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5425 return true;
5426
5427 return false;
5428 }
5429
5430 /* Compute the frame size required by the function. This function is called
5431 during the reload pass and also by sparc_expand_prologue. */
5432
5433 HOST_WIDE_INT
5434 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5435 {
5436 HOST_WIDE_INT frame_size, apparent_frame_size;
5437 int args_size, n_global_fp_regs = 0;
5438 bool save_local_in_regs_p = false;
5439 unsigned int i;
5440
5441 /* If the function allocates dynamic stack space, the dynamic offset is
5442 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5443 if (leaf_function && !cfun->calls_alloca)
5444 args_size = 0;
5445 else
5446 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5447
5448 /* Calculate space needed for global registers. */
5449 if (TARGET_ARCH64)
5450 {
5451 for (i = 0; i < 8; i++)
5452 if (save_global_or_fp_reg_p (i, 0))
5453 n_global_fp_regs += 2;
5454 }
5455 else
5456 {
5457 for (i = 0; i < 8; i += 2)
5458 if (save_global_or_fp_reg_p (i, 0)
5459 || save_global_or_fp_reg_p (i + 1, 0))
5460 n_global_fp_regs += 2;
5461 }
5462
5463 /* In the flat window model, find out which local and in registers need to
5464 be saved. We don't reserve space in the current frame for them as they
5465 will be spilled into the register window save area of the caller's frame.
5466 However, as soon as we use this register window save area, we must create
5467 that of the current frame to make it the live one. */
5468 if (TARGET_FLAT)
5469 for (i = 16; i < 32; i++)
5470 if (save_local_or_in_reg_p (i, leaf_function))
5471 {
5472 save_local_in_regs_p = true;
5473 break;
5474 }
5475
5476 /* Calculate space needed for FP registers. */
5477 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5478 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5479 n_global_fp_regs += 2;
5480
5481 if (size == 0
5482 && n_global_fp_regs == 0
5483 && args_size == 0
5484 && !save_local_in_regs_p)
5485 frame_size = apparent_frame_size = 0;
5486 else
5487 {
5488 /* Start from the apparent frame size. */
5489 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5490
5491 /* We need to add the size of the outgoing argument area. */
5492 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5493
5494 /* And that of the register window save area. */
5495 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5496
5497 /* Finally, bump to the appropriate alignment. */
5498 frame_size = SPARC_STACK_ALIGN (frame_size);
5499 }
5500
5501 /* Set up values for use in prologue and epilogue. */
5502 sparc_frame_size = frame_size;
5503 sparc_apparent_frame_size = apparent_frame_size;
5504 sparc_n_global_fp_regs = n_global_fp_regs;
5505 sparc_save_local_in_regs_p = save_local_in_regs_p;
5506
5507 return frame_size;
5508 }
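/* A worked example of the computation above, with made-up numbers (a sketch
   only; FIRST_PARM_OFFSET and SPARC_STACK_ALIGN are kept symbolic because
   their values depend on the ABI and word size):

     size = 40, args_size = 20, n_global_fp_regs = 2

     apparent_frame_size = ROUND_UP (40, 8) + 2 * 4        = 48
     frame_size          = 48 + ROUND_UP (20, 8)           = 72
     frame_size         += FIRST_PARM_OFFSET (cfun->decl)
     frame_size          = SPARC_STACK_ALIGN (frame_size)  */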
5509
5510 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5511
5512 int
5513 sparc_initial_elimination_offset (int to)
5514 {
5515 int offset;
5516
5517 if (to == STACK_POINTER_REGNUM)
5518 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5519 else
5520 offset = 0;
5521
5522 offset += SPARC_STACK_BIAS;
5523 return offset;
5524 }
5525
5526 /* Output any necessary .register pseudo-ops. */
5527
5528 void
5529 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5530 {
5531 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5532 int i;
5533
5534 if (TARGET_ARCH32)
5535 return;
5536
5537 /* Check if %g[2367] were used without
5538 .register being printed for them already. */
5539 for (i = 2; i < 8; i++)
5540 {
5541 if (df_regs_ever_live_p (i)
5542 && ! sparc_hard_reg_printed [i])
5543 {
5544 sparc_hard_reg_printed [i] = 1;
5545 /* %g7 is used as TLS base register, use #ignore
5546 for it instead of #scratch. */
5547 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5548 i == 7 ? "ignore" : "scratch");
5549 }
5550 if (i == 3) i = 5;
5551 }
5552 #endif
5553 }
5554
5555 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5556
5557 #if PROBE_INTERVAL > 4096
5558 #error Cannot use indexed addressing mode for stack probing
5559 #endif
5560
5561 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5562 inclusive. These are offsets from the current stack pointer.
5563
5564 Note that we don't use the REG+REG addressing mode for the probes because
5565 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5566 so the advantages of having a single code path win here. */
5567
5568 static void
5569 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5570 {
5571 rtx g1 = gen_rtx_REG (Pmode, 1);
5572
5573 /* See if we have a constant small number of probes to generate. If so,
5574 that's the easy case. */
5575 if (size <= PROBE_INTERVAL)
5576 {
5577 emit_move_insn (g1, GEN_INT (first));
5578 emit_insn (gen_rtx_SET (g1,
5579 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5580 emit_stack_probe (plus_constant (Pmode, g1, -size));
5581 }
5582
5583 /* The run-time loop is made up of 9 insns in the generic case while the
5584 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5585 else if (size <= 4 * PROBE_INTERVAL)
5586 {
5587 HOST_WIDE_INT i;
5588
5589 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5590 emit_insn (gen_rtx_SET (g1,
5591 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5592 emit_stack_probe (g1);
5593
5594 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5595 it exceeds SIZE. If only two probes are needed, this will not
5596 generate any code. Then probe at FIRST + SIZE. */
5597 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5598 {
5599 emit_insn (gen_rtx_SET (g1,
5600 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5601 emit_stack_probe (g1);
5602 }
5603
5604 emit_stack_probe (plus_constant (Pmode, g1,
5605 (i - PROBE_INTERVAL) - size));
5606 }
5607
5608 /* Otherwise, do the same as above, but in a loop. Note that we must be
5609 extra careful with variables wrapping around because we might be at
5610 the very top (or the very bottom) of the address space and we have
5611 to be able to handle this case properly; in particular, we use an
5612 equality test for the loop condition. */
5613 else
5614 {
5615 HOST_WIDE_INT rounded_size;
5616 rtx g4 = gen_rtx_REG (Pmode, 4);
5617
5618 emit_move_insn (g1, GEN_INT (first));
5619
5620
5621 /* Step 1: round SIZE to the previous multiple of the interval. */
5622
5623 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5624 emit_move_insn (g4, GEN_INT (rounded_size));
5625
5626
5627 /* Step 2: compute initial and final value of the loop counter. */
5628
5629 /* TEST_ADDR = SP + FIRST. */
5630 emit_insn (gen_rtx_SET (g1,
5631 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5632
5633 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5634 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5635
5636
5637 /* Step 3: the loop
5638
5639 while (TEST_ADDR != LAST_ADDR)
5640 {
5641 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5642 probe at TEST_ADDR
5643 }
5644
5645 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5646 until it is equal to ROUNDED_SIZE. */
5647
5648 if (TARGET_ARCH64)
5649 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5650 else
5651 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5652
5653
5654 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5655 that SIZE is equal to ROUNDED_SIZE. */
5656
5657 if (size != rounded_size)
5658 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5659 }
5660
5661 /* Make sure nothing is scheduled before we are done. */
5662 emit_insn (gen_blockage ());
5663 }
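/* For instance, with the default 4KB probe interval, a call such as
   sparc_emit_probe_stack_range (4096, 10000) takes the second branch above
   (10000 <= 4 * PROBE_INTERVAL) and emits probes at %sp - 8192, %sp - 12288
   and %sp - 14096, i.e. at FIRST + 4096, FIRST + 8192 and FIRST + SIZE below
   the stack pointer.  This is a sketch; the exact instructions also depend
   on the stack bias in 64-bit mode.  */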
5664
5665 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5666 absolute addresses. */
5667
5668 const char *
5669 output_probe_stack_range (rtx reg1, rtx reg2)
5670 {
5671 static int labelno = 0;
5672 char loop_lab[32];
5673 rtx xops[2];
5674
5675 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5676
5677 /* Loop. */
5678 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5679
5680 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5681 xops[0] = reg1;
5682 xops[1] = GEN_INT (-PROBE_INTERVAL);
5683 output_asm_insn ("add\t%0, %1, %0", xops);
5684
5685 /* Test if TEST_ADDR == LAST_ADDR. */
5686 xops[1] = reg2;
5687 output_asm_insn ("cmp\t%0, %1", xops);
5688
5689 /* Probe at TEST_ADDR and branch. */
5690 if (TARGET_ARCH64)
5691 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5692 else
5693 fputs ("\tbne\t", asm_out_file);
5694 assemble_name_raw (asm_out_file, loop_lab);
5695 fputc ('\n', asm_out_file);
5696 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5697 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5698
5699 return "";
5700 }
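/* With the default 4KB probe interval, the loop emitted above looks roughly
   like this in 64-bit mode (a sketch; the label name and the stack bias
   value of 2047 are assumptions):

	.LPSRL0:
		add	%g1, -4096, %g1
		cmp	%g1, %g4
		bne,pt	%xcc,.LPSRL0
		 st	%g0, [%g1+2047]  */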
5701
5702 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5703 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5704 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5705 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5706 the action to be performed if it returns false. Return the new offset. */
5707
5708 typedef bool (*sorr_pred_t) (unsigned int, int);
5709 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5710
5711 static int
5712 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5713 int offset, int leaf_function, sorr_pred_t save_p,
5714 sorr_act_t action_true, sorr_act_t action_false)
5715 {
5716 unsigned int i;
5717 rtx mem;
5718 rtx_insn *insn;
5719
5720 if (TARGET_ARCH64 && high <= 32)
5721 {
5722 int fp_offset = -1;
5723
5724 for (i = low; i < high; i++)
5725 {
5726 if (save_p (i, leaf_function))
5727 {
5728 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5729 base, offset));
5730 if (action_true == SORR_SAVE)
5731 {
5732 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5733 RTX_FRAME_RELATED_P (insn) = 1;
5734 }
5735 else /* action_true == SORR_RESTORE */
5736 {
5737 /* The frame pointer must be restored last since its old
5738 value may be used as base address for the frame. This
5739 is problematic in 64-bit mode only because of the lack
5740 of double-word load instruction. */
5741 if (i == HARD_FRAME_POINTER_REGNUM)
5742 fp_offset = offset;
5743 else
5744 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5745 }
5746 offset += 8;
5747 }
5748 else if (action_false == SORR_ADVANCE)
5749 offset += 8;
5750 }
5751
5752 if (fp_offset >= 0)
5753 {
5754 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5755 emit_move_insn (hard_frame_pointer_rtx, mem);
5756 }
5757 }
5758 else
5759 {
5760 for (i = low; i < high; i += 2)
5761 {
5762 bool reg0 = save_p (i, leaf_function);
5763 bool reg1 = save_p (i + 1, leaf_function);
5764 machine_mode mode;
5765 int regno;
5766
5767 if (reg0 && reg1)
5768 {
5769 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5770 regno = i;
5771 }
5772 else if (reg0)
5773 {
5774 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5775 regno = i;
5776 }
5777 else if (reg1)
5778 {
5779 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5780 regno = i + 1;
5781 offset += 4;
5782 }
5783 else
5784 {
5785 if (action_false == SORR_ADVANCE)
5786 offset += 8;
5787 continue;
5788 }
5789
5790 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5791 if (action_true == SORR_SAVE)
5792 {
5793 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5794 RTX_FRAME_RELATED_P (insn) = 1;
5795 if (mode == DImode)
5796 {
5797 rtx set1, set2;
5798 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5799 offset));
5800 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5801 RTX_FRAME_RELATED_P (set1) = 1;
5802 mem
5803 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5804 offset + 4));
5805 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5806 RTX_FRAME_RELATED_P (set2) = 1;
5807 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5808 gen_rtx_PARALLEL (VOIDmode,
5809 gen_rtvec (2, set1, set2)));
5810 }
5811 }
5812 else /* action_true == SORR_RESTORE */
5813 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5814
5815 /* Bump and round down to double word
5816 in case we already bumped by 4. */
5817 offset = ROUND_DOWN (offset + 8, 8);
5818 }
5819 }
5820
5821 return offset;
5822 }
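/* As an illustration of the 32-bit path above (a sketch with assumed
   liveness): if only %l0 (regno 16) and %l3 (regno 19) must be saved, %l0
   of the first pair is stored as an SImode word at OFFSET, %l3 of the
   second pair is stored at OFFSET + 12 (its slot is bumped by 4 within the
   double word), and the running offset is rounded back down to a multiple
   of 8 after each pair.  */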
5823
5824 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5825
5826 static rtx
5827 emit_adjust_base_to_offset (rtx base, int offset)
5828 {
5829 /* ??? This might be optimized a little as %g1 might already have a
5830 value close enough that a single add insn will do. */
5831 /* ??? Although, all of this is probably only a temporary fix because
5832 if %g1 can hold a function result, then sparc_expand_epilogue will
5833 lose (the result will be clobbered). */
5834 rtx new_base = gen_rtx_REG (Pmode, 1);
5835 emit_move_insn (new_base, GEN_INT (offset));
5836 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5837 return new_base;
5838 }
5839
5840 /* Emit code to save/restore call-saved global and FP registers. */
5841
5842 static void
5843 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5844 {
5845 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5846 {
5847 base = emit_adjust_base_to_offset (base, offset);
5848 offset = 0;
5849 }
5850
5851 offset
5852 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5853 save_global_or_fp_reg_p, action, SORR_NONE);
5854 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5855 save_global_or_fp_reg_p, action, SORR_NONE);
5856 }
5857
5858 /* Emit code to save/restore call-saved local and in registers. */
5859
5860 static void
5861 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5862 {
5863 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5864 {
5865 base = emit_adjust_base_to_offset (base, offset);
5866 offset = 0;
5867 }
5868
5869 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5870 save_local_or_in_reg_p, action, SORR_ADVANCE);
5871 }
5872
5873 /* Emit a window_save insn. */
5874
5875 static rtx_insn *
5876 emit_window_save (rtx increment)
5877 {
5878 rtx_insn *insn = emit_insn (gen_window_save (increment));
5879 RTX_FRAME_RELATED_P (insn) = 1;
5880
5881 /* The incoming return address (%o7) is saved in %i7. */
5882 add_reg_note (insn, REG_CFA_REGISTER,
5883 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5884 gen_rtx_REG (Pmode,
5885 INCOMING_RETURN_ADDR_REGNUM)));
5886
5887 /* The window save event. */
5888 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5889
5890 /* The CFA is %fp, the hard frame pointer. */
5891 add_reg_note (insn, REG_CFA_DEF_CFA,
5892 plus_constant (Pmode, hard_frame_pointer_rtx,
5893 INCOMING_FRAME_SP_OFFSET));
5894
5895 return insn;
5896 }
5897
5898 /* Generate an increment for the stack pointer. */
5899
5900 static rtx
5901 gen_stack_pointer_inc (rtx increment)
5902 {
5903 return gen_rtx_SET (stack_pointer_rtx,
5904 gen_rtx_PLUS (Pmode,
5905 stack_pointer_rtx,
5906 increment));
5907 }
5908
5909 /* Expand the function prologue. The prologue is responsible for reserving
5910 storage for the frame, saving the call-saved registers and loading the
5911 GOT register if needed. */
5912
5913 void
5914 sparc_expand_prologue (void)
5915 {
5916 HOST_WIDE_INT size;
5917 rtx_insn *insn;
5918
5919 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5920 on the final value of the flag means deferring the prologue/epilogue
5921 expansion until just before the second scheduling pass, which is too
5922 late to emit multiple epilogues or return insns.
5923
5924 Of course we are making the assumption that the value of the flag
5925 will not change between now and its final value. Of the three parts
5926 of the formula, only the last one can reasonably vary. Let's take a
5927 closer look, after assuming that the first two are set to true
5928 (otherwise the last value is effectively silenced).
5929
5930 If only_leaf_regs_used returns false, the global predicate will also
5931 be false so the actual frame size calculated below will be positive.
5932 As a consequence, the save_register_window insn will be emitted in
5933 the instruction stream; now this insn explicitly references %fp
5934 which is not a leaf register so only_leaf_regs_used will always
5935 return false subsequently.
5936
5937 If only_leaf_regs_used returns true, we hope that the subsequent
5938 optimization passes won't cause non-leaf registers to pop up. For
5939 example, the regrename pass has special provisions to not rename to
5940 non-leaf registers in a leaf function. */
5941 sparc_leaf_function_p
5942 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5943
5944 size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
5945
5946 if (flag_stack_usage_info)
5947 current_function_static_stack_size = size;
5948
5949 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5950 || flag_stack_clash_protection)
5951 {
5952 if (crtl->is_leaf && !cfun->calls_alloca)
5953 {
5954 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5955 sparc_emit_probe_stack_range (get_stack_check_protect (),
5956 size - get_stack_check_protect ());
5957 }
5958 else if (size > 0)
5959 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5960 }
5961
5962 if (size == 0)
5963 ; /* do nothing. */
5964 else if (sparc_leaf_function_p)
5965 {
5966 rtx size_int_rtx = GEN_INT (-size);
5967
5968 if (size <= 4096)
5969 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5970 else if (size <= 8192)
5971 {
5972 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5973 RTX_FRAME_RELATED_P (insn) = 1;
5974
5975 /* %sp is still the CFA register. */
5976 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5977 }
5978 else
5979 {
5980 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5981 emit_move_insn (size_rtx, size_int_rtx);
5982 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5983 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5984 gen_stack_pointer_inc (size_int_rtx));
5985 }
5986
5987 RTX_FRAME_RELATED_P (insn) = 1;
5988 }
5989 else
5990 {
5991 rtx size_int_rtx = GEN_INT (-size);
5992
5993 if (size <= 4096)
5994 emit_window_save (size_int_rtx);
5995 else if (size <= 8192)
5996 {
5997 emit_window_save (GEN_INT (-4096));
5998
5999 /* %sp is not the CFA register anymore. */
6000 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6001
6002 /* Make sure no %fp-based store is issued until after the frame is
6003 established. The offset between the frame pointer and the stack
6004 pointer is calculated relative to the value of the stack pointer
6005 at the end of the function prologue, and moving instructions that
6006 access the stack via the frame pointer between the instructions
6007 that decrement the stack pointer could result in accessing the
6008 register window save area, which is volatile. */
6009 emit_insn (gen_frame_blockage ());
6010 }
6011 else
6012 {
6013 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6014 emit_move_insn (size_rtx, size_int_rtx);
6015 emit_window_save (size_rtx);
6016 }
6017 }
6018
6019 if (sparc_leaf_function_p)
6020 {
6021 sparc_frame_base_reg = stack_pointer_rtx;
6022 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6023 }
6024 else
6025 {
6026 sparc_frame_base_reg = hard_frame_pointer_rtx;
6027 sparc_frame_base_offset = SPARC_STACK_BIAS;
6028 }
6029
6030 if (sparc_n_global_fp_regs > 0)
6031 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6032 sparc_frame_base_offset
6033 - sparc_apparent_frame_size,
6034 SORR_SAVE);
6035
6036 /* Load the GOT register if needed. */
6037 if (crtl->uses_pic_offset_table)
6038 load_got_register ();
6039
6040 /* Advertise that the data calculated just above are now valid. */
6041 sparc_prologue_data_valid_p = true;
6042 }
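/* For a non-leaf function, the three size cases above typically boil down
   to the following (a sketch with assumed frame sizes; window_save expands
   to a 'save' instruction):

     size = 112:	save %sp, -112, %sp
     size = 6000:	save %sp, -4096, %sp
			add  %sp, -1904, %sp
     size = 120000:	sethi/or the constant into %g1
			save %sp, %g1, %sp  */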
6043
6044 /* Expand the function prologue in the flat window model. The prologue is responsible for reserving
6045 storage for the frame, saving the call-saved registers and loading the
6046 GOT register if needed. */
6047
6048 void
6049 sparc_flat_expand_prologue (void)
6050 {
6051 HOST_WIDE_INT size;
6052 rtx_insn *insn;
6053
6054 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6055
6056 size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
6057
6058 if (flag_stack_usage_info)
6059 current_function_static_stack_size = size;
6060
6061 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6062 || flag_stack_clash_protection)
6063 {
6064 if (crtl->is_leaf && !cfun->calls_alloca)
6065 {
6066 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6067 sparc_emit_probe_stack_range (get_stack_check_protect (),
6068 size - get_stack_check_protect ());
6069 }
6070 else if (size > 0)
6071 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6072 }
6073
6074 if (sparc_save_local_in_regs_p)
6075 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6076 SORR_SAVE);
6077
6078 if (size == 0)
6079 ; /* do nothing. */
6080 else
6081 {
6082 rtx size_int_rtx, size_rtx;
6083
6084 size_rtx = size_int_rtx = GEN_INT (-size);
6085
6086 /* We establish the frame (i.e. decrement the stack pointer) first, even
6087 if we use a frame pointer, because we cannot clobber any call-saved
6088 registers, including the frame pointer, if we haven't created a new
6089 register save area, for the sake of compatibility with the ABI. */
6090 if (size <= 4096)
6091 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6092 else if (size <= 8192 && !frame_pointer_needed)
6093 {
6094 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6095 RTX_FRAME_RELATED_P (insn) = 1;
6096 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6097 }
6098 else
6099 {
6100 size_rtx = gen_rtx_REG (Pmode, 1);
6101 emit_move_insn (size_rtx, size_int_rtx);
6102 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6103 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6104 gen_stack_pointer_inc (size_int_rtx));
6105 }
6106 RTX_FRAME_RELATED_P (insn) = 1;
6107
6108 /* Ensure nothing is scheduled until after the frame is established. */
6109 emit_insn (gen_blockage ());
6110
6111 if (frame_pointer_needed)
6112 {
6113 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6114 gen_rtx_MINUS (Pmode,
6115 stack_pointer_rtx,
6116 size_rtx)));
6117 RTX_FRAME_RELATED_P (insn) = 1;
6118
6119 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6120 gen_rtx_SET (hard_frame_pointer_rtx,
6121 plus_constant (Pmode, stack_pointer_rtx,
6122 size)));
6123 }
6124
6125 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6126 {
6127 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6128 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6129
6130 insn = emit_move_insn (i7, o7);
6131 RTX_FRAME_RELATED_P (insn) = 1;
6132
6133 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6134
6135 /* Prevent this instruction from ever being considered dead,
6136 even if this function has no epilogue. */
6137 emit_use (i7);
6138 }
6139 }
6140
6141 if (frame_pointer_needed)
6142 {
6143 sparc_frame_base_reg = hard_frame_pointer_rtx;
6144 sparc_frame_base_offset = SPARC_STACK_BIAS;
6145 }
6146 else
6147 {
6148 sparc_frame_base_reg = stack_pointer_rtx;
6149 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6150 }
6151
6152 if (sparc_n_global_fp_regs > 0)
6153 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6154 sparc_frame_base_offset
6155 - sparc_apparent_frame_size,
6156 SORR_SAVE);
6157
6158 /* Load the GOT register if needed. */
6159 if (crtl->uses_pic_offset_table)
6160 load_got_register ();
6161
6162 /* Advertise that the data calculated just above are now valid. */
6163 sparc_prologue_data_valid_p = true;
6164 }
6165
6166 /* This function generates the assembly code for function entry, which boils
6167 down to emitting the necessary .register directives. */
6168
6169 static void
6170 sparc_asm_function_prologue (FILE *file)
6171 {
6172 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6173 if (!TARGET_FLAT)
6174 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6175
6176 sparc_output_scratch_registers (file);
6177 }
6178
6179 /* Expand the function epilogue, either normal or part of a sibcall.
6180 We emit all the instructions except the return or the call. */
6181
6182 void
6183 sparc_expand_epilogue (bool for_eh)
6184 {
6185 HOST_WIDE_INT size = sparc_frame_size;
6186
6187 if (cfun->calls_alloca)
6188 emit_insn (gen_frame_blockage ());
6189
6190 if (sparc_n_global_fp_regs > 0)
6191 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6192 sparc_frame_base_offset
6193 - sparc_apparent_frame_size,
6194 SORR_RESTORE);
6195
6196 if (size == 0 || for_eh)
6197 ; /* do nothing. */
6198 else if (sparc_leaf_function_p)
6199 {
6200 if (size <= 4096)
6201 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6202 else if (size <= 8192)
6203 {
6204 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6205 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6206 }
6207 else
6208 {
6209 rtx reg = gen_rtx_REG (Pmode, 1);
6210 emit_move_insn (reg, GEN_INT (size));
6211 emit_insn (gen_stack_pointer_inc (reg));
6212 }
6213 }
6214 }
6215
6216 /* Expand the function epilogue in the flat window model, either normal or part of a sibcall.
6217 We emit all the instructions except the return or the call. */
6218
6219 void
6220 sparc_flat_expand_epilogue (bool for_eh)
6221 {
6222 HOST_WIDE_INT size = sparc_frame_size;
6223
6224 if (sparc_n_global_fp_regs > 0)
6225 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6226 sparc_frame_base_offset
6227 - sparc_apparent_frame_size,
6228 SORR_RESTORE);
6229
6230 /* If we have a frame pointer, we'll need both to restore it before the
6231 frame is destroyed and to use its current value in destroying the frame.
6232 Since we don't have an atomic way to do that in the flat window model,
6233 we save the current value into a temporary register (%g1). */
6234 if (frame_pointer_needed && !for_eh)
6235 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6236
6237 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6238 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6239 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6240
6241 if (sparc_save_local_in_regs_p)
6242 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6243 sparc_frame_base_offset,
6244 SORR_RESTORE);
6245
6246 if (size == 0 || for_eh)
6247 ; /* do nothing. */
6248 else if (frame_pointer_needed)
6249 {
6250 /* Make sure the frame is destroyed after everything else is done. */
6251 emit_insn (gen_blockage ());
6252
6253 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6254 }
6255 else
6256 {
6257 /* Likewise. */
6258 emit_insn (gen_blockage ());
6259
6260 if (size <= 4096)
6261 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6262 else if (size <= 8192)
6263 {
6264 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6265 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6266 }
6267 else
6268 {
6269 rtx reg = gen_rtx_REG (Pmode, 1);
6270 emit_move_insn (reg, GEN_INT (size));
6271 emit_insn (gen_stack_pointer_inc (reg));
6272 }
6273 }
6274 }
6275
6276 /* Return true if it is appropriate to emit `return' instructions in the
6277 body of a function. */
6278
6279 bool
6280 sparc_can_use_return_insn_p (void)
6281 {
6282 return sparc_prologue_data_valid_p
6283 && sparc_n_global_fp_regs == 0
6284 && TARGET_FLAT
6285 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6286 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6287 }
6288
6289 /* This function generates the assembly code for function exit. */
6290
6291 static void
6292 sparc_asm_function_epilogue (FILE *file)
6293 {
6294 /* If the last two instructions of a function are "call foo; dslot;"
6295 the return address might point to the first instruction in the next
6296 function and we have to output a dummy nop for the sake of sane
6297 backtraces in such cases. This is pointless for sibling calls since
6298 the return address is explicitly adjusted. */
6299
6300 rtx_insn *insn = get_last_insn ();
6301
6302 rtx last_real_insn = prev_real_insn (insn);
6303 if (last_real_insn
6304 && NONJUMP_INSN_P (last_real_insn)
6305 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6306 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6307
6308 if (last_real_insn
6309 && CALL_P (last_real_insn)
6310 && !SIBLING_CALL_P (last_real_insn))
6311 fputs ("\tnop\n", file);
6312
6313 sparc_output_deferred_case_vectors ();
6314 }
6315
6316 /* Output a 'restore' instruction. */
6317
6318 static void
6319 output_restore (rtx pat)
6320 {
6321 rtx operands[3];
6322
6323 if (! pat)
6324 {
6325 fputs ("\t restore\n", asm_out_file);
6326 return;
6327 }
6328
6329 gcc_assert (GET_CODE (pat) == SET);
6330
6331 operands[0] = SET_DEST (pat);
6332 pat = SET_SRC (pat);
6333
6334 switch (GET_CODE (pat))
6335 {
6336 case PLUS:
6337 operands[1] = XEXP (pat, 0);
6338 operands[2] = XEXP (pat, 1);
6339 output_asm_insn (" restore %r1, %2, %Y0", operands);
6340 break;
6341 case LO_SUM:
6342 operands[1] = XEXP (pat, 0);
6343 operands[2] = XEXP (pat, 1);
6344 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6345 break;
6346 case ASHIFT:
6347 operands[1] = XEXP (pat, 0);
6348 gcc_assert (XEXP (pat, 1) == const1_rtx);
6349 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6350 break;
6351 default:
6352 operands[1] = pat;
6353 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6354 break;
6355 }
6356 }
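/* For instance, when the delay slot contains (set (reg) (plus (reg)
   (const_int N))), the addition is folded into the restore itself,
   producing a single " restore %rSRC, N, %rDST"-style instruction, while a
   NULL pattern produces a bare " restore" (a sketch, register names
   elided).  */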
6357
6358 /* Output a return. */
6359
6360 const char *
6361 output_return (rtx_insn *insn)
6362 {
6363 if (crtl->calls_eh_return)
6364 {
6365 /* If the function uses __builtin_eh_return, the eh_return
6366 machinery occupies the delay slot. */
6367 gcc_assert (!final_sequence);
6368
6369 if (flag_delayed_branch)
6370 {
6371 if (!TARGET_FLAT && TARGET_V9)
6372 fputs ("\treturn\t%i7+8\n", asm_out_file);
6373 else
6374 {
6375 if (!TARGET_FLAT)
6376 fputs ("\trestore\n", asm_out_file);
6377
6378 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6379 }
6380
6381 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6382 }
6383 else
6384 {
6385 if (!TARGET_FLAT)
6386 fputs ("\trestore\n", asm_out_file);
6387
6388 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6389 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6390 }
6391 }
6392 else if (sparc_leaf_function_p || TARGET_FLAT)
6393 {
6394 /* This is a leaf or flat function so we don't have to bother restoring
6395 the register window, which frees us from dealing with the convoluted
6396 semantics of restore/return. We simply output the jump to the
6397 return address and the insn in the delay slot (if any). */
6398
6399 return "jmp\t%%o7+%)%#";
6400 }
6401 else
6402 {
6403 /* This is a regular function so we have to restore the register window.
6404 We may have a pending insn for the delay slot, which will be either
6405 combined with the 'restore' instruction or put in the delay slot of
6406 the 'return' instruction. */
6407
6408 if (final_sequence)
6409 {
6410 rtx_insn *delay;
6411 rtx pat;
6412 int seen;
6413
6414 delay = NEXT_INSN (insn);
6415 gcc_assert (delay);
6416
6417 pat = PATTERN (delay);
6418
6419 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6420 {
6421 epilogue_renumber (&pat, 0);
6422 return "return\t%%i7+%)%#";
6423 }
6424 else
6425 {
6426 output_asm_insn ("jmp\t%%i7+%)", NULL);
6427
6428 /* We're going to output the insn in the delay slot manually.
6429 Make sure to output its source location first. */
6430 PATTERN (delay) = gen_blockage ();
6431 INSN_CODE (delay) = -1;
6432 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6433 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6434
6435 output_restore (pat);
6436 }
6437 }
6438 else
6439 {
6440 /* The delay slot is empty. */
6441 if (TARGET_V9)
6442 return "return\t%%i7+%)\n\t nop";
6443 else if (flag_delayed_branch)
6444 return "jmp\t%%i7+%)\n\t restore";
6445 else
6446 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6447 }
6448 }
6449
6450 return "";
6451 }
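/* Typical output for the cases above (a sketch): a V9 non-leaf function
   with an empty delay slot returns with

	return	%i7+8
	 nop

   whereas a leaf or flat function simply emits "jmp %o7+8" followed by
   whatever insn was scheduled into its delay slot.  */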
6452
6453 /* Output a sibling call. */
6454
6455 const char *
6456 output_sibcall (rtx_insn *insn, rtx call_operand)
6457 {
6458 rtx operands[1];
6459
6460 gcc_assert (flag_delayed_branch);
6461
6462 operands[0] = call_operand;
6463
6464 if (sparc_leaf_function_p || TARGET_FLAT)
6465 {
6466 /* This is a leaf or flat function so we don't have to bother restoring
6467 the register window. We simply output the jump to the function and
6468 the insn in the delay slot (if any). */
6469
6470 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6471
6472 if (final_sequence)
6473 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6474 operands);
6475 else
6476 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6477 it into a branch if possible. */
6478 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6479 operands);
6480 }
6481 else
6482 {
6483 /* This is a regular function so we have to restore the register window.
6484 We may have a pending insn for the delay slot, which will be combined
6485 with the 'restore' instruction. */
6486
6487 output_asm_insn ("call\t%a0, 0", operands);
6488
6489 if (final_sequence)
6490 {
6491 rtx_insn *delay;
6492 rtx pat;
6493 int seen;
6494
6495 delay = NEXT_INSN (insn);
6496 gcc_assert (delay);
6497
6498 pat = PATTERN (delay);
6499
6500 /* We're going to output the insn in the delay slot manually.
6501 Make sure to output its source location first. */
6502 PATTERN (delay) = gen_blockage ();
6503 INSN_CODE (delay) = -1;
6504 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6505 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6506
6507 output_restore (pat);
6508 }
6509 else
6510 output_restore (NULL_RTX);
6511 }
6512
6513 return "";
6514 }
6515 \f
6516 /* Functions for handling argument passing.
6517
6518 For 32-bit, the first 6 args are normally in registers and the rest are
6519 pushed. Any arg that starts within the first 6 words is at least
6520 partially passed in a register unless its data type forbids.
6521
6522 For 64-bit, the argument registers are laid out as an array of 16 elements
6523 and arguments are added sequentially. The first 6 int args and up to the
6524 first 16 fp args (depending on size) are passed in regs.
6525
6526 Slot Stack Integral Float Float in structure Double Long Double
6527 ---- ----- -------- ----- ------------------ ------ -----------
6528 15 [SP+248] %f31 %f30,%f31 %d30
6529 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6530 13 [SP+232] %f27 %f26,%f27 %d26
6531 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6532 11 [SP+216] %f23 %f22,%f23 %d22
6533 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6534 9 [SP+200] %f19 %f18,%f19 %d18
6535 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6536 7 [SP+184] %f15 %f14,%f15 %d14
6537 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6538 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6539 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6540 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6541 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6542 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6543 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6544
6545 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6546
6547 Integral arguments are always passed as 64-bit quantities appropriately
6548 extended.
6549
6550 Passing of floating point values is handled as follows.
6551 If a prototype is in scope:
6552 If the value is in a named argument (i.e. not a stdarg function or a
6553 value not part of the `...') then the value is passed in the appropriate
6554 fp reg.
6555 If the value is part of the `...' and is passed in one of the first 6
6556 slots then the value is passed in the appropriate int reg.
6557 If the value is part of the `...' and is not passed in one of the first 6
6558 slots then the value is passed in memory.
6559 If a prototype is not in scope:
6560 If the value is one of the first 6 arguments the value is passed in the
6561 appropriate integer reg and the appropriate fp reg.
6562 If the value is not one of the first 6 arguments the value is passed in
6563 the appropriate fp reg and in memory.
6564
6565
6566 Summary of the calling conventions implemented by GCC on the SPARC:
6567
6568 32-bit ABI:
6569 size argument return value
6570
6571 small integer <4 int. reg. int. reg.
6572 word 4 int. reg. int. reg.
6573 double word 8 int. reg. int. reg.
6574
6575 _Complex small integer <8 int. reg. int. reg.
6576 _Complex word 8 int. reg. int. reg.
6577 _Complex double word 16 memory int. reg.
6578
6579 vector integer <=8 int. reg. FP reg.
6580 vector integer >8 memory memory
6581
6582 float 4 int. reg. FP reg.
6583 double 8 int. reg. FP reg.
6584 long double 16 memory memory
6585
6586 _Complex float 8 memory FP reg.
6587 _Complex double 16 memory FP reg.
6588 _Complex long double 32 memory FP reg.
6589
6590 vector float any memory memory
6591
6592 aggregate any memory memory
6593
6594
6595
6596 64-bit ABI:
6597 size argument return value
6598
6599 small integer <8 int. reg. int. reg.
6600 word 8 int. reg. int. reg.
6601 double word 16 int. reg. int. reg.
6602
6603 _Complex small integer <16 int. reg. int. reg.
6604 _Complex word 16 int. reg. int. reg.
6605 _Complex double word 32 memory int. reg.
6606
6607 vector integer <=16 FP reg. FP reg.
6608 vector integer 16<s<=32 memory FP reg.
6609 vector integer >32 memory memory
6610
6611 float 4 FP reg. FP reg.
6612 double 8 FP reg. FP reg.
6613 long double 16 FP reg. FP reg.
6614
6615 _Complex float 8 FP reg. FP reg.
6616 _Complex double 16 FP reg. FP reg.
6617 _Complex long double 32 memory FP reg.
6618
6619 vector float <=16 FP reg. FP reg.
6620 vector float 16<s<=32 memory FP reg.
6621 vector float >32 memory memory
6622
6623 aggregate <=16 reg. reg.
6624 aggregate 16<s<=32 memory reg.
6625 aggregate >32 memory memory
6626
6627
6628
6629 Note #1: complex floating-point types follow the extended SPARC ABIs as
6630 implemented by the Sun compiler.
6631
6632 Note #2: integral vector types follow the scalar floating-point types
6633 conventions to match what is implemented by the Sun VIS SDK.
6634
6635 Note #3: floating-point vector types follow the aggregate types
6636 conventions. */
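/* As a concrete reading of the tables above (a sketch): a 'double'
   argument is passed in a pair of integer registers (e.g. %o0/%o1) under
   the 32-bit ABI but in a floating-point register (e.g. %d0) under the
   64-bit ABI, while both ABIs return it in a floating-point register.  An
   aggregate of 24 bytes is passed in memory under both ABIs but is still
   returned in registers under the 64-bit ABI.  */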
6637
6638
6639 /* Maximum number of int regs for args. */
6640 #define SPARC_INT_ARG_MAX 6
6641 /* Maximum number of fp regs for args. */
6642 #define SPARC_FP_ARG_MAX 16
6643 /* Number of words (partially) occupied for a given size in units. */
6644 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
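/* E.g. CEIL_NWORDS (12) is 2 in 64-bit mode (UNITS_PER_WORD == 8) and
   3 in 32-bit mode (UNITS_PER_WORD == 4).  */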
6645
6646 /* Handle the INIT_CUMULATIVE_ARGS macro.
6647 Initialize a variable CUM of type CUMULATIVE_ARGS
6648 for a call to a function whose data type is FNTYPE.
6649 For a library call, FNTYPE is 0. */
6650
6651 void
6652 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6653 {
6654 cum->words = 0;
6655 cum->prototype_p = fntype && prototype_p (fntype);
6656 cum->libcall_p = !fntype;
6657 }
6658
6659 /* Handle promotion of pointer and integer arguments. */
6660
6661 static machine_mode
6662 sparc_promote_function_mode (const_tree type, machine_mode mode,
6663 int *punsignedp, const_tree, int)
6664 {
6665 if (type && POINTER_TYPE_P (type))
6666 {
6667 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6668 return Pmode;
6669 }
6670
6671 /* Integral arguments are passed as full words, as per the ABI. */
6672 if (GET_MODE_CLASS (mode) == MODE_INT
6673 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6674 return word_mode;
6675
6676 return mode;
6677 }
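/* So e.g. a 'short' or 'char' argument is promoted to word_mode (SImode in
   32-bit mode, DImode in 64-bit mode) and pointers are widened to Pmode,
   matching the full-word argument slots described above.  */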
6678
6679 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6680
6681 static bool
6682 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6683 {
6684 return TARGET_ARCH64 ? true : false;
6685 }
6686
6687 /* Traverse the record TYPE recursively and call FUNC on its fields.
6688 NAMED is true if this is for a named parameter. DATA is passed
6689 to FUNC for each field. OFFSET is the starting position and
6690 PACKED is true if we are inside a packed record. */
6691
6692 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6693 static void
6694 traverse_record_type (const_tree type, bool named, T *data,
6695 HOST_WIDE_INT offset = 0, bool packed = false)
6696 {
6697 /* The ABI obviously doesn't specify how packed structures are passed.
6698 These are passed in integer regs if possible, otherwise memory. */
6699 if (!packed)
6700 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6701 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6702 {
6703 packed = true;
6704 break;
6705 }
6706
6707 /* Walk the real fields, but skip those with no size or a zero size.
6708 ??? Fields with variable offset are handled as having zero offset. */
6709 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6710 if (TREE_CODE (field) == FIELD_DECL)
6711 {
6712 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6713 continue;
6714
6715 HOST_WIDE_INT bitpos = offset;
6716 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6717 bitpos += int_bit_position (field);
6718
6719 tree field_type = TREE_TYPE (field);
6720 if (TREE_CODE (field_type) == RECORD_TYPE)
6721 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6722 packed);
6723 else
6724 {
6725 const bool fp_type
6726 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6727 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6728 data);
6729 }
6730 }
6731 }
6732
6733 /* Handle recursive register classifying for structure layout. */
6734
6735 typedef struct
6736 {
6737 bool fp_regs; /* true if field eligible to FP registers. */
6738 bool fp_regs_in_first_word; /* true if such field in first word. */
6739 } classify_data_t;
6740
6741 /* A subroutine of function_arg_slotno. Classify the field. */
6742
6743 inline void
6744 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6745 classify_data_t *data)
6746 {
6747 if (fp)
6748 {
6749 data->fp_regs = true;
6750 if (bitpos < BITS_PER_WORD)
6751 data->fp_regs_in_first_word = true;
6752 }
6753 }
6754
6755 /* Compute the slot number to pass an argument in.
6756 Return the slot number or -1 if passing on the stack.
6757
6758 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6759 the preceding args and about the function being called.
6760 MODE is the argument's machine mode.
6761 TYPE is the data type of the argument (as a tree).
6762 This is null for libcalls where that information may
6763 not be available.
6764 NAMED is nonzero if this argument is a named parameter
6765 (otherwise it is an extra parameter matching an ellipsis).
6766 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6767 *PREGNO records the register number to use if scalar type.
6768 *PPADDING records the amount of padding needed in words. */
6769
6770 static int
6771 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6772 const_tree type, bool named, bool incoming,
6773 int *pregno, int *ppadding)
6774 {
6775 int regbase = (incoming
6776 ? SPARC_INCOMING_INT_ARG_FIRST
6777 : SPARC_OUTGOING_INT_ARG_FIRST);
6778 int slotno = cum->words;
6779 enum mode_class mclass;
6780 int regno;
6781
6782 *ppadding = 0;
6783
6784 if (type && TREE_ADDRESSABLE (type))
6785 return -1;
6786
6787 if (TARGET_ARCH32
6788 && mode == BLKmode
6789 && type
6790 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6791 return -1;
6792
6793 /* For SPARC64, objects requiring 16-byte alignment get it. */
6794 if (TARGET_ARCH64
6795 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6796 && (slotno & 1) != 0)
6797 slotno++, *ppadding = 1;
6798
6799 mclass = GET_MODE_CLASS (mode);
6800 if (type && TREE_CODE (type) == VECTOR_TYPE)
6801 {
6802 /* Vector types deserve special treatment because they are
6803 polymorphic wrt their mode, depending upon whether VIS
6804 instructions are enabled. */
6805 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6806 {
6807 /* The SPARC port defines no floating-point vector modes. */
6808 gcc_assert (mode == BLKmode);
6809 }
6810 else
6811 {
6812 /* Integral vector types should either have a vector
6813 mode or an integral mode, because we are guaranteed
6814 by pass_by_reference that their size is not greater
6815 than 16 bytes and TImode is 16-byte wide. */
6816 gcc_assert (mode != BLKmode);
6817
6818 /* Vector integers are handled like floats according to
6819 the Sun VIS SDK. */
6820 mclass = MODE_FLOAT;
6821 }
6822 }
6823
6824 switch (mclass)
6825 {
6826 case MODE_FLOAT:
6827 case MODE_COMPLEX_FLOAT:
6828 case MODE_VECTOR_INT:
6829 if (TARGET_ARCH64 && TARGET_FPU && named)
6830 {
6831 /* If all arg slots are filled, then must pass on stack. */
6832 if (slotno >= SPARC_FP_ARG_MAX)
6833 return -1;
6834
6835 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6836 /* Arguments filling only a single FP register are
6837 right-justified in the outer double FP register. */
6838 if (GET_MODE_SIZE (mode) <= 4)
6839 regno++;
6840 break;
6841 }
6842 /* fallthrough */
6843
6844 case MODE_INT:
6845 case MODE_COMPLEX_INT:
6846 /* If all arg slots are filled, then must pass on stack. */
6847 if (slotno >= SPARC_INT_ARG_MAX)
6848 return -1;
6849
6850 regno = regbase + slotno;
6851 break;
6852
6853 case MODE_RANDOM:
6854 if (mode == VOIDmode)
6855 /* MODE is VOIDmode when generating the actual call. */
6856 return -1;
6857
6858 gcc_assert (mode == BLKmode);
6859
6860 if (TARGET_ARCH32
6861 || !type
6862 || (TREE_CODE (type) != RECORD_TYPE
6863 && TREE_CODE (type) != VECTOR_TYPE))
6864 {
6865 /* If all arg slots are filled, then must pass on stack. */
6866 if (slotno >= SPARC_INT_ARG_MAX)
6867 return -1;
6868
6869 regno = regbase + slotno;
6870 }
6871 else /* TARGET_ARCH64 && type */
6872 {
6873 /* If all arg slots are filled, then must pass on stack. */
6874 if (slotno >= SPARC_FP_ARG_MAX)
6875 return -1;
6876
6877 if (TREE_CODE (type) == RECORD_TYPE)
6878 {
6879 classify_data_t data = { false, false };
6880 traverse_record_type<classify_data_t, classify_registers>
6881 (type, named, &data);
6882
6883 if (data.fp_regs)
6884 {
6885 /* If all FP slots are filled except for the last one and
6886 there is no FP field in the first word, then must pass
6887 on stack. */
6888 if (slotno >= SPARC_FP_ARG_MAX - 1
6889 && !data.fp_regs_in_first_word)
6890 return -1;
6891 }
6892 else
6893 {
6894 /* If all int slots are filled, then must pass on stack. */
6895 if (slotno >= SPARC_INT_ARG_MAX)
6896 return -1;
6897 }
6898 }
6899
6900 /* PREGNO isn't set since both int and FP regs can be used. */
6901 return slotno;
6902 }
6903 break;
6904
6905 default :
6906 gcc_unreachable ();
6907 }
6908
6909 *pregno = regno;
6910 return slotno;
6911 }
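/* Worked examples for the 64-bit ABI (a sketch): a named 'double' in slot 3
   gets regno SPARC_FP_ARG_FIRST + 6, i.e. %d6, while a named 'float' in the
   same slot is right-justified into %f7; an 'int' in slot 3 simply gets
   regbase + 3, i.e. %o3 on the outgoing side.  These match the slot table
   given earlier.  */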
6912
6913 /* Handle recursive register counting/assigning for structure layout. */
6914
6915 typedef struct
6916 {
6917 int slotno; /* slot number of the argument. */
6918 int regbase; /* regno of the base register. */
6919 int intoffset; /* offset of the first pending integer field. */
6920 int nregs; /* number of words passed in registers. */
6921 bool stack; /* true if part of the argument is on the stack. */
6922 rtx ret; /* return expression being built. */
6923 } assign_data_t;
6924
6925 /* A subroutine of function_arg_record_value. Compute the number of integer
6926 registers to be assigned between PARMS->intoffset and BITPOS. Return
6927 true if at least one integer register is assigned or false otherwise. */
6928
6929 static bool
6930 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6931 {
6932 if (data->intoffset < 0)
6933 return false;
6934
6935 const int intoffset = data->intoffset;
6936 data->intoffset = -1;
6937
6938 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6939 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6940 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6941 int nregs = (endbit - startbit) / BITS_PER_WORD;
6942
6943 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6944 {
6945 nregs = SPARC_INT_ARG_MAX - this_slotno;
6946
6947 /* We need to pass this field (partly) on the stack. */
6948 data->stack = 1;
6949 }
6950
6951 if (nregs <= 0)
6952 return false;
6953
6954 *pnregs = nregs;
6955 return true;
6956 }
6957
6958 /* A subroutine of function_arg_record_value. Compute the number and the mode
6959 of the FP registers to be assigned for FIELD. Return true if at least one
6960 FP register is assigned or false otherwise. */
6961
6962 static bool
6963 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6964 assign_data_t *data,
6965 int *pnregs, machine_mode *pmode)
6966 {
6967 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6968 machine_mode mode = DECL_MODE (field);
6969 int nregs, nslots;
6970
6971 /* Slots are counted as words while regs are counted as having the size of
6972 the (inner) mode. */
6973 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6974 {
6975 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6976 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6977 }
6978 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6979 {
6980 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6981 nregs = 2;
6982 }
6983 else
6984 nregs = 1;
6985
6986 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6987
6988 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6989 {
6990 nslots = SPARC_FP_ARG_MAX - this_slotno;
6991 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6992
6993 /* We need to pass this field (partly) on the stack. */
6994 data->stack = 1;
6995
6996 if (nregs <= 0)
6997 return false;
6998 }
6999
7000 *pnregs = nregs;
7001 *pmode = mode;
7002 return true;
7003 }
7004
7005 /* A subroutine of function_arg_record_value. Count the number of registers
7006 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7007
7008 inline void
7009 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7010 assign_data_t *data)
7011 {
7012 if (fp)
7013 {
7014 int nregs;
7015 machine_mode mode;
7016
7017 if (compute_int_layout (bitpos, data, &nregs))
7018 data->nregs += nregs;
7019
7020 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7021 data->nregs += nregs;
7022 }
7023 else
7024 {
7025 if (data->intoffset < 0)
7026 data->intoffset = bitpos;
7027 }
7028 }
7029
7030 /* A subroutine of function_arg_record_value. Assign the bits of the
7031 structure between PARMS->intoffset and BITPOS to integer registers. */
7032
7033 static void
7034 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
7035 {
7036 int intoffset = data->intoffset;
7037 machine_mode mode;
7038 int nregs;
7039
7040 if (!compute_int_layout (bitpos, data, &nregs))
7041 return;
7042
7043 /* If this is the trailing part of a word, only load that much into
7044 the register. Otherwise load the whole register. Note that in
7045 the latter case we may pick up unwanted bits. It's not a problem
7046 at the moment, but we may wish to revisit this. */
7047 if (intoffset % BITS_PER_WORD != 0)
7048 mode = smallest_int_mode_for_size (BITS_PER_WORD
7049 - intoffset % BITS_PER_WORD);
7050 else
7051 mode = word_mode;
7052
7053 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7054 unsigned int regno = data->regbase + this_slotno;
7055 intoffset /= BITS_PER_UNIT;
7056
7057 do
7058 {
7059 rtx reg = gen_rtx_REG (mode, regno);
7060 XVECEXP (data->ret, 0, data->stack + data->nregs)
7061 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7062 data->nregs += 1;
7063 mode = word_mode;
7064 regno += 1;
7065 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7066 }
7067 while (--nregs > 0);
7068 }
7069
7070 /* A subroutine of function_arg_record_value. Assign FIELD at position
7071 BITPOS to FP registers. */
7072
7073 static void
7074 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
7075 assign_data_t *data)
7076 {
7077 int nregs;
7078 machine_mode mode;
7079
7080 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7081 return;
7082
7083 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7084 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7085 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7086 regno++;
7087 int pos = bitpos / BITS_PER_UNIT;
7088
7089 do
7090 {
7091 rtx reg = gen_rtx_REG (mode, regno);
7092 XVECEXP (data->ret, 0, data->stack + data->nregs)
7093 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7094 data->nregs += 1;
7095 regno += GET_MODE_SIZE (mode) / 4;
7096 pos += GET_MODE_SIZE (mode);
7097 }
7098 while (--nregs > 0);
7099 }
7100
7101 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7102 the structure between PARMS->intoffset and BITPOS to registers. */
7103
7104 inline void
7105 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7106 assign_data_t *data)
7107 {
7108 if (fp)
7109 {
7110 assign_int_registers (bitpos, data);
7111
7112 assign_fp_registers (field, bitpos, data);
7113 }
7114 else
7115 {
7116 if (data->intoffset < 0)
7117 data->intoffset = bitpos;
7118 }
7119 }
7120
7121 /* Used by function_arg and sparc_function_value_1 to implement the complex
7122 conventions of the 64-bit ABI for passing and returning structures.
7123 Return an expression valid as a return value for the FUNCTION_ARG
7124 and TARGET_FUNCTION_VALUE.
7125
7126 TYPE is the data type of the argument (as a tree).
7127 This is null for libcalls where that information may
7128 not be available.
7129 MODE is the argument's machine mode.
7130 SLOTNO is the index number of the argument's slot in the parameter array.
7131 NAMED is true if this argument is a named parameter
7132 (otherwise it is an extra parameter matching an ellipsis).
7133 REGBASE is the regno of the base register for the parameter array. */
7134
7135 static rtx
7136 function_arg_record_value (const_tree type, machine_mode mode,
7137 int slotno, bool named, int regbase)
7138 {
7139 HOST_WIDE_INT typesize = int_size_in_bytes (type);
7140 assign_data_t data;
7141 int nregs;
7142
7143 data.slotno = slotno;
7144 data.regbase = regbase;
7145
7146 /* Count how many registers we need. */
7147 data.nregs = 0;
7148 data.intoffset = 0;
7149 data.stack = false;
7150 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7151
7152 /* Take into account pending integer fields. */
7153 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
7154 data.nregs += nregs;
7155
7156 /* Allocate the vector and handle some annoying special cases. */
7157 nregs = data.nregs;
7158
7159 if (nregs == 0)
7160 {
7161 /* ??? Empty structure has no value? Duh? */
7162 if (typesize <= 0)
7163 {
7164 /* Though there's nothing really to store, return a word register
7165 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7166 leads to breakage due to the fact that there are zero bytes to
7167 load. */
7168 return gen_rtx_REG (mode, regbase);
7169 }
7170
7171 /* ??? C++ has structures with no fields, and yet a size. Give up
7172 for now and pass everything back in integer registers. */
7173 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7174 if (nregs + slotno > SPARC_INT_ARG_MAX)
7175 nregs = SPARC_INT_ARG_MAX - slotno;
7176 }
7177
7178 gcc_assert (nregs > 0);
7179
7180 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7181
7182 /* If at least one field must be passed on the stack, generate
7183 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7184 also be passed on the stack. We can't do much better because the
7185 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7186 of structures for which the fields passed exclusively in registers
7187 are not at the beginning of the structure. */
7188 if (data.stack)
7189 XVECEXP (data.ret, 0, 0)
7190 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7191
7192 /* Assign the registers. */
7193 data.nregs = 0;
7194 data.intoffset = 0;
7195 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7196
7197 /* Assign pending integer fields. */
7198 assign_int_registers (typesize * BITS_PER_UNIT, &data);
7199
7200 gcc_assert (data.nregs == nregs);
7201
7202 return data.ret;
7203 }
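/* For example, under the 64-bit ABI a named argument of type
   struct { double d; long l; } starting in slot 0 comes back as
   (parallel [(%d0 at offset 0) (%o1 at offset 8)]): the FP field is
   assigned to an FP register and the pending integer field to the integer
   register of its slot (a sketch; register names assume the outgoing
   side).  */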
7204
7205 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7206 of the 64-bit ABI for passing and returning unions.
7207 Return an expression valid as a return value for the FUNCTION_ARG
7208 and TARGET_FUNCTION_VALUE.
7209
7210 SIZE is the size in bytes of the union.
7211 MODE is the argument's machine mode.
7212 REGNO is the hard register the union will be passed in. */
7213
7214 static rtx
7215 function_arg_union_value (int size, machine_mode mode, int slotno,
7216 int regno)
7217 {
7218 int nwords = CEIL_NWORDS (size), i;
7219 rtx regs;
7220
7221 /* See comment in previous function for empty structures. */
7222 if (nwords == 0)
7223 return gen_rtx_REG (mode, regno);
7224
7225 if (slotno == SPARC_INT_ARG_MAX - 1)
7226 nwords = 1;
7227
7228 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7229
7230 for (i = 0; i < nwords; i++)
7231 {
7232 /* Unions are passed left-justified. */
7233 XVECEXP (regs, 0, i)
7234 = gen_rtx_EXPR_LIST (VOIDmode,
7235 gen_rtx_REG (word_mode, regno),
7236 GEN_INT (UNITS_PER_WORD * i));
7237 regno++;
7238 }
7239
7240 return regs;
7241 }
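/* E.g. a 12-byte union starting in slot 2 is returned here as
   (parallel [(%o2 at offset 0) (%o3 at offset 8)]) in 64-bit mode: two
   left-justified word registers (a sketch assuming the outgoing register
   base).  */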
7242
7243 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7244 for passing and returning BLKmode vectors.
7245 Return an expression valid as a return value for the FUNCTION_ARG
7246 and TARGET_FUNCTION_VALUE.
7247
7248 SIZE is the size in bytes of the vector.
7249 REGNO is the FP hard register the vector will be passed in. */
7250
7251 static rtx
7252 function_arg_vector_value (int size, int regno)
7253 {
7254 const int nregs = MAX (1, size / 8);
7255 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7256
7257 if (size < 8)
7258 XVECEXP (regs, 0, 0)
7259 = gen_rtx_EXPR_LIST (VOIDmode,
7260 gen_rtx_REG (SImode, regno),
7261 const0_rtx);
7262 else
7263 for (int i = 0; i < nregs; i++)
7264 XVECEXP (regs, 0, i)
7265 = gen_rtx_EXPR_LIST (VOIDmode,
7266 gen_rtx_REG (DImode, regno + 2*i),
7267 GEN_INT (i*8));
7268
7269 return regs;
7270 }
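/* E.g. a 16-byte BLKmode vector passed at the first FP slot is split into
   two DImode pieces in %d0 and %d2 (FP regnos REGNO and REGNO + 2) at
   offsets 0 and 8, while a 4-byte vector is passed as a single SImode
   piece (a sketch).  */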
7271
7272 /* Determine where to put an argument to a function.
7273 Value is zero to push the argument on the stack,
7274 or a hard register in which to store the argument.
7275
7276 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7277 the preceding args and about the function being called.
7278 MODE is the argument's machine mode.
7279 TYPE is the data type of the argument (as a tree).
7280 This is null for libcalls where that information may
7281 not be available.
7282 NAMED is true if this argument is a named parameter
7283 (otherwise it is an extra parameter matching an ellipsis).
7284 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7285 TARGET_FUNCTION_INCOMING_ARG. */
7286
7287 static rtx
7288 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7289 const_tree type, bool named, bool incoming)
7290 {
7291 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7292
7293 int regbase = (incoming
7294 ? SPARC_INCOMING_INT_ARG_FIRST
7295 : SPARC_OUTGOING_INT_ARG_FIRST);
7296 int slotno, regno, padding;
7297 enum mode_class mclass = GET_MODE_CLASS (mode);
7298
7299 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7300 &regno, &padding);
7301 if (slotno == -1)
7302 return 0;
7303
7304 /* Vector types deserve special treatment because they are polymorphic wrt
7305 their mode, depending upon whether VIS instructions are enabled. */
7306 if (type && TREE_CODE (type) == VECTOR_TYPE)
7307 {
7308 HOST_WIDE_INT size = int_size_in_bytes (type);
7309 gcc_assert ((TARGET_ARCH32 && size <= 8)
7310 || (TARGET_ARCH64 && size <= 16));
7311
7312 if (mode == BLKmode)
7313 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7314
7315 mclass = MODE_FLOAT;
7316 }
7317
7318 if (TARGET_ARCH32)
7319 return gen_rtx_REG (mode, regno);
7320
7321 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7322 and are promoted to registers if possible. */
7323 if (type && TREE_CODE (type) == RECORD_TYPE)
7324 {
7325 HOST_WIDE_INT size = int_size_in_bytes (type);
7326 gcc_assert (size <= 16);
7327
7328 return function_arg_record_value (type, mode, slotno, named, regbase);
7329 }
7330
7331 /* Unions up to 16 bytes in size are passed in integer registers. */
7332 else if (type && TREE_CODE (type) == UNION_TYPE)
7333 {
7334 HOST_WIDE_INT size = int_size_in_bytes (type);
7335 gcc_assert (size <= 16);
7336
7337 return function_arg_union_value (size, mode, slotno, regno);
7338 }
7339
7340 /* V9 FP args in reg slots beyond the int reg slots get passed in FP regs
7341 but also have the arg slot allocated for them.
7342 If no prototype is in scope, FP values in register slots get passed
7343 in two places: either FP regs and int regs, or FP regs and memory. */
7344 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7345 && SPARC_FP_REG_P (regno))
7346 {
7347 rtx reg = gen_rtx_REG (mode, regno);
7348 if (cum->prototype_p || cum->libcall_p)
7349 return reg;
7350 else
7351 {
7352 rtx v0, v1;
7353
7354 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7355 {
7356 int intreg;
7357
7358 /* On incoming, we don't need to know that the value
7359 is passed in %f0 and %i0, and it confuses other parts
7360 causing needless spillage even on the simplest cases. */
7361 if (incoming)
7362 return reg;
7363
7364 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7365 + (regno - SPARC_FP_ARG_FIRST) / 2);
7366
7367 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7368 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7369 const0_rtx);
7370 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7371 }
7372 else
7373 {
7374 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7375 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7376 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7377 }
7378 }
7379 }
7380
7381 /* All other aggregate types are passed in an integer register in a mode
7382 corresponding to the size of the type. */
7383 else if (type && AGGREGATE_TYPE_P (type))
7384 {
7385 HOST_WIDE_INT size = int_size_in_bytes (type);
7386 gcc_assert (size <= 16);
7387
7388 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7389 }
7390
7391 return gen_rtx_REG (mode, regno);
7392 }
7393
7394 /* Handle the TARGET_FUNCTION_ARG target hook. */
7395
7396 static rtx
7397 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7398 const_tree type, bool named)
7399 {
7400 return sparc_function_arg_1 (cum, mode, type, named, false);
7401 }
7402
7403 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7404
7405 static rtx
7406 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7407 const_tree type, bool named)
7408 {
7409 return sparc_function_arg_1 (cum, mode, type, named, true);
7410 }
7411
7412 /* For SPARC64, objects requiring 16-byte alignment are passed with that alignment. */
7413
7414 static unsigned int
7415 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7416 {
7417 return ((TARGET_ARCH64
7418 && (GET_MODE_ALIGNMENT (mode) == 128
7419 || (type && TYPE_ALIGN (type) == 128)))
7420 ? 128
7421 : PARM_BOUNDARY);
7422 }
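
/* Editor's note: for example, a 'long double' argument (TFmode, 128-bit
   alignment) is aligned to 128 bits on the 64-bit ABI, while everything
   else falls back to PARM_BOUNDARY.  */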
7423
7424 /* For an arg passed partly in registers and partly in memory,
7425 this is the number of bytes of registers used.
7426 For args passed entirely in registers or entirely in memory, zero.
7427
7428 Any arg that starts in the first 6 regs but won't entirely fit in them
7429 needs partial registers on v8. On v9, structures with integer
7430 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7431 values that begin in the last fp reg [where "last fp reg" varies with the
7432 mode] will be split between that reg and memory. */
7433
7434 static int
7435 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7436 tree type, bool named)
7437 {
7438 int slotno, regno, padding;
7439
7440 /* We pass false for incoming here; it doesn't matter. */
7441 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7442 false, &regno, &padding);
7443
7444 if (slotno == -1)
7445 return 0;
7446
7447 if (TARGET_ARCH32)
7448 {
7449 if ((slotno + (mode == BLKmode
7450 ? CEIL_NWORDS (int_size_in_bytes (type))
7451 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7452 > SPARC_INT_ARG_MAX)
7453 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7454 }
7455 else
7456 {
7457 /* We are guaranteed by pass_by_reference that the size of the
7458 argument is not greater than 16 bytes, so we only need to return
7459 one word if the argument is partially passed in registers. */
7460
7461 if (type && AGGREGATE_TYPE_P (type))
7462 {
7463 int size = int_size_in_bytes (type);
7464
7465 if (size > UNITS_PER_WORD
7466 && (slotno == SPARC_INT_ARG_MAX - 1
7467 || slotno == SPARC_FP_ARG_MAX - 1))
7468 return UNITS_PER_WORD;
7469 }
7470 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7471 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7472 && ! (TARGET_FPU && named)))
7473 {
7474 /* The complex types are passed as packed types. */
7475 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7476 && slotno == SPARC_INT_ARG_MAX - 1)
7477 return UNITS_PER_WORD;
7478 }
7479 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7480 {
7481 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7482 > SPARC_FP_ARG_MAX)
7483 return UNITS_PER_WORD;
7484 }
7485 }
7486
7487 return 0;
7488 }
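
/* Worked example (editor's note, assuming the 64-bit ABI where
   SPARC_INT_ARG_MAX is 6 and UNITS_PER_WORD is 8): a 16-byte struct whose
   first word lands in the last integer slot (slotno == 5) gets 8 bytes in
   a register and the remaining 8 bytes in memory, so UNITS_PER_WORD is
   returned.  */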
7489
7490 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7491 Specify whether to pass the argument by reference. */
7492
7493 static bool
7494 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7495 machine_mode mode, const_tree type,
7496 bool named ATTRIBUTE_UNUSED)
7497 {
7498 if (TARGET_ARCH32)
7499 /* Original SPARC 32-bit ABI says that structures and unions,
7500 and quad-precision floats are passed by reference. For Pascal,
7501 also pass arrays by reference. All other base types are passed
7502 in registers.
7503
7504 Extended ABI (as implemented by the Sun compiler) says that all
7505 complex floats are passed by reference. Pass complex integers
7506 in registers up to 8 bytes. More generally, enforce the 2-word
7507 cap for passing arguments in registers.
7508
7509 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7510 integers are passed like floats of the same size, that is in
7511 registers up to 8 bytes. Pass all vector floats by reference
7512 like structure and unions. */
7513 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7514 || mode == SCmode
7515 /* Catch CDImode, TFmode, DCmode and TCmode. */
7516 || GET_MODE_SIZE (mode) > 8
7517 || (type
7518 && TREE_CODE (type) == VECTOR_TYPE
7519 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7520 else
7521 /* Original SPARC 64-bit ABI says that structures and unions
7522 smaller than 16 bytes are passed in registers, as well as
7523 all other base types.
7524
7525 Extended ABI (as implemented by the Sun compiler) says that
7526 complex floats are passed in registers up to 16 bytes. Pass
7527 all complex integers in registers up to 16 bytes. More generally,
7528 enforce the 2-word cap for passing arguments in registers.
7529
7530 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7531 integers are passed like floats of the same size, that is in
7532 registers (up to 16 bytes). Pass all vector floats like structure
7533 and unions. */
7534 return ((type
7535 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7536 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7537 /* Catch CTImode and TCmode. */
7538 || GET_MODE_SIZE (mode) > 16);
7539 }
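
/* Examples (editor's note): with -m32 a '_Complex double' (DCmode,
   16 bytes) or a 'long double' (TFmode) argument is passed by reference,
   whereas with -m64 both fit under the 16-byte cap and are passed by
   value; a 32-byte structure is passed by reference under either ABI.  */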
7540
7541 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7542 Update the data in CUM to advance over an argument
7543 of mode MODE and data type TYPE.
7544 TYPE is null for libcalls where that information may not be available. */
7545
7546 static void
7547 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7548 const_tree type, bool named)
7549 {
7550 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7551 int regno, padding;
7552
7553 /* We pass false for incoming here; it doesn't matter. */
7554 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7555
7556 /* If argument requires leading padding, add it. */
7557 cum->words += padding;
7558
7559 if (TARGET_ARCH32)
7560 cum->words += (mode == BLKmode
7561 ? CEIL_NWORDS (int_size_in_bytes (type))
7562 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7563 else
7564 {
7565 if (type && AGGREGATE_TYPE_P (type))
7566 {
7567 int size = int_size_in_bytes (type);
7568
7569 if (size <= 8)
7570 ++cum->words;
7571 else if (size <= 16)
7572 cum->words += 2;
7573 else /* passed by reference */
7574 ++cum->words;
7575 }
7576 else
7577 cum->words += (mode == BLKmode
7578 ? CEIL_NWORDS (int_size_in_bytes (type))
7579 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7580 }
7581 }
7582
7583 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7584 are always stored left-shifted in their argument slot. */
7585
7586 static pad_direction
7587 sparc_function_arg_padding (machine_mode mode, const_tree type)
7588 {
7589 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7590 return PAD_UPWARD;
7591
7592 /* Fall back to the default. */
7593 return default_function_arg_padding (mode, type);
7594 }
7595
7596 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7597 Specify whether to return the return value in memory. */
7598
7599 static bool
7600 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7601 {
7602 if (TARGET_ARCH32)
7603 /* Original SPARC 32-bit ABI says that structures and unions,
7604 and quad-precision floats are returned in memory. All other
7605 base types are returned in registers.
7606
7607 Extended ABI (as implemented by the Sun compiler) says that
7608 all complex floats are returned in registers (8 FP registers
7609 at most for '_Complex long double'). Return all complex integers
7610 in registers (4 at most for '_Complex long long').
7611
7612 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7613 integers are returned like floats of the same size, that is in
7614 registers up to 8 bytes and in memory otherwise. Return all
7615 vector floats in memory like structure and unions; note that
7616 they always have BLKmode like the latter. */
7617 return (TYPE_MODE (type) == BLKmode
7618 || TYPE_MODE (type) == TFmode
7619 || (TREE_CODE (type) == VECTOR_TYPE
7620 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7621 else
7622 /* Original SPARC 64-bit ABI says that structures and unions
7623 smaller than 32 bytes are returned in registers, as well as
7624 all other base types.
7625
7626 Extended ABI (as implemented by the Sun compiler) says that all
7627 complex floats are returned in registers (8 FP registers at most
7628 for '_Complex long double'). Return all complex integers in
7629 registers (4 at most for '_Complex TItype').
7630
7631 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7632 integers are returned like floats of the same size, that is in
7633 registers. Return all vector floats like structure and unions;
7634 note that they always have BLKmode like the latter. */
7635 return (TYPE_MODE (type) == BLKmode
7636 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7637 }
7638
7639 /* Handle the TARGET_STRUCT_VALUE target hook.
7640 Return where to find the structure return value address. */
7641
7642 static rtx
7643 sparc_struct_value_rtx (tree fndecl, int incoming)
7644 {
7645 if (TARGET_ARCH64)
7646 return 0;
7647 else
7648 {
7649 rtx mem;
7650
7651 if (incoming)
7652 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7653 STRUCT_VALUE_OFFSET));
7654 else
7655 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7656 STRUCT_VALUE_OFFSET));
7657
7658 /* Only follow the SPARC ABI for fixed-size structure returns.
7659 Variable-size structure returns are handled per the normal
7660 procedures in GCC. This is enabled by -mstd-struct-return. */
7661 if (incoming == 2
7662 && sparc_std_struct_return
7663 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7664 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7665 {
7666 /* We must check and adjust the return address, since the caller may
7667 or may not have really provided the return object. */
7668 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7669 rtx scratch = gen_reg_rtx (SImode);
7670 rtx_code_label *endlab = gen_label_rtx ();
7671
7672 /* Calculate the return object size. */
7673 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7674 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7675 /* Construct a temporary return value. */
7676 rtx temp_val
7677 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7678
7679 /* Implement SPARC 32-bit psABI callee return struct checking:
7680
7681 Fetch the instruction where we will return to and see if
7682 it's an unimp instruction (the most significant 10 bits
7683 will be zero). */
7684 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7685 plus_constant (Pmode,
7686 ret_reg, 8)));
7687 /* Assume the size is valid and pre-adjust. */
7688 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7689 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7690 0, endlab);
7691 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7692 /* Write the address of the memory pointed to by temp_val into
7693 the memory pointed to by mem. */
7694 emit_move_insn (mem, XEXP (temp_val, 0));
7695 emit_label (endlab);
7696 }
7697
7698 return mem;
7699 }
7700 }
7701
7702 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7703 For v9, function return values are subject to the same rules as arguments,
7704 except that up to 32 bytes may be returned in registers. */
7705
7706 static rtx
7707 sparc_function_value_1 (const_tree type, machine_mode mode,
7708 bool outgoing)
7709 {
7710 /* Beware that the two values are swapped here wrt function_arg. */
7711 int regbase = (outgoing
7712 ? SPARC_INCOMING_INT_ARG_FIRST
7713 : SPARC_OUTGOING_INT_ARG_FIRST);
7714 enum mode_class mclass = GET_MODE_CLASS (mode);
7715 int regno;
7716
7717 /* Vector types deserve special treatment because they are polymorphic wrt
7718 their mode, depending upon whether VIS instructions are enabled. */
7719 if (type && TREE_CODE (type) == VECTOR_TYPE)
7720 {
7721 HOST_WIDE_INT size = int_size_in_bytes (type);
7722 gcc_assert ((TARGET_ARCH32 && size <= 8)
7723 || (TARGET_ARCH64 && size <= 32));
7724
7725 if (mode == BLKmode)
7726 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7727
7728 mclass = MODE_FLOAT;
7729 }
7730
7731 if (TARGET_ARCH64 && type)
7732 {
7733 /* Structures up to 32 bytes in size are returned in registers. */
7734 if (TREE_CODE (type) == RECORD_TYPE)
7735 {
7736 HOST_WIDE_INT size = int_size_in_bytes (type);
7737 gcc_assert (size <= 32);
7738
7739 return function_arg_record_value (type, mode, 0, 1, regbase);
7740 }
7741
7742 /* Unions up to 32 bytes in size are returned in integer registers. */
7743 else if (TREE_CODE (type) == UNION_TYPE)
7744 {
7745 HOST_WIDE_INT size = int_size_in_bytes (type);
7746 gcc_assert (size <= 32);
7747
7748 return function_arg_union_value (size, mode, 0, regbase);
7749 }
7750
7751 /* Objects that require it are returned in FP registers. */
7752 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7753 ;
7754
7755 /* All other aggregate types are returned in an integer register in a
7756 mode corresponding to the size of the type. */
7757 else if (AGGREGATE_TYPE_P (type))
7758 {
7759 /* All other aggregate types are passed in an integer register
7760 in a mode corresponding to the size of the type. */
7761 HOST_WIDE_INT size = int_size_in_bytes (type);
7762 gcc_assert (size <= 32);
7763
7764 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7765
7766 /* ??? We probably should have made the same ABI change in
7767 3.4.0 as the one we made for unions. The latter was
7768 required by the SCD though, while the former is not
7769 specified, so we favored compatibility and efficiency.
7770
7771 Now we're stuck for aggregates larger than 16 bytes,
7772 because OImode vanished in the meantime. Let's not
7773 try to be unduly clever, and simply follow the ABI
7774 for unions in that case. */
7775 if (mode == BLKmode)
7776 return function_arg_union_value (size, mode, 0, regbase);
7777 else
7778 mclass = MODE_INT;
7779 }
7780
7781 /* We should only have pointer and integer types at this point. This
7782 must match sparc_promote_function_mode. */
7783 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7784 mode = word_mode;
7785 }
7786
7787 /* We should only have pointer and integer types at this point, except with
7788 -freg-struct-return. This must match sparc_promote_function_mode. */
7789 else if (TARGET_ARCH32
7790 && !(type && AGGREGATE_TYPE_P (type))
7791 && mclass == MODE_INT
7792 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7793 mode = word_mode;
7794
7795 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7796 regno = SPARC_FP_ARG_FIRST;
7797 else
7798 regno = regbase;
7799
7800 return gen_rtx_REG (mode, regno);
7801 }
7802
7803 /* Handle TARGET_FUNCTION_VALUE.
7804 On the SPARC, the value is found in the first "output" register, but the
7805 called function leaves it in the first "input" register. */
7806
7807 static rtx
7808 sparc_function_value (const_tree valtype,
7809 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7810 bool outgoing)
7811 {
7812 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7813 }
7814
7815 /* Handle TARGET_LIBCALL_VALUE. */
7816
7817 static rtx
7818 sparc_libcall_value (machine_mode mode,
7819 const_rtx fun ATTRIBUTE_UNUSED)
7820 {
7821 return sparc_function_value_1 (NULL_TREE, mode, false);
7822 }
7823
7824 /* Handle TARGET_FUNCTION_VALUE_REGNO_P.
7825 On the SPARC, the first "output" reg is used for integer values, and the
7826 first floating point register is used for floating point values. */
7827
7828 static bool
7829 sparc_function_value_regno_p (const unsigned int regno)
7830 {
7831 return (regno == 8 || (TARGET_FPU && regno == 32));
7832 }
7833
7834 /* Do what is necessary for `va_start'. We look at the current function
7835 to determine if stdarg or varargs is used and return the address of
7836 the first unnamed parameter. */
7837
7838 static rtx
7839 sparc_builtin_saveregs (void)
7840 {
7841 int first_reg = crtl->args.info.words;
7842 rtx address;
7843 int regno;
7844
7845 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7846 emit_move_insn (gen_rtx_MEM (word_mode,
7847 gen_rtx_PLUS (Pmode,
7848 frame_pointer_rtx,
7849 GEN_INT (FIRST_PARM_OFFSET (0)
7850 + (UNITS_PER_WORD
7851 * regno)))),
7852 gen_rtx_REG (word_mode,
7853 SPARC_INCOMING_INT_ARG_FIRST + regno));
7854
7855 address = gen_rtx_PLUS (Pmode,
7856 frame_pointer_rtx,
7857 GEN_INT (FIRST_PARM_OFFSET (0)
7858 + UNITS_PER_WORD * first_reg));
7859
7860 return address;
7861 }
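
/* Editor's sketch: if the named arguments consumed three words, the loop
   above dumps the remaining incoming registers %i3..%i5 (SPARC_INT_ARG_MAX
   being 6) into their reserved stack slots at
   %fp + FIRST_PARM_OFFSET + UNITS_PER_WORD * regno, and the returned
   address points at the first of those slots.  */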
7862
7863 /* Implement `va_start' for stdarg. */
7864
7865 static void
7866 sparc_va_start (tree valist, rtx nextarg)
7867 {
7868 nextarg = expand_builtin_saveregs ();
7869 std_expand_builtin_va_start (valist, nextarg);
7870 }
7871
7872 /* Implement `va_arg' for stdarg. */
7873
7874 static tree
7875 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7876 gimple_seq *post_p)
7877 {
7878 HOST_WIDE_INT size, rsize, align;
7879 tree addr, incr;
7880 bool indirect;
7881 tree ptrtype = build_pointer_type (type);
7882
7883 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7884 {
7885 indirect = true;
7886 size = rsize = UNITS_PER_WORD;
7887 align = 0;
7888 }
7889 else
7890 {
7891 indirect = false;
7892 size = int_size_in_bytes (type);
7893 rsize = ROUND_UP (size, UNITS_PER_WORD);
7894 align = 0;
7895
7896 if (TARGET_ARCH64)
7897 {
7898 /* For SPARC64, objects requiring 16-byte alignment get it. */
7899 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7900 align = 2 * UNITS_PER_WORD;
7901
7902 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7903 are left-justified in their slots. */
7904 if (AGGREGATE_TYPE_P (type))
7905 {
7906 if (size == 0)
7907 size = rsize = UNITS_PER_WORD;
7908 else
7909 size = rsize;
7910 }
7911 }
7912 }
7913
7914 incr = valist;
7915 if (align)
7916 {
7917 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7918 incr = fold_convert (sizetype, incr);
7919 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7920 size_int (-align));
7921 incr = fold_convert (ptr_type_node, incr);
7922 }
7923
7924 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7925 addr = incr;
7926
7927 if (BYTES_BIG_ENDIAN && size < rsize)
7928 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7929
7930 if (indirect)
7931 {
7932 addr = fold_convert (build_pointer_type (ptrtype), addr);
7933 addr = build_va_arg_indirect_ref (addr);
7934 }
7935
7936 /* If the address isn't aligned properly for the type, we need a temporary.
7937 FIXME: This is inefficient; usually we can do this in registers. */
7938 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7939 {
7940 tree tmp = create_tmp_var (type, "va_arg_tmp");
7941 tree dest_addr = build_fold_addr_expr (tmp);
7942 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7943 3, dest_addr, addr, size_int (rsize));
7944 TREE_ADDRESSABLE (tmp) = 1;
7945 gimplify_and_add (copy, pre_p);
7946 addr = dest_addr;
7947 }
7948
7949 else
7950 addr = fold_convert (ptrtype, addr);
7951
7952 incr = fold_build_pointer_plus_hwi (incr, rsize);
7953 gimplify_assign (valist, incr, post_p);
7954
7955 return build_va_arg_indirect_ref (addr);
7956 }
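
/* Editor's sketch of the gimplified sequence for a plain 8-byte scalar on
   the 64-bit ABI (no indirection, no extra alignment, no padding):

     addr = valist;
     valist = valist + 8;
     result = *(type *) addr;  */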
7957 \f
7958 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7959 Specify whether the vector mode is supported by the hardware. */
7960
7961 static bool
7962 sparc_vector_mode_supported_p (machine_mode mode)
7963 {
7964 return TARGET_VIS && VECTOR_MODE_P (mode);
7965 }
7966 \f
7967 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7968
7969 static machine_mode
7970 sparc_preferred_simd_mode (scalar_mode mode)
7971 {
7972 if (TARGET_VIS)
7973 switch (mode)
7974 {
7975 case E_SImode:
7976 return V2SImode;
7977 case E_HImode:
7978 return V4HImode;
7979 case E_QImode:
7980 return V8QImode;
7981
7982 default:;
7983 }
7984
7985 return word_mode;
7986 }
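
/* Editor's note: with VIS enabled the vectorizer is thus steered towards
   the 8-byte partitioned modes, e.g. a QImode loop is vectorized with
   V8QImode (eight elements in one 64-bit FP register).  */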
7987 \f
7988 /* Return the string to output an unconditional branch to LABEL, which is
7989 the operand number of the label.
7990
7991 DEST is the destination insn (i.e. the label), INSN is the source. */
7992
7993 const char *
7994 output_ubranch (rtx dest, rtx_insn *insn)
7995 {
7996 static char string[64];
7997 bool v9_form = false;
7998 int delta;
7999 char *p;
8000
8001 /* Even if we are trying to use cbcond for this, evaluate
8002 whether we can use V9 branches as our backup plan. */
8003
8004 delta = 5000000;
8005 if (INSN_ADDRESSES_SET_P ())
8006 delta = (INSN_ADDRESSES (INSN_UID (dest))
8007 - INSN_ADDRESSES (INSN_UID (insn)));
8008
8009 /* Leave some instructions for "slop". */
8010 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8011 v9_form = true;
8012
8013 if (TARGET_CBCOND)
8014 {
8015 bool emit_nop = emit_cbcond_nop (insn);
8016 bool far = false;
8017 const char *rval;
8018
8019 if (delta < -500 || delta > 500)
8020 far = true;
8021
8022 if (far)
8023 {
8024 if (v9_form)
8025 rval = "ba,a,pt\t%%xcc, %l0";
8026 else
8027 rval = "b,a\t%l0";
8028 }
8029 else
8030 {
8031 if (emit_nop)
8032 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8033 else
8034 rval = "cwbe\t%%g0, %%g0, %l0";
8035 }
8036 return rval;
8037 }
8038
8039 if (v9_form)
8040 strcpy (string, "ba%*,pt\t%%xcc, ");
8041 else
8042 strcpy (string, "b%*\t");
8043
8044 p = strchr (string, '\0');
8045 *p++ = '%';
8046 *p++ = 'l';
8047 *p++ = '0';
8048 *p++ = '%';
8049 *p++ = '(';
8050 *p = '\0';
8051
8052 return string;
8053 }
8054
8055 /* Return the string to output a conditional branch to LABEL, which is
8056 the operand number of the label. OP is the conditional expression.
8057 XEXP (OP, 0) is assumed to be a condition code register (integer or
8058 floating point) and its mode specifies what kind of comparison we made.
8059
8060 DEST is the destination insn (i.e. the label), INSN is the source.
8061
8062 REVERSED is nonzero if we should reverse the sense of the comparison.
8063
8064 ANNUL is nonzero if we should generate an annulling branch. */
8065
8066 const char *
8067 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8068 rtx_insn *insn)
8069 {
8070 static char string[64];
8071 enum rtx_code code = GET_CODE (op);
8072 rtx cc_reg = XEXP (op, 0);
8073 machine_mode mode = GET_MODE (cc_reg);
8074 const char *labelno, *branch;
8075 int spaces = 8, far;
8076 char *p;
8077
8078 /* v9 branches are limited to +-1MB. If it is too far away,
8079 change
8080
8081 bne,pt %xcc, .LC30
8082
8083 to
8084
8085 be,pn %xcc, .+12
8086 nop
8087 ba .LC30
8088
8089 and
8090
8091 fbne,a,pn %fcc2, .LC29
8092
8093 to
8094
8095 fbe,pt %fcc2, .+16
8096 nop
8097 ba .LC29 */
8098
8099 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8100 if (reversed ^ far)
8101 {
8102 /* Reversing an FP compare needs care -- an ordered compare
8103 becomes an unordered compare and vice versa. */
8104 if (mode == CCFPmode || mode == CCFPEmode)
8105 code = reverse_condition_maybe_unordered (code);
8106 else
8107 code = reverse_condition (code);
8108 }
8109
8110 /* Start by writing the branch condition. */
8111 if (mode == CCFPmode || mode == CCFPEmode)
8112 {
8113 switch (code)
8114 {
8115 case NE:
8116 branch = "fbne";
8117 break;
8118 case EQ:
8119 branch = "fbe";
8120 break;
8121 case GE:
8122 branch = "fbge";
8123 break;
8124 case GT:
8125 branch = "fbg";
8126 break;
8127 case LE:
8128 branch = "fble";
8129 break;
8130 case LT:
8131 branch = "fbl";
8132 break;
8133 case UNORDERED:
8134 branch = "fbu";
8135 break;
8136 case ORDERED:
8137 branch = "fbo";
8138 break;
8139 case UNGT:
8140 branch = "fbug";
8141 break;
8142 case UNLT:
8143 branch = "fbul";
8144 break;
8145 case UNEQ:
8146 branch = "fbue";
8147 break;
8148 case UNGE:
8149 branch = "fbuge";
8150 break;
8151 case UNLE:
8152 branch = "fbule";
8153 break;
8154 case LTGT:
8155 branch = "fblg";
8156 break;
8157 default:
8158 gcc_unreachable ();
8159 }
8160
8161 /* ??? !v9: FP branches cannot be preceded by another floating point
8162 insn. Because there is currently no concept of pre-delay slots,
8163 we can fix this only by always emitting a nop before a floating
8164 point branch. */
8165
8166 string[0] = '\0';
8167 if (! TARGET_V9)
8168 strcpy (string, "nop\n\t");
8169 strcat (string, branch);
8170 }
8171 else
8172 {
8173 switch (code)
8174 {
8175 case NE:
8176 if (mode == CCVmode || mode == CCXVmode)
8177 branch = "bvs";
8178 else
8179 branch = "bne";
8180 break;
8181 case EQ:
8182 if (mode == CCVmode || mode == CCXVmode)
8183 branch = "bvc";
8184 else
8185 branch = "be";
8186 break;
8187 case GE:
8188 if (mode == CCNZmode || mode == CCXNZmode)
8189 branch = "bpos";
8190 else
8191 branch = "bge";
8192 break;
8193 case GT:
8194 branch = "bg";
8195 break;
8196 case LE:
8197 branch = "ble";
8198 break;
8199 case LT:
8200 if (mode == CCNZmode || mode == CCXNZmode)
8201 branch = "bneg";
8202 else
8203 branch = "bl";
8204 break;
8205 case GEU:
8206 branch = "bgeu";
8207 break;
8208 case GTU:
8209 branch = "bgu";
8210 break;
8211 case LEU:
8212 branch = "bleu";
8213 break;
8214 case LTU:
8215 branch = "blu";
8216 break;
8217 default:
8218 gcc_unreachable ();
8219 }
8220 strcpy (string, branch);
8221 }
8222 spaces -= strlen (branch);
8223 p = strchr (string, '\0');
8224
8225 /* Now add the annulling, the label, and a possible noop. */
8226 if (annul && ! far)
8227 {
8228 strcpy (p, ",a");
8229 p += 2;
8230 spaces -= 2;
8231 }
8232
8233 if (TARGET_V9)
8234 {
8235 rtx note;
8236 int v8 = 0;
8237
8238 if (! far && insn && INSN_ADDRESSES_SET_P ())
8239 {
8240 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8241 - INSN_ADDRESSES (INSN_UID (insn)));
8242 /* Leave some instructions for "slop". */
8243 if (delta < -260000 || delta >= 260000)
8244 v8 = 1;
8245 }
8246
8247 switch (mode)
8248 {
8249 case E_CCmode:
8250 case E_CCNZmode:
8251 case E_CCCmode:
8252 case E_CCVmode:
8253 labelno = "%%icc, ";
8254 if (v8)
8255 labelno = "";
8256 break;
8257 case E_CCXmode:
8258 case E_CCXNZmode:
8259 case E_CCXCmode:
8260 case E_CCXVmode:
8261 labelno = "%%xcc, ";
8262 gcc_assert (!v8);
8263 break;
8264 case E_CCFPmode:
8265 case E_CCFPEmode:
8266 {
8267 static char v9_fcc_labelno[] = "%%fccX, ";
8268 /* Set the char indicating the number of the fcc reg to use. */
8269 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8270 labelno = v9_fcc_labelno;
8271 if (v8)
8272 {
8273 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8274 labelno = "";
8275 }
8276 }
8277 break;
8278 default:
8279 gcc_unreachable ();
8280 }
8281
8282 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8283 {
8284 strcpy (p,
8285 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8286 >= profile_probability::even ()) ^ far)
8287 ? ",pt" : ",pn");
8288 p += 3;
8289 spaces -= 3;
8290 }
8291 }
8292 else
8293 labelno = "";
8294
8295 if (spaces > 0)
8296 *p++ = '\t';
8297 else
8298 *p++ = ' ';
8299 strcpy (p, labelno);
8300 p = strchr (p, '\0');
8301 if (far)
8302 {
8303 strcpy (p, ".+12\n\t nop\n\tb\t");
8304 /* Skip the next insn if requested or
8305 if we know that it will be a nop. */
8306 if (annul || ! final_sequence)
8307 p[3] = '6';
8308 p += 14;
8309 }
8310 *p++ = '%';
8311 *p++ = 'l';
8312 *p++ = label + '0';
8313 *p++ = '%';
8314 *p++ = '#';
8315 *p = '\0';
8316
8317 return string;
8318 }
8319
8320 /* Emit a library call comparison between floating point X and Y.
8321 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8322 Return the new operator to be used in the comparison sequence.
8323
8324 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8325 values as arguments instead of the TFmode registers themselves;
8326 that's why we cannot call emit_float_lib_cmp. */
8327
8328 rtx
8329 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8330 {
8331 const char *qpfunc;
8332 rtx slot0, slot1, result, tem, tem2, libfunc;
8333 machine_mode mode;
8334 enum rtx_code new_comparison;
8335
8336 switch (comparison)
8337 {
8338 case EQ:
8339 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8340 break;
8341
8342 case NE:
8343 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8344 break;
8345
8346 case GT:
8347 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8348 break;
8349
8350 case GE:
8351 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8352 break;
8353
8354 case LT:
8355 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8356 break;
8357
8358 case LE:
8359 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8360 break;
8361
8362 case ORDERED:
8363 case UNORDERED:
8364 case UNGT:
8365 case UNLT:
8366 case UNEQ:
8367 case UNGE:
8368 case UNLE:
8369 case LTGT:
8370 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8371 break;
8372
8373 default:
8374 gcc_unreachable ();
8375 }
8376
8377 if (TARGET_ARCH64)
8378 {
8379 if (MEM_P (x))
8380 {
8381 tree expr = MEM_EXPR (x);
8382 if (expr)
8383 mark_addressable (expr);
8384 slot0 = x;
8385 }
8386 else
8387 {
8388 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
8389 emit_move_insn (slot0, x);
8390 }
8391
8392 if (MEM_P (y))
8393 {
8394 tree expr = MEM_EXPR (y);
8395 if (expr)
8396 mark_addressable (expr);
8397 slot1 = y;
8398 }
8399 else
8400 {
8401 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
8402 emit_move_insn (slot1, y);
8403 }
8404
8405 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8406 emit_library_call (libfunc, LCT_NORMAL,
8407 DImode,
8408 XEXP (slot0, 0), Pmode,
8409 XEXP (slot1, 0), Pmode);
8410 mode = DImode;
8411 }
8412 else
8413 {
8414 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8415 emit_library_call (libfunc, LCT_NORMAL,
8416 SImode,
8417 x, TFmode, y, TFmode);
8418 mode = SImode;
8419 }
8420
8421
8422 /* Immediately move the result of the libcall into a pseudo
8423 register so reload doesn't clobber the value if it needs
8424 the return register for a spill reg. */
8425 result = gen_reg_rtx (mode);
8426 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8427
8428 switch (comparison)
8429 {
8430 default:
8431 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8432 case ORDERED:
8433 case UNORDERED:
8434 new_comparison = (comparison == UNORDERED ? EQ : NE);
8435 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT (3));
8436 case UNGT:
8437 case UNGE:
8438 new_comparison = (comparison == UNGT ? GT : NE);
8439 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8440 case UNLE:
8441 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8442 case UNLT:
8443 tem = gen_reg_rtx (mode);
8444 if (TARGET_ARCH32)
8445 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8446 else
8447 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8448 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8449 case UNEQ:
8450 case LTGT:
8451 tem = gen_reg_rtx (mode);
8452 if (TARGET_ARCH32)
8453 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8454 else
8455 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8456 tem2 = gen_reg_rtx (mode);
8457 if (TARGET_ARCH32)
8458 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8459 else
8460 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8461 new_comparison = (comparison == UNEQ ? EQ : NE);
8462 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8463 }
8464
8465 gcc_unreachable ();
8466 }
8467
8468 /* Generate an unsigned DImode to FP conversion. This is the same code
8469 optabs would emit if we didn't have TFmode patterns. */
8470
8471 void
8472 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8473 {
8474 rtx i0, i1, f0, in, out;
8475
8476 out = operands[0];
8477 in = force_reg (DImode, operands[1]);
8478 rtx_code_label *neglab = gen_label_rtx ();
8479 rtx_code_label *donelab = gen_label_rtx ();
8480 i0 = gen_reg_rtx (DImode);
8481 i1 = gen_reg_rtx (DImode);
8482 f0 = gen_reg_rtx (mode);
8483
8484 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8485
8486 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8487 emit_jump_insn (gen_jump (donelab));
8488 emit_barrier ();
8489
8490 emit_label (neglab);
8491
8492 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8493 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8494 emit_insn (gen_iordi3 (i0, i0, i1));
8495 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8496 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8497
8498 emit_label (donelab);
8499 }
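
/* Editor's note on the negative path above: for inputs with the sign bit
   set, the value is halved with the lost low bit folded back in
   (i0 = (in >> 1) | (in & 1)), converted to FP, and then doubled
   (f0 + f0); keeping the low bit sticky lets the final doubling round
   correctly.  */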
8500
8501 /* Generate an FP to unsigned DImode conversion. This is the same code
8502 optabs would emit if we didn't have TFmode patterns. */
8503
8504 void
8505 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8506 {
8507 rtx i0, i1, f0, in, out, limit;
8508
8509 out = operands[0];
8510 in = force_reg (mode, operands[1]);
8511 rtx_code_label *neglab = gen_label_rtx ();
8512 rtx_code_label *donelab = gen_label_rtx ();
8513 i0 = gen_reg_rtx (DImode);
8514 i1 = gen_reg_rtx (DImode);
8515 limit = gen_reg_rtx (mode);
8516 f0 = gen_reg_rtx (mode);
8517
8518 emit_move_insn (limit,
8519 const_double_from_real_value (
8520 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8521 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8522
8523 emit_insn (gen_rtx_SET (out,
8524 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8525 emit_jump_insn (gen_jump (donelab));
8526 emit_barrier ();
8527
8528 emit_label (neglab);
8529
8530 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8531 emit_insn (gen_rtx_SET (i0,
8532 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8533 emit_insn (gen_movdi (i1, const1_rtx));
8534 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8535 emit_insn (gen_xordi3 (out, i0, i1));
8536
8537 emit_label (donelab);
8538 }
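
/* Editor's note on the out-of-range path above: an input >= 2^63 cannot be
   converted with a signed fix, so 2^63 is subtracted first, the difference
   is converted, and bit 63 is then restored by the final XOR with
   (1 << 63).  */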
8539
8540 /* Return the string to output a compare and branch instruction to DEST.
8541 DEST is the destination insn (i.e. the label), INSN is the source,
8542 and OP is the conditional expression. */
8543
8544 const char *
8545 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8546 {
8547 machine_mode mode = GET_MODE (XEXP (op, 0));
8548 enum rtx_code code = GET_CODE (op);
8549 const char *cond_str, *tmpl;
8550 int far, emit_nop, len;
8551 static char string[64];
8552 char size_char;
8553
8554 /* Compare and Branch is limited to +-2KB. If it is too far away,
8555 change
8556
8557 cxbne X, Y, .LC30
8558
8559 to
8560
8561 cxbe X, Y, .+16
8562 nop
8563 ba,pt xcc, .LC30
8564 nop */
8565
8566 len = get_attr_length (insn);
8567
8568 far = len == 4;
8569 emit_nop = len == 2;
8570
8571 if (far)
8572 code = reverse_condition (code);
8573
8574 size_char = ((mode == SImode) ? 'w' : 'x');
8575
8576 switch (code)
8577 {
8578 case NE:
8579 cond_str = "ne";
8580 break;
8581
8582 case EQ:
8583 cond_str = "e";
8584 break;
8585
8586 case GE:
8587 cond_str = "ge";
8588 break;
8589
8590 case GT:
8591 cond_str = "g";
8592 break;
8593
8594 case LE:
8595 cond_str = "le";
8596 break;
8597
8598 case LT:
8599 cond_str = "l";
8600 break;
8601
8602 case GEU:
8603 cond_str = "cc";
8604 break;
8605
8606 case GTU:
8607 cond_str = "gu";
8608 break;
8609
8610 case LEU:
8611 cond_str = "leu";
8612 break;
8613
8614 case LTU:
8615 cond_str = "cs";
8616 break;
8617
8618 default:
8619 gcc_unreachable ();
8620 }
8621
8622 if (far)
8623 {
8624 int veryfar = 1, delta;
8625
8626 if (INSN_ADDRESSES_SET_P ())
8627 {
8628 delta = (INSN_ADDRESSES (INSN_UID (dest))
8629 - INSN_ADDRESSES (INSN_UID (insn)));
8630 /* Leave some instructions for "slop". */
8631 if (delta >= -260000 && delta < 260000)
8632 veryfar = 0;
8633 }
8634
8635 if (veryfar)
8636 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8637 else
8638 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8639 }
8640 else
8641 {
8642 if (emit_nop)
8643 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8644 else
8645 tmpl = "c%cb%s\t%%1, %%2, %%3";
8646 }
8647
8648 snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8649
8650 return string;
8651 }
8652
8653 /* Return the string to output a conditional branch to LABEL, testing
8654 register REG. LABEL is the operand number of the label; REG is the
8655 operand number of the reg. OP is the conditional expression. The mode
8656 of REG says what kind of comparison we made.
8657
8658 DEST is the destination insn (i.e. the label), INSN is the source.
8659
8660 REVERSED is nonzero if we should reverse the sense of the comparison.
8661
8662 ANNUL is nonzero if we should generate an annulling branch. */
8663
8664 const char *
8665 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8666 int annul, rtx_insn *insn)
8667 {
8668 static char string[64];
8669 enum rtx_code code = GET_CODE (op);
8670 machine_mode mode = GET_MODE (XEXP (op, 0));
8671 rtx note;
8672 int far;
8673 char *p;
8674
8675 /* Branch-on-register instructions are limited to +-128KB. If it is too far away,
8676 change
8677
8678 brnz,pt %g1, .LC30
8679
8680 to
8681
8682 brz,pn %g1, .+12
8683 nop
8684 ba,pt %xcc, .LC30
8685
8686 and
8687
8688 brgez,a,pn %o1, .LC29
8689
8690 to
8691
8692 brlz,pt %o1, .+16
8693 nop
8694 ba,pt %xcc, .LC29 */
8695
8696 far = get_attr_length (insn) >= 3;
8697
8698 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8699 if (reversed ^ far)
8700 code = reverse_condition (code);
8701
8702 /* Only 64-bit versions of these instructions exist. */
8703 gcc_assert (mode == DImode);
8704
8705 /* Start by writing the branch condition. */
8706
8707 switch (code)
8708 {
8709 case NE:
8710 strcpy (string, "brnz");
8711 break;
8712
8713 case EQ:
8714 strcpy (string, "brz");
8715 break;
8716
8717 case GE:
8718 strcpy (string, "brgez");
8719 break;
8720
8721 case LT:
8722 strcpy (string, "brlz");
8723 break;
8724
8725 case LE:
8726 strcpy (string, "brlez");
8727 break;
8728
8729 case GT:
8730 strcpy (string, "brgz");
8731 break;
8732
8733 default:
8734 gcc_unreachable ();
8735 }
8736
8737 p = strchr (string, '\0');
8738
8739 /* Now add the annulling, reg, label, and nop. */
8740 if (annul && ! far)
8741 {
8742 strcpy (p, ",a");
8743 p += 2;
8744 }
8745
8746 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8747 {
8748 strcpy (p,
8749 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8750 >= profile_probability::even ()) ^ far)
8751 ? ",pt" : ",pn");
8752 p += 3;
8753 }
8754
8755 *p = p < string + 8 ? '\t' : ' ';
8756 p++;
8757 *p++ = '%';
8758 *p++ = '0' + reg;
8759 *p++ = ',';
8760 *p++ = ' ';
8761 if (far)
8762 {
8763 int veryfar = 1, delta;
8764
8765 if (INSN_ADDRESSES_SET_P ())
8766 {
8767 delta = (INSN_ADDRESSES (INSN_UID (dest))
8768 - INSN_ADDRESSES (INSN_UID (insn)));
8769 /* Leave some instructions for "slop". */
8770 if (delta >= -260000 && delta < 260000)
8771 veryfar = 0;
8772 }
8773
8774 strcpy (p, ".+12\n\t nop\n\t");
8775 /* Skip the next insn if requested or
8776 if we know that it will be a nop. */
8777 if (annul || ! final_sequence)
8778 p[3] = '6';
8779 p += 12;
8780 if (veryfar)
8781 {
8782 strcpy (p, "b\t");
8783 p += 2;
8784 }
8785 else
8786 {
8787 strcpy (p, "ba,pt\t%%xcc, ");
8788 p += 13;
8789 }
8790 }
8791 *p++ = '%';
8792 *p++ = 'l';
8793 *p++ = '0' + label;
8794 *p++ = '%';
8795 *p++ = '#';
8796 *p = '\0';
8797
8798 return string;
8799 }
8800
8801 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8802 Such instructions cannot be used in the delay slot of return insn on v9.
8803 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8804 */
8805
8806 static int
8807 epilogue_renumber (register rtx *where, int test)
8808 {
8809 register const char *fmt;
8810 register int i;
8811 register enum rtx_code code;
8812
8813 if (*where == 0)
8814 return 0;
8815
8816 code = GET_CODE (*where);
8817
8818 switch (code)
8819 {
8820 case REG:
8821 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8822 return 1;
8823 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8824 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8825 /* fallthrough */
8826 case SCRATCH:
8827 case CC0:
8828 case PC:
8829 case CONST_INT:
8830 case CONST_WIDE_INT:
8831 case CONST_DOUBLE:
8832 return 0;
8833
8834 /* Do not replace the frame pointer with the stack pointer because
8835 it can cause the delayed instruction to load below the stack.
8836 This occurs when instructions like:
8837
8838 (set (reg/i:SI 24 %i0)
8839 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8840 (const_int -20 [0xffffffec])) 0))
8841
8842 are in the return delay slot. */
8843 case PLUS:
8844 if (GET_CODE (XEXP (*where, 0)) == REG
8845 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8846 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8847 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8848 return 1;
8849 break;
8850
8851 case MEM:
8852 if (SPARC_STACK_BIAS
8853 && GET_CODE (XEXP (*where, 0)) == REG
8854 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8855 return 1;
8856 break;
8857
8858 default:
8859 break;
8860 }
8861
8862 fmt = GET_RTX_FORMAT (code);
8863
8864 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8865 {
8866 if (fmt[i] == 'E')
8867 {
8868 register int j;
8869 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8870 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8871 return 1;
8872 }
8873 else if (fmt[i] == 'e'
8874 && epilogue_renumber (&(XEXP (*where, i)), test))
8875 return 1;
8876 }
8877 return 0;
8878 }
8879 \f
8880 /* Leaf functions and non-leaf functions have different needs. */
8881
8882 static const int
8883 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8884
8885 static const int
8886 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8887
8888 static const int *const reg_alloc_orders[] = {
8889 reg_leaf_alloc_order,
8890 reg_nonleaf_alloc_order};
8891
8892 void
8893 order_regs_for_local_alloc (void)
8894 {
8895 static int last_order_nonleaf = 1;
8896
8897 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8898 {
8899 last_order_nonleaf = !last_order_nonleaf;
8900 memcpy ((char *) reg_alloc_order,
8901 (const char *) reg_alloc_orders[last_order_nonleaf],
8902 FIRST_PSEUDO_REGISTER * sizeof (int));
8903 }
8904 }
8905 \f
8906 /* Return 1 if REG and MEM are legitimate enough to allow the various
8907 MEM<-->REG splits to be run. */
8908
8909 int
8910 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8911 {
8912 /* Punt if we are here by mistake. */
8913 gcc_assert (reload_completed);
8914
8915 /* We must have an offsettable memory reference. */
8916 if (!offsettable_memref_p (mem))
8917 return 0;
8918
8919 /* If we have legitimate args for ldd/std, we do not want
8920 the split to happen. */
8921 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8922 return 0;
8923
8924 /* Success. */
8925 return 1;
8926 }
8927
8928 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8929
8930 void
8931 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8932 {
8933 rtx high_part = gen_highpart (mode, dest);
8934 rtx low_part = gen_lowpart (mode, dest);
8935 rtx word0 = adjust_address (src, mode, 0);
8936 rtx word1 = adjust_address (src, mode, 4);
8937
8938 if (reg_overlap_mentioned_p (high_part, word1))
8939 {
8940 emit_move_insn_1 (low_part, word1);
8941 emit_move_insn_1 (high_part, word0);
8942 }
8943 else
8944 {
8945 emit_move_insn_1 (high_part, word0);
8946 emit_move_insn_1 (low_part, word1);
8947 }
8948 }
8949
8950 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8951
8952 void
8953 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8954 {
8955 rtx word0 = adjust_address (dest, mode, 0);
8956 rtx word1 = adjust_address (dest, mode, 4);
8957 rtx high_part = gen_highpart (mode, src);
8958 rtx low_part = gen_lowpart (mode, src);
8959
8960 emit_move_insn_1 (word0, high_part);
8961 emit_move_insn_1 (word1, low_part);
8962 }
8963
8964 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8965
8966 int
8967 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8968 {
8969 /* Punt if we are here by mistake. */
8970 gcc_assert (reload_completed);
8971
8972 if (GET_CODE (reg1) == SUBREG)
8973 reg1 = SUBREG_REG (reg1);
8974 if (GET_CODE (reg1) != REG)
8975 return 0;
8976 const int regno1 = REGNO (reg1);
8977
8978 if (GET_CODE (reg2) == SUBREG)
8979 reg2 = SUBREG_REG (reg2);
8980 if (GET_CODE (reg2) != REG)
8981 return 0;
8982 const int regno2 = REGNO (reg2);
8983
8984 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8985 return 1;
8986
8987 if (TARGET_VIS3)
8988 {
8989 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8990 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8991 return 1;
8992 }
8993
8994 return 0;
8995 }
8996
8997 /* Split a REG <--> REG move into a pair of moves in MODE. */
8998
8999 void
9000 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9001 {
9002 rtx dest1 = gen_highpart (mode, dest);
9003 rtx dest2 = gen_lowpart (mode, dest);
9004 rtx src1 = gen_highpart (mode, src);
9005 rtx src2 = gen_lowpart (mode, src);
9006
9007 /* Now emit using the real source and destination we found, swapping
9008 the order if we detect overlap. */
9009 if (reg_overlap_mentioned_p (dest1, src2))
9010 {
9011 emit_move_insn_1 (dest2, src2);
9012 emit_move_insn_1 (dest1, src1);
9013 }
9014 else
9015 {
9016 emit_move_insn_1 (dest1, src1);
9017 emit_move_insn_1 (dest2, src2);
9018 }
9019 }
9020
9021 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9022 This makes them candidates for using ldd and std insns.
9023
9024 Note reg1 and reg2 *must* be hard registers. */
9025
9026 int
9027 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9028 {
9029 /* We might have been passed a SUBREG. */
9030 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9031 return 0;
9032
9033 if (REGNO (reg1) % 2 != 0)
9034 return 0;
9035
9036 /* Integer ldd is deprecated in SPARC V9. */
9037 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9038 return 0;
9039
9040 return (REGNO (reg1) == REGNO (reg2) - 1);
9041 }
9042
9043 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9044 an ldd or std insn.
9045
9046 This can only happen when addr1 and addr2, the addresses in mem1
9047 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9048 addr1 must also be aligned on a 64-bit boundary.
9049
9050 Also, if dependent_reg_rtx is not null, it should not be used to
9051 compute the address for mem1, i.e. we cannot optimize a sequence
9052 like:
9053 ld [%o0], %o0
9054 ld [%o0 + 4], %o1
9055 to
9056 ldd [%o0], %o0
9057 nor:
9058 ld [%g3 + 4], %g3
9059 ld [%g3], %g2
9060 to
9061 ldd [%g3], %g2
9062
9063 But, note that the transformation from:
9064 ld [%g2 + 4], %g3
9065 ld [%g2], %g2
9066 to
9067 ldd [%g2], %g2
9068 is perfectly fine. Thus, the peephole2 patterns always pass us
9069 the destination register of the first load, never the second one.
9070
9071 For stores we don't have a similar problem, so dependent_reg_rtx is
9072 NULL_RTX. */
9073
9074 int
9075 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9076 {
9077 rtx addr1, addr2;
9078 unsigned int reg1;
9079 HOST_WIDE_INT offset1;
9080
9081 /* The mems cannot be volatile. */
9082 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9083 return 0;
9084
9085 /* MEM1 should be aligned on a 64-bit boundary. */
9086 if (MEM_ALIGN (mem1) < 64)
9087 return 0;
9088
9089 addr1 = XEXP (mem1, 0);
9090 addr2 = XEXP (mem2, 0);
9091
9092 /* Extract a register number and offset (if used) from the first addr. */
9093 if (GET_CODE (addr1) == PLUS)
9094 {
9095 /* If not a REG, return zero. */
9096 if (GET_CODE (XEXP (addr1, 0)) != REG)
9097 return 0;
9098 else
9099 {
9100 reg1 = REGNO (XEXP (addr1, 0));
9101 /* The offset must be constant! */
9102 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9103 return 0;
9104 offset1 = INTVAL (XEXP (addr1, 1));
9105 }
9106 }
9107 else if (GET_CODE (addr1) != REG)
9108 return 0;
9109 else
9110 {
9111 reg1 = REGNO (addr1);
9112 /* This was a simple (mem (reg)) expression. Offset is 0. */
9113 offset1 = 0;
9114 }
9115
9116 /* Make sure the second address has the form (plus (reg) (const_int)). */
9117 if (GET_CODE (addr2) != PLUS)
9118 return 0;
9119
9120 if (GET_CODE (XEXP (addr2, 0)) != REG
9121 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9122 return 0;
9123
9124 if (reg1 != REGNO (XEXP (addr2, 0)))
9125 return 0;
9126
9127 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9128 return 0;
9129
9130 /* The first offset must be evenly divisible by 8 to ensure the
9131 address is 64-bit aligned. */
9132 if (offset1 % 8 != 0)
9133 return 0;
9134
9135 /* The offset for the second addr must be 4 more than the first addr. */
9136 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9137 return 0;
9138
9139 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9140 instructions. */
9141 return 1;
9142 }
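
/* Editor's examples: [%o0] and [%o0 + 4] qualify when %o0 is known to be
   8-byte aligned (offset 0 is a multiple of 8), as do [%o0 + 8] and
   [%o0 + 12]; but [%o0 + 4] followed by [%o0 + 8] is rejected because the
   first offset is not a multiple of 8.  */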
9143
9144 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9145
9146 rtx
9147 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9148 {
9149 rtx x = widen_memory_access (mem1, mode, 0);
9150 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9151 return x;
9152 }
9153
9154 /* Return 1 if reg is a pseudo, or is the first register in
9155 a hard register pair. This makes it suitable for use in
9156 ldd and std insns. */
9157
9158 int
9159 register_ok_for_ldd (rtx reg)
9160 {
9161 /* We might have been passed a SUBREG. */
9162 if (!REG_P (reg))
9163 return 0;
9164
9165 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9166 return (REGNO (reg) % 2 == 0);
9167
9168 return 1;
9169 }
9170
9171 /* Return 1 if OP, a MEM, has an address which is known to be
9172 aligned to an 8-byte boundary. */
9173
9174 int
9175 memory_ok_for_ldd (rtx op)
9176 {
9177 /* In 64-bit mode, we assume that the address is word-aligned. */
9178 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9179 return 0;
9180
9181 if (! can_create_pseudo_p ()
9182 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9183 return 0;
9184
9185 return 1;
9186 }
9187 \f
9188 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9189
9190 static bool
9191 sparc_print_operand_punct_valid_p (unsigned char code)
9192 {
9193 if (code == '#'
9194 || code == '*'
9195 || code == '('
9196 || code == ')'
9197 || code == '_'
9198 || code == '&')
9199 return true;
9200
9201 return false;
9202 }
9203
9204 /* Implement TARGET_PRINT_OPERAND.
9205 Print operand X (an rtx) in assembler syntax to file FILE.
9206 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9207 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9208
9209 static void
9210 sparc_print_operand (FILE *file, rtx x, int code)
9211 {
9212 const char *s;
9213
9214 switch (code)
9215 {
9216 case '#':
9217 /* Output an insn in a delay slot. */
9218 if (final_sequence)
9219 sparc_indent_opcode = 1;
9220 else
9221 fputs ("\n\t nop", file);
9222 return;
9223 case '*':
9224 /* Output an annul flag if there's nothing for the delay slot and we
9225 are optimizing. This is always used with '(' below.
9226 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9227 this is a dbx bug. So, we only do this when optimizing.
9228 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9229 Always emit a nop in case the next instruction is a branch. */
9230 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9231 fputs (",a", file);
9232 return;
9233 case '(':
9234 /* Output a 'nop' if there's nothing for the delay slot and we are
9235 not optimizing. This is always used with '*' above. */
9236 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9237 fputs ("\n\t nop", file);
9238 else if (final_sequence)
9239 sparc_indent_opcode = 1;
9240 return;
9241 case ')':
9242 /* Output the right displacement from the saved PC on function return.
9243 The caller may have placed an "unimp" insn immediately after the call
9244 so we have to account for it. This insn is used in the 32-bit ABI
9245 when calling a function that returns a non zero-sized structure. The
9246 64-bit ABI doesn't have it. Be careful to have this test be the same
9247 as that for the call. The exception is when sparc_std_struct_return
9248 is enabled, the psABI is followed exactly and the adjustment is made
9249 by the code in sparc_struct_value_rtx. The call emitted is the same
9250 when sparc_std_struct_return is enabled. */
9251 if (!TARGET_ARCH64
9252 && cfun->returns_struct
9253 && !sparc_std_struct_return
9254 && DECL_SIZE (DECL_RESULT (current_function_decl))
9255 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9256 == INTEGER_CST
9257 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9258 fputs ("12", file);
9259 else
9260 fputc ('8', file);
9261 return;
9262 case '_':
9263 /* Output the Embedded Medium/Anywhere code model base register. */
9264 fputs (EMBMEDANY_BASE_REG, file);
9265 return;
9266 case '&':
9267 /* Print some local dynamic TLS name. */
9268 if (const char *name = get_some_local_dynamic_name ())
9269 assemble_name (file, name);
9270 else
9271 output_operand_lossage ("'%%&' used without any "
9272 "local dynamic TLS references");
9273 return;
9274
9275 case 'Y':
9276 /* Adjust the operand to take into account a RESTORE operation. */
9277 if (GET_CODE (x) == CONST_INT)
9278 break;
9279 else if (GET_CODE (x) != REG)
9280 output_operand_lossage ("invalid %%Y operand");
9281 else if (REGNO (x) < 8)
9282 fputs (reg_names[REGNO (x)], file);
9283 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9284 fputs (reg_names[REGNO (x)-16], file);
9285 else
9286 output_operand_lossage ("invalid %%Y operand");
9287 return;
9288 case 'L':
9289 /* Print out the low order register name of a register pair. */
9290 if (WORDS_BIG_ENDIAN)
9291 fputs (reg_names[REGNO (x)+1], file);
9292 else
9293 fputs (reg_names[REGNO (x)], file);
9294 return;
9295 case 'H':
9296 /* Print out the high order register name of a register pair. */
9297 if (WORDS_BIG_ENDIAN)
9298 fputs (reg_names[REGNO (x)], file);
9299 else
9300 fputs (reg_names[REGNO (x)+1], file);
9301 return;
9302 case 'R':
9303 /* Print out the second register name of a register pair or quad.
9304 I.e., R (%o0) => %o1. */
9305 fputs (reg_names[REGNO (x)+1], file);
9306 return;
9307 case 'S':
9308 /* Print out the third register name of a register quad.
9309 I.e., S (%o0) => %o2. */
9310 fputs (reg_names[REGNO (x)+2], file);
9311 return;
9312 case 'T':
9313 /* Print out the fourth register name of a register quad.
9314 I.e., T (%o0) => %o3. */
9315 fputs (reg_names[REGNO (x)+3], file);
9316 return;
9317 case 'x':
9318 /* Print a condition code register. */
9319 if (REGNO (x) == SPARC_ICC_REG)
9320 {
9321 switch (GET_MODE (x))
9322 {
9323 case E_CCmode:
9324 case E_CCNZmode:
9325 case E_CCCmode:
9326 case E_CCVmode:
9327 s = "%icc";
9328 break;
9329 case E_CCXmode:
9330 case E_CCXNZmode:
9331 case E_CCXCmode:
9332 case E_CCXVmode:
9333 s = "%xcc";
9334 break;
9335 default:
9336 gcc_unreachable ();
9337 }
9338 fputs (s, file);
9339 }
9340 else
9341 /* %fccN register */
9342 fputs (reg_names[REGNO (x)], file);
9343 return;
9344 case 'm':
9345 /* Print the operand's address only. */
9346 output_address (GET_MODE (x), XEXP (x, 0));
9347 return;
9348 case 'r':
9349 /* In this case we need a register. Use %g0 if the
9350 operand is const0_rtx. */
9351 if (x == const0_rtx
9352 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9353 {
9354 fputs ("%g0", file);
9355 return;
9356 }
9357 else
9358 break;
9359
9360 case 'A':
9361 switch (GET_CODE (x))
9362 {
9363 case IOR:
9364 s = "or";
9365 break;
9366 case AND:
9367 s = "and";
9368 break;
9369 case XOR:
9370 s = "xor";
9371 break;
9372 default:
9373 output_operand_lossage ("invalid %%A operand");
9374 s = "";
9375 break;
9376 }
9377 fputs (s, file);
9378 return;
9379
9380 case 'B':
9381 switch (GET_CODE (x))
9382 {
9383 case IOR:
9384 s = "orn";
9385 break;
9386 case AND:
9387 s = "andn";
9388 break;
9389 case XOR:
9390 s = "xnor";
9391 break;
9392 default:
9393 output_operand_lossage ("invalid %%B operand");
9394 s = "";
9395 break;
9396 }
9397 fputs (s, file);
9398 return;
9399
9400 /* This is used by the conditional move instructions. */
9401 case 'C':
9402 {
9403 machine_mode mode = GET_MODE (XEXP (x, 0));
9404 switch (GET_CODE (x))
9405 {
9406 case NE:
9407 if (mode == CCVmode || mode == CCXVmode)
9408 s = "vs";
9409 else
9410 s = "ne";
9411 break;
9412 case EQ:
9413 if (mode == CCVmode || mode == CCXVmode)
9414 s = "vc";
9415 else
9416 s = "e";
9417 break;
9418 case GE:
9419 if (mode == CCNZmode || mode == CCXNZmode)
9420 s = "pos";
9421 else
9422 s = "ge";
9423 break;
9424 case GT:
9425 s = "g";
9426 break;
9427 case LE:
9428 s = "le";
9429 break;
9430 case LT:
9431 if (mode == CCNZmode || mode == CCXNZmode)
9432 s = "neg";
9433 else
9434 s = "l";
9435 break;
9436 case GEU:
9437 s = "geu";
9438 break;
9439 case GTU:
9440 s = "gu";
9441 break;
9442 case LEU:
9443 s = "leu";
9444 break;
9445 case LTU:
9446 s = "lu";
9447 break;
9448 case LTGT:
9449 s = "lg";
9450 break;
9451 case UNORDERED:
9452 s = "u";
9453 break;
9454 case ORDERED:
9455 s = "o";
9456 break;
9457 case UNLT:
9458 s = "ul";
9459 break;
9460 case UNLE:
9461 s = "ule";
9462 break;
9463 case UNGT:
9464 s = "ug";
9465 break;
9466 case UNGE:
9467 s = "uge";
9468 break;
9469 case UNEQ:
9470 s = "ue";
9471 break;
9472 default:
9473 output_operand_lossage ("invalid %%C operand");
9474 s = "";
9475 break;
9476 }
9477 fputs (s, file);
9478 return;
9479 }
9480
9481 /* These are used by the movr instruction pattern. */
9482 case 'D':
9483 {
9484 switch (GET_CODE (x))
9485 {
9486 case NE:
9487 s = "ne";
9488 break;
9489 case EQ:
9490 s = "e";
9491 break;
9492 case GE:
9493 s = "gez";
9494 break;
9495 case LT:
9496 s = "lz";
9497 break;
9498 case LE:
9499 s = "lez";
9500 break;
9501 case GT:
9502 s = "gz";
9503 break;
9504 default:
9505 output_operand_lossage ("invalid %%D operand");
9506 s = "";
9507 break;
9508 }
9509 fputs (s, file);
9510 return;
9511 }
9512
9513 case 'b':
9514 {
9515 /* Print a sign-extended character. */
9516 int i = trunc_int_for_mode (INTVAL (x), QImode);
9517 fprintf (file, "%d", i);
9518 return;
9519 }
9520
9521 case 'f':
9522 /* Operand must be a MEM; write its address. */
9523 if (GET_CODE (x) != MEM)
9524 output_operand_lossage ("invalid %%f operand");
9525 output_address (GET_MODE (x), XEXP (x, 0));
9526 return;
9527
9528 case 's':
9529 {
9530 /* Print a sign-extended 32-bit value. */
9531 HOST_WIDE_INT i;
9532 if (GET_CODE (x) == CONST_INT)
9533 i = INTVAL (x);
9534 else
9535 {
9536 output_operand_lossage ("invalid %%s operand");
9537 return;
9538 }
9539 i = trunc_int_for_mode (i, SImode);
9540 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9541 return;
9542 }
9543
9544 case 0:
9545 /* Do nothing special. */
9546 break;
9547
9548 default:
9549 /* Undocumented flag. */
9550 output_operand_lossage ("invalid operand output code");
9551 }
9552
9553 if (GET_CODE (x) == REG)
9554 fputs (reg_names[REGNO (x)], file);
9555 else if (GET_CODE (x) == MEM)
9556 {
9557 fputc ('[', file);
9558 /* Poor Sun assembler doesn't understand absolute addressing. */
9559 if (CONSTANT_P (XEXP (x, 0)))
9560 fputs ("%g0+", file);
9561 output_address (GET_MODE (x), XEXP (x, 0));
9562 fputc (']', file);
9563 }
9564 else if (GET_CODE (x) == HIGH)
9565 {
9566 fputs ("%hi(", file);
9567 output_addr_const (file, XEXP (x, 0));
9568 fputc (')', file);
9569 }
9570 else if (GET_CODE (x) == LO_SUM)
9571 {
9572 sparc_print_operand (file, XEXP (x, 0), 0);
9573 if (TARGET_CM_MEDMID)
9574 fputs ("+%l44(", file);
9575 else
9576 fputs ("+%lo(", file);
9577 output_addr_const (file, XEXP (x, 1));
9578 fputc (')', file);
9579 }
9580 else if (GET_CODE (x) == CONST_DOUBLE)
9581 output_operand_lossage ("floating-point constant not a valid immediate operand");
9582 else
9583 output_addr_const (file, x);
9584 }
9585
9586 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9587
9588 static void
9589 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9590 {
9591 register rtx base, index = 0;
9592 int offset = 0;
9593 register rtx addr = x;
9594
9595 if (REG_P (addr))
9596 fputs (reg_names[REGNO (addr)], file);
9597 else if (GET_CODE (addr) == PLUS)
9598 {
9599 if (CONST_INT_P (XEXP (addr, 0)))
9600 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9601 else if (CONST_INT_P (XEXP (addr, 1)))
9602 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9603 else
9604 base = XEXP (addr, 0), index = XEXP (addr, 1);
9605 if (GET_CODE (base) == LO_SUM)
9606 {
9607 gcc_assert (USE_AS_OFFSETABLE_LO10
9608 && TARGET_ARCH64
9609 && ! TARGET_CM_MEDMID);
9610 output_operand (XEXP (base, 0), 0);
9611 fputs ("+%lo(", file);
9612 output_address (VOIDmode, XEXP (base, 1));
9613 fprintf (file, ")+%d", offset);
9614 }
9615 else
9616 {
9617 fputs (reg_names[REGNO (base)], file);
9618 if (index == 0)
9619 fprintf (file, "%+d", offset);
9620 else if (REG_P (index))
9621 fprintf (file, "+%s", reg_names[REGNO (index)]);
9622 else if (GET_CODE (index) == SYMBOL_REF
9623 || GET_CODE (index) == LABEL_REF
9624 || GET_CODE (index) == CONST)
9625 fputc ('+', file), output_addr_const (file, index);
9626 else gcc_unreachable ();
9627 }
9628 }
9629 else if (GET_CODE (addr) == MINUS
9630 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9631 {
9632 output_addr_const (file, XEXP (addr, 0));
9633 fputs ("-(", file);
9634 output_addr_const (file, XEXP (addr, 1));
9635 fputs ("-.)", file);
9636 }
9637 else if (GET_CODE (addr) == LO_SUM)
9638 {
9639 output_operand (XEXP (addr, 0), 0);
9640 if (TARGET_CM_MEDMID)
9641 fputs ("+%l44(", file);
9642 else
9643 fputs ("+%lo(", file);
9644 output_address (VOIDmode, XEXP (addr, 1));
9645 fputc (')', file);
9646 }
9647 else if (flag_pic
9648 && GET_CODE (addr) == CONST
9649 && GET_CODE (XEXP (addr, 0)) == MINUS
9650 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9651 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9652 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9653 {
9654 addr = XEXP (addr, 0);
9655 output_addr_const (file, XEXP (addr, 0));
9656 /* Group the args of the second CONST in parentheses. */
9657 fputs ("-(", file);
9658 /* Skip past the second CONST--it does nothing for us. */
9659 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9660 /* Close the parenthesis. */
9661 fputc (')', file);
9662 }
9663 else
9664 {
9665 output_addr_const (file, addr);
9666 }
9667 }
9668 \f
9669 /* Target hook for assembling integer objects. The sparc version has
9670 special handling for aligned DI-mode objects. */
9671
9672 static bool
9673 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9674 {
9675 /* ??? We only output .xword's for symbols and only then in environments
9676 where the assembler can handle them. */
9677 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9678 {
9679 if (TARGET_V9)
9680 {
9681 assemble_integer_with_op ("\t.xword\t", x);
9682 return true;
9683 }
9684 else
9685 {
9686 assemble_aligned_integer (4, const0_rtx);
9687 assemble_aligned_integer (4, x);
9688 return true;
9689 }
9690 }
9691 return default_assemble_integer (x, size, aligned_p);
9692 }
9693 \f
9694 /* Return the value of a code used in the .proc pseudo-op that says
9695 what kind of result this function returns. For non-C types, we pick
9696 the closest C type. */
9697
9698 #ifndef SHORT_TYPE_SIZE
9699 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9700 #endif
9701
9702 #ifndef INT_TYPE_SIZE
9703 #define INT_TYPE_SIZE BITS_PER_WORD
9704 #endif
9705
9706 #ifndef LONG_TYPE_SIZE
9707 #define LONG_TYPE_SIZE BITS_PER_WORD
9708 #endif
9709
9710 #ifndef LONG_LONG_TYPE_SIZE
9711 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9712 #endif
9713
9714 #ifndef FLOAT_TYPE_SIZE
9715 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9716 #endif
9717
9718 #ifndef DOUBLE_TYPE_SIZE
9719 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9720 #endif
9721
9722 #ifndef LONG_DOUBLE_TYPE_SIZE
9723 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9724 #endif
9725
9726 unsigned long
9727 sparc_type_code (register tree type)
9728 {
9729 register unsigned long qualifiers = 0;
9730 register unsigned shift;
9731
9732 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9733 setting more, since some assemblers will give an error for this. Also,
9734 we must be careful to avoid shifts of 32 bits or more to avoid getting
9735 unpredictable results. */
9736
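/* Worked example (explanatory only, assuming a signed "int" whose
   TYPE_PRECISION equals INT_TYPE_SIZE): for the C type "int *" the first
   iteration sees POINTER_TYPE and stores value 1 in the two-bit field at
   shift 6, giving qualifiers = 0x40; the second iteration hits the
   INTEGER_TYPE case and returns qualifiers | 4, i.e. 0x44.  */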
9737 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9738 {
9739 switch (TREE_CODE (type))
9740 {
9741 case ERROR_MARK:
9742 return qualifiers;
9743
9744 case ARRAY_TYPE:
9745 qualifiers |= (3 << shift);
9746 break;
9747
9748 case FUNCTION_TYPE:
9749 case METHOD_TYPE:
9750 qualifiers |= (2 << shift);
9751 break;
9752
9753 case POINTER_TYPE:
9754 case REFERENCE_TYPE:
9755 case OFFSET_TYPE:
9756 qualifiers |= (1 << shift);
9757 break;
9758
9759 case RECORD_TYPE:
9760 return (qualifiers | 8);
9761
9762 case UNION_TYPE:
9763 case QUAL_UNION_TYPE:
9764 return (qualifiers | 9);
9765
9766 case ENUMERAL_TYPE:
9767 return (qualifiers | 10);
9768
9769 case VOID_TYPE:
9770 return (qualifiers | 16);
9771
9772 case INTEGER_TYPE:
9773 /* If this is a range type, consider it to be the underlying
9774 type. */
9775 if (TREE_TYPE (type) != 0)
9776 break;
9777
9778 /* Carefully distinguish all the standard types of C,
9779 without messing up if the language is not C. We do this by
9780 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9781 look at both the names and the above fields, but that's redundant.
9782 Any type whose size is between two C types will be considered
9783 to be the wider of the two types. Also, we do not have a
9784 special code to use for "long long", so anything wider than
9785 long is treated the same. Note that we can't distinguish
9786 between "int" and "long" in this code if they are the same
9787 size, but that's fine, since neither can the assembler. */
9788
9789 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9790 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9791
9792 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9793 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9794
9795 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9796 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9797
9798 else
9799 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9800
9801 case REAL_TYPE:
9802 /* If this is a range type, consider it to be the underlying
9803 type. */
9804 if (TREE_TYPE (type) != 0)
9805 break;
9806
9807 /* Carefully distinguish all the standard types of C,
9808 without messing up if the language is not C. */
9809
9810 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9811 return (qualifiers | 6);
9812
9813 else
9814 return (qualifiers | 7);
9815
9816 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9817 /* ??? We need to distinguish between double and float complex types,
9818 but I don't know how yet because I can't reach this code from
9819 existing front-ends. */
9820 return (qualifiers | 7); /* Who knows? */
9821
9822 case VECTOR_TYPE:
9823 case BOOLEAN_TYPE: /* Boolean truth value type. */
9824 case LANG_TYPE:
9825 case NULLPTR_TYPE:
9826 return qualifiers;
9827
9828 default:
9829 gcc_unreachable (); /* Not a type! */
9830 }
9831 }
9832
9833 return qualifiers;
9834 }
9835 \f
9836 /* Nested function support. */
9837
9838 /* Emit RTL insns to initialize the variable parts of a trampoline.
9839 FNADDR is an RTX for the address of the function's pure code.
9840 CXT is an RTX for the static chain value for the function.
9841
9842 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9843 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9844 (to store insns). This is a bit excessive. Perhaps a different
9845 mechanism would be better here.
9846
9847 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9848
9849 static void
9850 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9851 {
9852 /* SPARC 32-bit trampoline:
9853
9854 sethi %hi(fn), %g1
9855 sethi %hi(static), %g2
9856 jmp %g1+%lo(fn)
9857 or %g2, %lo(static), %g2
9858
9859 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9860 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9861 */
9862
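/* For reference (explanatory decoding, believed correct but not taken from
   any assembler listing): the constants below are the four insns above with
   zero immediate fields:
     0x03000000  sethi %hi(0), %g1
     0x05000000  sethi %hi(0), %g2
     0x81c06000  jmp   %g1 + %lo(0)     (i.e. jmpl %g1+simm13, %g0)
     0x8410a000  or    %g2, %lo(0), %g2
   The high 22 bits of each address are OR'd into the sethi words and the
   low 10 bits into the jmp/or words.  */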
9863 emit_move_insn
9864 (adjust_address (m_tramp, SImode, 0),
9865 expand_binop (SImode, ior_optab,
9866 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9867 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9868 NULL_RTX, 1, OPTAB_DIRECT));
9869
9870 emit_move_insn
9871 (adjust_address (m_tramp, SImode, 4),
9872 expand_binop (SImode, ior_optab,
9873 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9874 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9875 NULL_RTX, 1, OPTAB_DIRECT));
9876
9877 emit_move_insn
9878 (adjust_address (m_tramp, SImode, 8),
9879 expand_binop (SImode, ior_optab,
9880 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9881 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9882 NULL_RTX, 1, OPTAB_DIRECT));
9883
9884 emit_move_insn
9885 (adjust_address (m_tramp, SImode, 12),
9886 expand_binop (SImode, ior_optab,
9887 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9888 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9889 NULL_RTX, 1, OPTAB_DIRECT));
9890
9891 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9892 aligned on a 16 byte boundary so one flush clears it all. */
9893 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9894 if (sparc_cpu != PROCESSOR_ULTRASPARC
9895 && sparc_cpu != PROCESSOR_ULTRASPARC3
9896 && sparc_cpu != PROCESSOR_NIAGARA
9897 && sparc_cpu != PROCESSOR_NIAGARA2
9898 && sparc_cpu != PROCESSOR_NIAGARA3
9899 && sparc_cpu != PROCESSOR_NIAGARA4
9900 && sparc_cpu != PROCESSOR_NIAGARA7
9901 && sparc_cpu != PROCESSOR_M8)
9902 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9903
9904 /* Call __enable_execute_stack after writing onto the stack to make sure
9905 the stack address is accessible. */
9906 #ifdef HAVE_ENABLE_EXECUTE_STACK
9907 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9908 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9909 #endif
9910
9911 }
9912
9913 /* The 64-bit version is simpler because it makes more sense to load the
9914 values as "immediate" data out of the trampoline. It's also easier since
9915 we can read the PC without clobbering a register. */
9916
9917 static void
9918 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9919 {
9920 /* SPARC 64-bit trampoline:
9921
9922 rd %pc, %g1
9923 ldx [%g1+24], %g5
9924 jmp %g5
9925 ldx [%g1+16], %g5
9926 +16 bytes data
9927 */
9928
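/* For reference (explanatory only): the constants stored below encode the
   four insns listed above:
     0x83414000  rd  %pc, %g1
     0xca586018  ldx [%g1+24], %g5
     0x81c14000  jmp %g5
     0xca586010  ldx [%g1+16], %g5  */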
9929 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9930 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9931 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9932 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9933 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9934 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9935 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9936 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9937 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9938 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9939 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9940
9941 if (sparc_cpu != PROCESSOR_ULTRASPARC
9942 && sparc_cpu != PROCESSOR_ULTRASPARC3
9943 && sparc_cpu != PROCESSOR_NIAGARA
9944 && sparc_cpu != PROCESSOR_NIAGARA2
9945 && sparc_cpu != PROCESSOR_NIAGARA3
9946 && sparc_cpu != PROCESSOR_NIAGARA4
9947 && sparc_cpu != PROCESSOR_NIAGARA7
9948 && sparc_cpu != PROCESSOR_M8)
9949 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9950
9951 /* Call __enable_execute_stack after writing onto the stack to make sure
9952 the stack address is accessible. */
9953 #ifdef HAVE_ENABLE_EXECUTE_STACK
9954 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9955 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9956 #endif
9957 }
9958
9959 /* Worker for TARGET_TRAMPOLINE_INIT. */
9960
9961 static void
9962 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9963 {
9964 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9965 cxt = force_reg (Pmode, cxt);
9966 if (TARGET_ARCH64)
9967 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9968 else
9969 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9970 }
9971 \f
9972 /* Adjust the cost of a scheduling dependency. Return the new cost of the
9973 dependency of INSN on DEP_INSN, whose kind is given by DEP_TYPE. COST is the current cost. */
9974
9975 static int
9976 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9977 int cost)
9978 {
9979 enum attr_type insn_type;
9980
9981 if (recog_memoized (insn) < 0)
9982 return cost;
9983
9984 insn_type = get_attr_type (insn);
9985
9986 if (dep_type == 0)
9987 {
9988 /* Data dependency; DEP_INSN writes a register that INSN reads some
9989 cycles later. */
9990
9991 /* if a load, then the dependence must be on the memory address;
9992 add an extra "cycle". Note that the cost could be two cycles
9993 if the reg was written late in an instruction group; we ca not tell
9994 here. */
9995 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9996 return cost + 3;
9997
9998 /* Get the delay only if the address of the store is the dependence. */
9999 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10000 {
10001 rtx pat = PATTERN (insn);
10002 rtx dep_pat = PATTERN (dep_insn);
10003
10004 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10005 return cost; /* This should not happen! */
10006
10007 /* The dependency between the two instructions was on the data that
10008 is being stored. Assume that this implies that the address of the
10009 store is not dependent. */
10010 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10011 return cost;
10012
10013 return cost + 3; /* An approximation. */
10014 }
10015
10016 /* A shift instruction cannot receive its data from an instruction
10017 in the same cycle; add a one cycle penalty. */
10018 if (insn_type == TYPE_SHIFT)
10019 return cost + 3; /* Split before cascade into shift. */
10020 }
10021 else
10022 {
10023 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10024 INSN writes some cycles later. */
10025
10026 /* These are only significant for the fpu unit; writing a fp reg before
10027 the fpu has finished with it stalls the processor. */
10028
10029 /* Reusing an integer register causes no problems. */
10030 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10031 return 0;
10032 }
10033
10034 return cost;
10035 }
10036
10037 static int
10038 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10039 int cost)
10040 {
10041 enum attr_type insn_type, dep_type;
10042 rtx pat = PATTERN (insn);
10043 rtx dep_pat = PATTERN (dep_insn);
10044
10045 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10046 return cost;
10047
10048 insn_type = get_attr_type (insn);
10049 dep_type = get_attr_type (dep_insn);
10050
10051 switch (dtype)
10052 {
10053 case 0:
10054 /* Data dependency; DEP_INSN writes a register that INSN reads some
10055 cycles later. */
10056
10057 switch (insn_type)
10058 {
10059 case TYPE_STORE:
10060 case TYPE_FPSTORE:
10061 /* Get the delay iff the address of the store is the dependence. */
10062 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10063 return cost;
10064
10065 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10066 return cost;
10067 return cost + 3;
10068
10069 case TYPE_LOAD:
10070 case TYPE_SLOAD:
10071 case TYPE_FPLOAD:
10072 /* If a load, then the dependence must be on the memory address. If
10073 the addresses aren't equal, then it might be a false dependency */
10074 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10075 {
10076 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10077 || GET_CODE (SET_DEST (dep_pat)) != MEM
10078 || GET_CODE (SET_SRC (pat)) != MEM
10079 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10080 XEXP (SET_SRC (pat), 0)))
10081 return cost + 2;
10082
10083 return cost + 8;
10084 }
10085 break;
10086
10087 case TYPE_BRANCH:
10088 /* Compare to branch latency is 0. There is no benefit from
10089 separating compare and branch. */
10090 if (dep_type == TYPE_COMPARE)
10091 return 0;
10092 /* Floating point compare to branch latency is less than
10093 compare to conditional move. */
10094 if (dep_type == TYPE_FPCMP)
10095 return cost - 1;
10096 break;
10097 default:
10098 break;
10099 }
10100 break;
10101
10102 case REG_DEP_ANTI:
10103 /* Anti-dependencies only penalize the fpu unit. */
10104 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10105 return 0;
10106 break;
10107
10108 default:
10109 break;
10110 }
10111
10112 return cost;
10113 }
10114
10115 static int
10116 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10117 unsigned int)
10118 {
10119 switch (sparc_cpu)
10120 {
10121 case PROCESSOR_SUPERSPARC:
10122 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10123 break;
10124 case PROCESSOR_HYPERSPARC:
10125 case PROCESSOR_SPARCLITE86X:
10126 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10127 break;
10128 default:
10129 break;
10130 }
10131 return cost;
10132 }
10133
10134 static void
10135 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10136 int sched_verbose ATTRIBUTE_UNUSED,
10137 int max_ready ATTRIBUTE_UNUSED)
10138 {}
10139
10140 static int
10141 sparc_use_sched_lookahead (void)
10142 {
10143 if (sparc_cpu == PROCESSOR_NIAGARA
10144 || sparc_cpu == PROCESSOR_NIAGARA2
10145 || sparc_cpu == PROCESSOR_NIAGARA3)
10146 return 0;
10147 if (sparc_cpu == PROCESSOR_NIAGARA4
10148 || sparc_cpu == PROCESSOR_NIAGARA7
10149 || sparc_cpu == PROCESSOR_M8)
10150 return 2;
10151 if (sparc_cpu == PROCESSOR_ULTRASPARC
10152 || sparc_cpu == PROCESSOR_ULTRASPARC3)
10153 return 4;
10154 if ((1 << sparc_cpu) &
10155 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
10156 (1 << PROCESSOR_SPARCLITE86X)))
10157 return 3;
10158 return 0;
10159 }
10160
10161 static int
10162 sparc_issue_rate (void)
10163 {
10164 switch (sparc_cpu)
10165 {
10166 case PROCESSOR_NIAGARA:
10167 case PROCESSOR_NIAGARA2:
10168 case PROCESSOR_NIAGARA3:
10169 default:
10170 return 1;
10171 case PROCESSOR_NIAGARA4:
10172 case PROCESSOR_NIAGARA7:
10173 case PROCESSOR_V9:
10174 /* Assume V9 processors are capable of at least dual-issue. */
10175 return 2;
10176 case PROCESSOR_SUPERSPARC:
10177 return 3;
10178 case PROCESSOR_HYPERSPARC:
10179 case PROCESSOR_SPARCLITE86X:
10180 return 2;
10181 case PROCESSOR_ULTRASPARC:
10182 case PROCESSOR_ULTRASPARC3:
10183 case PROCESSOR_M8:
10184 return 4;
10185 }
10186 }
10187
10188 static int
10189 set_extends (rtx_insn *insn)
10190 {
10191 register rtx pat = PATTERN (insn);
10192
10193 switch (GET_CODE (SET_SRC (pat)))
10194 {
10195 /* Load and some shift instructions zero extend. */
10196 case MEM:
10197 case ZERO_EXTEND:
10198 /* sethi clears the high bits */
10199 case HIGH:
10200 /* LO_SUM is used with sethi. sethi cleared the high
10201 bits and the values used with lo_sum are positive */
10202 case LO_SUM:
10203 /* Store flag stores 0 or 1 */
10204 case LT: case LTU:
10205 case GT: case GTU:
10206 case LE: case LEU:
10207 case GE: case GEU:
10208 case EQ:
10209 case NE:
10210 return 1;
10211 case AND:
10212 {
10213 rtx op0 = XEXP (SET_SRC (pat), 0);
10214 rtx op1 = XEXP (SET_SRC (pat), 1);
10215 if (GET_CODE (op1) == CONST_INT)
10216 return INTVAL (op1) >= 0;
10217 if (GET_CODE (op0) != REG)
10218 return 0;
10219 if (sparc_check_64 (op0, insn) == 1)
10220 return 1;
10221 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10222 }
10223 case IOR:
10224 case XOR:
10225 {
10226 rtx op0 = XEXP (SET_SRC (pat), 0);
10227 rtx op1 = XEXP (SET_SRC (pat), 1);
10228 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10229 return 0;
10230 if (GET_CODE (op1) == CONST_INT)
10231 return INTVAL (op1) >= 0;
10232 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10233 }
10234 case LSHIFTRT:
10235 return GET_MODE (SET_SRC (pat)) == SImode;
10236 /* Positive integers leave the high bits zero. */
10237 case CONST_INT:
10238 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10239 case ASHIFTRT:
10240 case SIGN_EXTEND:
10241 return - (GET_MODE (SET_SRC (pat)) == SImode);
10242 case REG:
10243 return sparc_check_64 (SET_SRC (pat), insn);
10244 default:
10245 return 0;
10246 }
10247 }
10248
10249 /* We _ought_ to have only one kind per function, but... */
10250 static GTY(()) rtx sparc_addr_diff_list;
10251 static GTY(()) rtx sparc_addr_list;
10252
10253 void
10254 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10255 {
10256 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10257 if (diff)
10258 sparc_addr_diff_list
10259 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10260 else
10261 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10262 }
10263
10264 static void
10265 sparc_output_addr_vec (rtx vec)
10266 {
10267 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10268 int idx, vlen = XVECLEN (body, 0);
10269
10270 #ifdef ASM_OUTPUT_ADDR_VEC_START
10271 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10272 #endif
10273
10274 #ifdef ASM_OUTPUT_CASE_LABEL
10275 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10276 NEXT_INSN (lab));
10277 #else
10278 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10279 #endif
10280
10281 for (idx = 0; idx < vlen; idx++)
10282 {
10283 ASM_OUTPUT_ADDR_VEC_ELT
10284 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10285 }
10286
10287 #ifdef ASM_OUTPUT_ADDR_VEC_END
10288 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10289 #endif
10290 }
10291
10292 static void
10293 sparc_output_addr_diff_vec (rtx vec)
10294 {
10295 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10296 rtx base = XEXP (XEXP (body, 0), 0);
10297 int idx, vlen = XVECLEN (body, 1);
10298
10299 #ifdef ASM_OUTPUT_ADDR_VEC_START
10300 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10301 #endif
10302
10303 #ifdef ASM_OUTPUT_CASE_LABEL
10304 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10305 NEXT_INSN (lab));
10306 #else
10307 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10308 #endif
10309
10310 for (idx = 0; idx < vlen; idx++)
10311 {
10312 ASM_OUTPUT_ADDR_DIFF_ELT
10313 (asm_out_file,
10314 body,
10315 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10316 CODE_LABEL_NUMBER (base));
10317 }
10318
10319 #ifdef ASM_OUTPUT_ADDR_VEC_END
10320 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10321 #endif
10322 }
10323
10324 static void
10325 sparc_output_deferred_case_vectors (void)
10326 {
10327 rtx t;
10328 int align;
10329
10330 if (sparc_addr_list == NULL_RTX
10331 && sparc_addr_diff_list == NULL_RTX)
10332 return;
10333
10334 /* Align to cache line in the function's code section. */
10335 switch_to_section (current_function_section ());
10336
10337 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10338 if (align > 0)
10339 ASM_OUTPUT_ALIGN (asm_out_file, align);
10340
10341 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10342 sparc_output_addr_vec (XEXP (t, 0));
10343 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10344 sparc_output_addr_diff_vec (XEXP (t, 0));
10345
10346 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10347 }
10348
10349 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10350 unknown. Return 1 if the high bits are zero, -1 if the register is
10351 sign extended. */
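/* For instance (explanatory only): if the only SET of the register that the
   backward walk finds is a sethi, i.e. a SET whose source is a HIGH rtx,
   set_extends returns 1, so sparc_check_64 reports that the high 32 bits
   are known to be zero.  */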
10352 int
10353 sparc_check_64 (rtx x, rtx_insn *insn)
10354 {
10355 /* If a register is set only once it is safe to ignore insns this
10356 code does not know how to handle. The loop will either recognize
10357 the single set and return the correct value or fail to recognize
10358 it and return 0. */
10359 int set_once = 0;
10360 rtx y = x;
10361
10362 gcc_assert (GET_CODE (x) == REG);
10363
10364 if (GET_MODE (x) == DImode)
10365 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10366
10367 if (flag_expensive_optimizations
10368 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10369 set_once = 1;
10370
10371 if (insn == 0)
10372 {
10373 if (set_once)
10374 insn = get_last_insn_anywhere ();
10375 else
10376 return 0;
10377 }
10378
10379 while ((insn = PREV_INSN (insn)))
10380 {
10381 switch (GET_CODE (insn))
10382 {
10383 case JUMP_INSN:
10384 case NOTE:
10385 break;
10386 case CODE_LABEL:
10387 case CALL_INSN:
10388 default:
10389 if (! set_once)
10390 return 0;
10391 break;
10392 case INSN:
10393 {
10394 rtx pat = PATTERN (insn);
10395 if (GET_CODE (pat) != SET)
10396 return 0;
10397 if (rtx_equal_p (x, SET_DEST (pat)))
10398 return set_extends (insn);
10399 if (y && rtx_equal_p (y, SET_DEST (pat)))
10400 return set_extends (insn);
10401 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10402 return 0;
10403 }
10404 }
10405 }
10406 return 0;
10407 }
10408
10409 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10410 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
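/* For instance (a sketch assuming the register-pair alternative, i.e.
   which_alternative != 2, and a non-constant input), calling this with
   OPCODE "sllx" emits roughly:

       sllx  %H1, 32, %0        ! assemble the 64-bit input in %0
       srl   %L1, 0, %L1        ! clear the high bits of the low word
       or    %L1, %0, %0
       sllx  %0, %2, %L0        ! perform the shift
       srlx  %L0, 32, %H0       ! split the result back into the pair

   where the srl is omitted when sparc_check_64 already shows the high bits
   of the low-word register to be zero.  */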
10411
10412 const char *
10413 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10414 {
10415 static char asm_code[60];
10416
10417 /* The scratch register is only required when the destination
10418 register is not a 64-bit global or out register. */
10419 if (which_alternative != 2)
10420 operands[3] = operands[0];
10421
10422 /* We can only shift by constants <= 63. */
10423 if (GET_CODE (operands[2]) == CONST_INT)
10424 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10425
10426 if (GET_CODE (operands[1]) == CONST_INT)
10427 {
10428 output_asm_insn ("mov\t%1, %3", operands);
10429 }
10430 else
10431 {
10432 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10433 if (sparc_check_64 (operands[1], insn) <= 0)
10434 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10435 output_asm_insn ("or\t%L1, %3, %3", operands);
10436 }
10437
10438 strcpy (asm_code, opcode);
10439
10440 if (which_alternative != 2)
10441 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10442 else
10443 return
10444 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10445 }
10446 \f
10447 /* Output rtl to increment the profiler label LABELNO
10448 for profiling a function entry. */
10449
10450 void
10451 sparc_profile_hook (int labelno)
10452 {
10453 char buf[32];
10454 rtx lab, fun;
10455
10456 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10457 if (NO_PROFILE_COUNTERS)
10458 {
10459 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10460 }
10461 else
10462 {
10463 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10464 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10465 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10466 }
10467 }
10468 \f
10469 #ifdef TARGET_SOLARIS
10470 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10471
10472 static void
10473 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10474 tree decl ATTRIBUTE_UNUSED)
10475 {
10476 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10477 {
10478 solaris_elf_asm_comdat_section (name, flags, decl);
10479 return;
10480 }
10481
10482 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10483
10484 if (!(flags & SECTION_DEBUG))
10485 fputs (",#alloc", asm_out_file);
10486 if (flags & SECTION_WRITE)
10487 fputs (",#write", asm_out_file);
10488 if (flags & SECTION_TLS)
10489 fputs (",#tls", asm_out_file);
10490 if (flags & SECTION_CODE)
10491 fputs (",#execinstr", asm_out_file);
10492
10493 if (flags & SECTION_NOTYPE)
10494 ;
10495 else if (flags & SECTION_BSS)
10496 fputs (",#nobits", asm_out_file);
10497 else
10498 fputs (",#progbits", asm_out_file);
10499
10500 fputc ('\n', asm_out_file);
10501 }
10502 #endif /* TARGET_SOLARIS */
10503
10504 /* We do not allow indirect calls to be optimized into sibling calls.
10505
10506 We cannot use sibling calls when delayed branches are disabled
10507 because they will likely require the call delay slot to be filled.
10508
10509 Also, on SPARC 32-bit we cannot emit a sibling call when the
10510 current function returns a structure. This is because the "unimp
10511 after call" convention would cause the callee to return to the
10512 wrong place. The generic code already disallows cases where the
10513 function being called returns a structure.
10514
10515 It may seem strange how this last case could occur. Usually there
10516 is code after the call which jumps to epilogue code which dumps the
10517 return value into the struct return area. That ought to invalidate
10518 the sibling call right? Well, in the C++ case we can end up passing
10519 the pointer to the struct return area to a constructor (which returns
10520 void) and then nothing else happens. Such a sibling call would look
10521 valid without the added check here.
10522
10523 VxWorks PIC PLT entries require the global pointer to be initialized
10524 on entry. We therefore can't emit sibling calls to them. */
10525 static bool
10526 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10527 {
10528 return (decl
10529 && flag_delayed_branch
10530 && (TARGET_ARCH64 || ! cfun->returns_struct)
10531 && !(TARGET_VXWORKS_RTP
10532 && flag_pic
10533 && !targetm.binds_local_p (decl)));
10534 }
10535 \f
10536 /* libfunc renaming. */
10537
10538 static void
10539 sparc_init_libfuncs (void)
10540 {
10541 if (TARGET_ARCH32)
10542 {
10543 /* Use the subroutines that Sun's library provides for integer
10544 multiply and divide. The `*' prevents an underscore from
10545 being prepended by the compiler. .umul is a little faster
10546 than .mul. */
10547 set_optab_libfunc (smul_optab, SImode, "*.umul");
10548 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10549 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10550 set_optab_libfunc (smod_optab, SImode, "*.rem");
10551 set_optab_libfunc (umod_optab, SImode, "*.urem");
10552
10553 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10554 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10555 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10556 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10557 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10558 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10559
10560 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10561 is because with soft-float, the SFmode and DFmode sqrt
10562 instructions will be absent, and the compiler will notice and
10563 try to use the TFmode sqrt instruction for calls to the
10564 builtin function sqrt, but this fails. */
10565 if (TARGET_FPU)
10566 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10567
10568 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10569 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10570 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10571 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10572 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10573 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10574
10575 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10576 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10577 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10578 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10579
10580 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10581 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10582 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10583 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10584
10585 if (DITF_CONVERSION_LIBFUNCS)
10586 {
10587 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10588 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10589 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10590 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10591 }
10592
10593 if (SUN_CONVERSION_LIBFUNCS)
10594 {
10595 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10596 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10597 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10598 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10599 }
10600 }
10601 if (TARGET_ARCH64)
10602 {
10603 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10604 do not exist in the library. Make sure the compiler does not
10605 emit calls to them by accident. (It should always use the
10606 hardware instructions.) */
10607 set_optab_libfunc (smul_optab, SImode, 0);
10608 set_optab_libfunc (sdiv_optab, SImode, 0);
10609 set_optab_libfunc (udiv_optab, SImode, 0);
10610 set_optab_libfunc (smod_optab, SImode, 0);
10611 set_optab_libfunc (umod_optab, SImode, 0);
10612
10613 if (SUN_INTEGER_MULTIPLY_64)
10614 {
10615 set_optab_libfunc (smul_optab, DImode, "__mul64");
10616 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10617 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10618 set_optab_libfunc (smod_optab, DImode, "__rem64");
10619 set_optab_libfunc (umod_optab, DImode, "__urem64");
10620 }
10621
10622 if (SUN_CONVERSION_LIBFUNCS)
10623 {
10624 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10625 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10626 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10627 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10628 }
10629 }
10630 }
10631 \f
10632 /* SPARC builtins. */
10633 enum sparc_builtins
10634 {
10635 /* FPU builtins. */
10636 SPARC_BUILTIN_LDFSR,
10637 SPARC_BUILTIN_STFSR,
10638
10639 /* VIS 1.0 builtins. */
10640 SPARC_BUILTIN_FPACK16,
10641 SPARC_BUILTIN_FPACK32,
10642 SPARC_BUILTIN_FPACKFIX,
10643 SPARC_BUILTIN_FEXPAND,
10644 SPARC_BUILTIN_FPMERGE,
10645 SPARC_BUILTIN_FMUL8X16,
10646 SPARC_BUILTIN_FMUL8X16AU,
10647 SPARC_BUILTIN_FMUL8X16AL,
10648 SPARC_BUILTIN_FMUL8SUX16,
10649 SPARC_BUILTIN_FMUL8ULX16,
10650 SPARC_BUILTIN_FMULD8SUX16,
10651 SPARC_BUILTIN_FMULD8ULX16,
10652 SPARC_BUILTIN_FALIGNDATAV4HI,
10653 SPARC_BUILTIN_FALIGNDATAV8QI,
10654 SPARC_BUILTIN_FALIGNDATAV2SI,
10655 SPARC_BUILTIN_FALIGNDATADI,
10656 SPARC_BUILTIN_WRGSR,
10657 SPARC_BUILTIN_RDGSR,
10658 SPARC_BUILTIN_ALIGNADDR,
10659 SPARC_BUILTIN_ALIGNADDRL,
10660 SPARC_BUILTIN_PDIST,
10661 SPARC_BUILTIN_EDGE8,
10662 SPARC_BUILTIN_EDGE8L,
10663 SPARC_BUILTIN_EDGE16,
10664 SPARC_BUILTIN_EDGE16L,
10665 SPARC_BUILTIN_EDGE32,
10666 SPARC_BUILTIN_EDGE32L,
10667 SPARC_BUILTIN_FCMPLE16,
10668 SPARC_BUILTIN_FCMPLE32,
10669 SPARC_BUILTIN_FCMPNE16,
10670 SPARC_BUILTIN_FCMPNE32,
10671 SPARC_BUILTIN_FCMPGT16,
10672 SPARC_BUILTIN_FCMPGT32,
10673 SPARC_BUILTIN_FCMPEQ16,
10674 SPARC_BUILTIN_FCMPEQ32,
10675 SPARC_BUILTIN_FPADD16,
10676 SPARC_BUILTIN_FPADD16S,
10677 SPARC_BUILTIN_FPADD32,
10678 SPARC_BUILTIN_FPADD32S,
10679 SPARC_BUILTIN_FPSUB16,
10680 SPARC_BUILTIN_FPSUB16S,
10681 SPARC_BUILTIN_FPSUB32,
10682 SPARC_BUILTIN_FPSUB32S,
10683 SPARC_BUILTIN_ARRAY8,
10684 SPARC_BUILTIN_ARRAY16,
10685 SPARC_BUILTIN_ARRAY32,
10686
10687 /* VIS 2.0 builtins. */
10688 SPARC_BUILTIN_EDGE8N,
10689 SPARC_BUILTIN_EDGE8LN,
10690 SPARC_BUILTIN_EDGE16N,
10691 SPARC_BUILTIN_EDGE16LN,
10692 SPARC_BUILTIN_EDGE32N,
10693 SPARC_BUILTIN_EDGE32LN,
10694 SPARC_BUILTIN_BMASK,
10695 SPARC_BUILTIN_BSHUFFLEV4HI,
10696 SPARC_BUILTIN_BSHUFFLEV8QI,
10697 SPARC_BUILTIN_BSHUFFLEV2SI,
10698 SPARC_BUILTIN_BSHUFFLEDI,
10699
10700 /* VIS 3.0 builtins. */
10701 SPARC_BUILTIN_CMASK8,
10702 SPARC_BUILTIN_CMASK16,
10703 SPARC_BUILTIN_CMASK32,
10704 SPARC_BUILTIN_FCHKSM16,
10705 SPARC_BUILTIN_FSLL16,
10706 SPARC_BUILTIN_FSLAS16,
10707 SPARC_BUILTIN_FSRL16,
10708 SPARC_BUILTIN_FSRA16,
10709 SPARC_BUILTIN_FSLL32,
10710 SPARC_BUILTIN_FSLAS32,
10711 SPARC_BUILTIN_FSRL32,
10712 SPARC_BUILTIN_FSRA32,
10713 SPARC_BUILTIN_PDISTN,
10714 SPARC_BUILTIN_FMEAN16,
10715 SPARC_BUILTIN_FPADD64,
10716 SPARC_BUILTIN_FPSUB64,
10717 SPARC_BUILTIN_FPADDS16,
10718 SPARC_BUILTIN_FPADDS16S,
10719 SPARC_BUILTIN_FPSUBS16,
10720 SPARC_BUILTIN_FPSUBS16S,
10721 SPARC_BUILTIN_FPADDS32,
10722 SPARC_BUILTIN_FPADDS32S,
10723 SPARC_BUILTIN_FPSUBS32,
10724 SPARC_BUILTIN_FPSUBS32S,
10725 SPARC_BUILTIN_FUCMPLE8,
10726 SPARC_BUILTIN_FUCMPNE8,
10727 SPARC_BUILTIN_FUCMPGT8,
10728 SPARC_BUILTIN_FUCMPEQ8,
10729 SPARC_BUILTIN_FHADDS,
10730 SPARC_BUILTIN_FHADDD,
10731 SPARC_BUILTIN_FHSUBS,
10732 SPARC_BUILTIN_FHSUBD,
10733 SPARC_BUILTIN_FNHADDS,
10734 SPARC_BUILTIN_FNHADDD,
10735 SPARC_BUILTIN_UMULXHI,
10736 SPARC_BUILTIN_XMULX,
10737 SPARC_BUILTIN_XMULXHI,
10738
10739 /* VIS 4.0 builtins. */
10740 SPARC_BUILTIN_FPADD8,
10741 SPARC_BUILTIN_FPADDS8,
10742 SPARC_BUILTIN_FPADDUS8,
10743 SPARC_BUILTIN_FPADDUS16,
10744 SPARC_BUILTIN_FPCMPLE8,
10745 SPARC_BUILTIN_FPCMPGT8,
10746 SPARC_BUILTIN_FPCMPULE16,
10747 SPARC_BUILTIN_FPCMPUGT16,
10748 SPARC_BUILTIN_FPCMPULE32,
10749 SPARC_BUILTIN_FPCMPUGT32,
10750 SPARC_BUILTIN_FPMAX8,
10751 SPARC_BUILTIN_FPMAX16,
10752 SPARC_BUILTIN_FPMAX32,
10753 SPARC_BUILTIN_FPMAXU8,
10754 SPARC_BUILTIN_FPMAXU16,
10755 SPARC_BUILTIN_FPMAXU32,
10756 SPARC_BUILTIN_FPMIN8,
10757 SPARC_BUILTIN_FPMIN16,
10758 SPARC_BUILTIN_FPMIN32,
10759 SPARC_BUILTIN_FPMINU8,
10760 SPARC_BUILTIN_FPMINU16,
10761 SPARC_BUILTIN_FPMINU32,
10762 SPARC_BUILTIN_FPSUB8,
10763 SPARC_BUILTIN_FPSUBS8,
10764 SPARC_BUILTIN_FPSUBUS8,
10765 SPARC_BUILTIN_FPSUBUS16,
10766
10767 /* VIS 4.0B builtins. */
10768
10769 /* Note that all the DICTUNPACK* entries should be kept
10770 contiguous. */
10771 SPARC_BUILTIN_FIRST_DICTUNPACK,
10772 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10773 SPARC_BUILTIN_DICTUNPACK16,
10774 SPARC_BUILTIN_DICTUNPACK32,
10775 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10776
10777 /* Note that all the FPCMP*SHL entries should be kept
10778 contiguous. */
10779 SPARC_BUILTIN_FIRST_FPCMPSHL,
10780 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10781 SPARC_BUILTIN_FPCMPGT8SHL,
10782 SPARC_BUILTIN_FPCMPEQ8SHL,
10783 SPARC_BUILTIN_FPCMPNE8SHL,
10784 SPARC_BUILTIN_FPCMPLE16SHL,
10785 SPARC_BUILTIN_FPCMPGT16SHL,
10786 SPARC_BUILTIN_FPCMPEQ16SHL,
10787 SPARC_BUILTIN_FPCMPNE16SHL,
10788 SPARC_BUILTIN_FPCMPLE32SHL,
10789 SPARC_BUILTIN_FPCMPGT32SHL,
10790 SPARC_BUILTIN_FPCMPEQ32SHL,
10791 SPARC_BUILTIN_FPCMPNE32SHL,
10792 SPARC_BUILTIN_FPCMPULE8SHL,
10793 SPARC_BUILTIN_FPCMPUGT8SHL,
10794 SPARC_BUILTIN_FPCMPULE16SHL,
10795 SPARC_BUILTIN_FPCMPUGT16SHL,
10796 SPARC_BUILTIN_FPCMPULE32SHL,
10797 SPARC_BUILTIN_FPCMPUGT32SHL,
10798 SPARC_BUILTIN_FPCMPDE8SHL,
10799 SPARC_BUILTIN_FPCMPDE16SHL,
10800 SPARC_BUILTIN_FPCMPDE32SHL,
10801 SPARC_BUILTIN_FPCMPUR8SHL,
10802 SPARC_BUILTIN_FPCMPUR16SHL,
10803 SPARC_BUILTIN_FPCMPUR32SHL,
10804 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10805
10806 SPARC_BUILTIN_MAX
10807 };
10808
10809 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10810 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10811
10812 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10813 The instruction should require a constant operand of some sort. The
10814 function prints an error if OPVAL is not valid. */
10815
10816 static int
10817 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10818 {
10819 if (GET_CODE (opval) != CONST_INT)
10820 {
10821 error ("%qs expects a constant argument", insn_data[icode].name);
10822 return false;
10823 }
10824
10825 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10826 {
10827 error ("constant argument out of range for %qs", insn_data[icode].name);
10828 return false;
10829 }
10830 return true;
10831 }
10832
10833 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10834 function decl or NULL_TREE if the builtin was not added. */
10835
10836 static tree
10837 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10838 tree type)
10839 {
10840 tree t
10841 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10842
10843 if (t)
10844 {
10845 sparc_builtins[code] = t;
10846 sparc_builtins_icode[code] = icode;
10847 }
10848
10849 return t;
10850 }
10851
10852 /* Likewise, but also marks the function as "const". */
10853
10854 static tree
10855 def_builtin_const (const char *name, enum insn_code icode,
10856 enum sparc_builtins code, tree type)
10857 {
10858 tree t = def_builtin (name, icode, code, type);
10859
10860 if (t)
10861 TREE_READONLY (t) = 1;
10862
10863 return t;
10864 }
10865
10866 /* Implement the TARGET_INIT_BUILTINS target hook.
10867 Create builtin functions for special SPARC instructions. */
10868
10869 static void
10870 sparc_init_builtins (void)
10871 {
10872 if (TARGET_FPU)
10873 sparc_fpu_init_builtins ();
10874
10875 if (TARGET_VIS)
10876 sparc_vis_init_builtins ();
10877 }
10878
10879 /* Create builtin functions for FPU instructions. */
10880
10881 static void
10882 sparc_fpu_init_builtins (void)
10883 {
10884 tree ftype
10885 = build_function_type_list (void_type_node,
10886 build_pointer_type (unsigned_type_node), 0);
10887 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10888 SPARC_BUILTIN_LDFSR, ftype);
10889 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10890 SPARC_BUILTIN_STFSR, ftype);
10891 }
10892
10893 /* Create builtin functions for VIS instructions. */
10894
10895 static void
10896 sparc_vis_init_builtins (void)
10897 {
10898 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10899 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10900 tree v4hi = build_vector_type (intHI_type_node, 4);
10901 tree v2hi = build_vector_type (intHI_type_node, 2);
10902 tree v2si = build_vector_type (intSI_type_node, 2);
10903 tree v1si = build_vector_type (intSI_type_node, 1);
10904
10905 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10906 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10907 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10908 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10909 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10910 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10911 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10912 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10913 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10914 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10915 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10916 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10917 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10918 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10919 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10920 v8qi, v8qi,
10921 intDI_type_node, 0);
10922 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10923 v8qi, v8qi, 0);
10924 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10925 v8qi, v8qi, 0);
10926 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10927 intSI_type_node, 0);
10928 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10929 intSI_type_node, 0);
10930 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10931 intSI_type_node, 0);
10932 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10933 intDI_type_node,
10934 intDI_type_node, 0);
10935 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10936 intSI_type_node,
10937 intSI_type_node, 0);
10938 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10939 ptr_type_node,
10940 intSI_type_node, 0);
10941 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10942 ptr_type_node,
10943 intDI_type_node, 0);
10944 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10945 ptr_type_node,
10946 ptr_type_node, 0);
10947 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10948 ptr_type_node,
10949 ptr_type_node, 0);
10950 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10951 v4hi, v4hi, 0);
10952 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10953 v2si, v2si, 0);
10954 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10955 v4hi, v4hi, 0);
10956 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10957 v2si, v2si, 0);
10958 tree void_ftype_di = build_function_type_list (void_type_node,
10959 intDI_type_node, 0);
10960 tree di_ftype_void = build_function_type_list (intDI_type_node,
10961 void_type_node, 0);
10962 tree void_ftype_si = build_function_type_list (void_type_node,
10963 intSI_type_node, 0);
10964 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10965 float_type_node,
10966 float_type_node, 0);
10967 tree df_ftype_df_df = build_function_type_list (double_type_node,
10968 double_type_node,
10969 double_type_node, 0);
10970
10971 /* Packing and expanding vectors. */
10972 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10973 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10974 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10975 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10976 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10977 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10978 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10979 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10980 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10981 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10982
10983 /* Multiplications. */
10984 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10985 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10986 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10987 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10988 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10989 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10990 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10991 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10992 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10993 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10994 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10995 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10996 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10997 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10998
10999 /* Data aligning. */
11000 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11001 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11002 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11003 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11004 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11005 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11006 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11007 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11008
11009 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11010 SPARC_BUILTIN_WRGSR, void_ftype_di);
11011 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11012 SPARC_BUILTIN_RDGSR, di_ftype_void);
11013
11014 if (TARGET_ARCH64)
11015 {
11016 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11017 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11018 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11019 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11020 }
11021 else
11022 {
11023 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11024 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11025 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11026 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11027 }
11028
11029 /* Pixel distance. */
11030 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11031 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11032
11033 /* Edge handling. */
11034 if (TARGET_ARCH64)
11035 {
11036 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11037 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11038 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11039 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11040 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11041 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11042 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11043 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11044 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11045 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11046 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11047 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11048 }
11049 else
11050 {
11051 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11052 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11053 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11054 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11055 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11056 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11057 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11058 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11059 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11060 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11061 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11062 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11063 }
11064
11065 /* Pixel compare. */
11066 if (TARGET_ARCH64)
11067 {
11068 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11069 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11070 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11071 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11072 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11073 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11074 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11075 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11076 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11077 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11078 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11079 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11080 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11081 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11082 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11083 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11084 }
11085 else
11086 {
11087 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11088 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11089 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11090 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11091 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11092 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11093 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11094 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11095 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11096 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11097 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11098 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11099 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11100 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11101 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11102 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11103 }
11104
11105 /* Addition and subtraction. */
11106 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11107 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11108 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11109 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11110 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11111 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11112 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11113 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11114 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11115 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11116 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11117 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11118 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11119 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11120 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11121 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11122
11123 /* Three-dimensional array addressing. */
11124 if (TARGET_ARCH64)
11125 {
11126 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11127 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11128 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11129 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11130 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11131 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11132 }
11133 else
11134 {
11135 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11136 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11137 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11138 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11139 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11140 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11141 }
11142
11143 if (TARGET_VIS2)
11144 {
11145 /* Edge handling. */
11146 if (TARGET_ARCH64)
11147 {
11148 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11149 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11150 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11151 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11152 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11153 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11154 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11155 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11156 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11157 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11158 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11159 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11160 }
11161 else
11162 {
11163 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11164 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11165 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11166 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11167 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11168 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11169 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11170 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11171 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11172 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11173 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11174 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11175 }
11176
11177 /* Byte mask and shuffle. */
11178 if (TARGET_ARCH64)
11179 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11180 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11181 else
11182 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11183 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11184 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11185 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11186 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11187 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11188 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11189 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11190 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11191 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11192 }
11193
11194 if (TARGET_VIS3)
11195 {
11196 if (TARGET_ARCH64)
11197 {
11198 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11199 SPARC_BUILTIN_CMASK8, void_ftype_di);
11200 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11201 SPARC_BUILTIN_CMASK16, void_ftype_di);
11202 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11203 SPARC_BUILTIN_CMASK32, void_ftype_di);
11204 }
11205 else
11206 {
11207 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11208 SPARC_BUILTIN_CMASK8, void_ftype_si);
11209 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11210 SPARC_BUILTIN_CMASK16, void_ftype_si);
11211 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11212 SPARC_BUILTIN_CMASK32, void_ftype_si);
11213 }
11214
11215 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11216 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11217
11218 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11219 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11220 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11221 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11222 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11223 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11224 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11225 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11226 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11227 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11228 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11229 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11230 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11231 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11232 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11233 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11234
11235 if (TARGET_ARCH64)
11236 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11237 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11238 else
11239 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11240 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11241
11242 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11243 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11244 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11245 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11246 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11247 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11248
11249 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11250 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11251 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11252 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11253 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11254 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11255 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11256 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11257 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11258 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11259 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11260 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11261 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11262 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11263 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11264 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11265
11266 if (TARGET_ARCH64)
11267 {
11268 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11269 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11270 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11271 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11272 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11273 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11274 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11275 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11276 }
11277 else
11278 {
11279 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11280 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11281 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11282 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11283 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11284 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11285 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11286 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11287 }
11288
11289 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11290 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11291 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11292 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11293 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11294 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11295 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11296 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11297 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11298 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11299 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11300 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11301
11302 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11303 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11304 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11305 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11306 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11307 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11308 }
11309
11310 if (TARGET_VIS4)
11311 {
11312 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11313 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11314 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11315 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11316 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11317 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11318 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11319 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11320
11321
11322 if (TARGET_ARCH64)
11323 {
11324 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11325 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11326 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11327 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11328 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11329 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11330 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11331 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11332 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11333 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11334 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11335 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11336 }
11337 else
11338 {
11339 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11340 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11341 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11342 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11343 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11344 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11345 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11346 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11347 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11348 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11349 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11350 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11351 }
11352
11353 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11354 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11355 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11356 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11357 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11358 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11359 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11360 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11361 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11362 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11363 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11364 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11365 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11366 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11367 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11368 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11369 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11370 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11371 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11372 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11373 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11374 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11375 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11376 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11377 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11378 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11379 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11380 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11381 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11382 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11383 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11384 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11385 }
11386
11387 if (TARGET_VIS4B)
11388 {
11389 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11390 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11391 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11392 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11393 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11394 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11395
11396 if (TARGET_ARCH64)
11397 {
11398 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11399 v8qi, v8qi,
11400 intSI_type_node, 0);
11401 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11402 v4hi, v4hi,
11403 intSI_type_node, 0);
11404 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11405 v2si, v2si,
11406 intSI_type_node, 0);
11407
11408 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11409 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11410 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11411 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11412 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11413 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11414 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11415 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11416
11417 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11418 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11419 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11420 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11421 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11422 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11423 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11424 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11425
11426 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11427 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11428 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11429 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11430 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11431 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11432 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11433 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11434
11435
11436 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11437 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11438 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11439 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11440
11441 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11442 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11443 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11444 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11445
11446 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11447 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11448 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11449 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11450
11451 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11452 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11453 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11454 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11455 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11456 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11457
11458 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11459 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11460 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11461 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11462 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11463 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11464
11465 }
11466 else
11467 {
11468 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11469 v8qi, v8qi,
11470 intSI_type_node, 0);
11471 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11472 v4hi, v4hi,
11473 intSI_type_node, 0);
11474 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11475 v2si, v2si,
11476 intSI_type_node, 0);
11477
11478 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11479 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11480 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11481 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11482 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11483 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11484 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11485 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11486
11487 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11488 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11489 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11490 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11491 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11492 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11493 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11494 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11495
11496 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11497 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11498 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11499 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11500 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11501 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11502 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11503 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11504
11505
11506 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11507 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11508 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11509 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11510
11511 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11512 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11513 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11514 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11515
11516 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11517 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11518 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11519 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11520
11521 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11522 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11523 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11524 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11525 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11526 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11527
11528 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11529 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11530 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11531 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11532 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11533 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11534 }
11535 }
11536 }
11537
11538 /* Implement TARGET_BUILTIN_DECL hook. */
11539
11540 static tree
11541 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11542 {
11543 if (code >= SPARC_BUILTIN_MAX)
11544 return error_mark_node;
11545
11546 return sparc_builtins[code];
11547 }
11548
11549 /* Implement TARGET_EXPAND_BUILTIN hook. */
11550
11551 static rtx
11552 sparc_expand_builtin (tree exp, rtx target,
11553 rtx subtarget ATTRIBUTE_UNUSED,
11554 machine_mode tmode ATTRIBUTE_UNUSED,
11555 int ignore ATTRIBUTE_UNUSED)
11556 {
11557 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11558 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11559 enum insn_code icode = sparc_builtins_icode[code];
11560 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11561 call_expr_arg_iterator iter;
11562 int arg_count = 0;
11563 rtx pat, op[4];
11564 tree arg;
11565
11566 if (nonvoid)
11567 {
11568 machine_mode tmode = insn_data[icode].operand[0].mode;
11569 if (!target
11570 || GET_MODE (target) != tmode
11571 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11572 op[0] = gen_reg_rtx (tmode);
11573 else
11574 op[0] = target;
11575 }
11576
11577 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11578 {
11579 const struct insn_operand_data *insn_op;
11580 int idx;
11581
11582 if (arg == error_mark_node)
11583 return NULL_RTX;
11584
11585 arg_count++;
11586 idx = arg_count - !nonvoid;
11587 insn_op = &insn_data[icode].operand[idx];
11588 op[arg_count] = expand_normal (arg);
11589
11590 /* Some of the builtins require constant arguments. We check
11591 for this here. */
11592 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11593 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11594 && arg_count == 3)
11595 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11596 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11597 && arg_count == 2))
11598 {
11599 if (!check_constant_argument (icode, idx, op[arg_count]))
11600 return const0_rtx;
11601 }
11602
11603 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11604 {
11605 if (!address_operand (op[arg_count], SImode))
11606 {
11607 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11608 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11609 }
11610 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11611 }
11612
11613 else if (insn_op->mode == V1DImode
11614 && GET_MODE (op[arg_count]) == DImode)
11615 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11616
11617 else if (insn_op->mode == V1SImode
11618 && GET_MODE (op[arg_count]) == SImode)
11619 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11620
11621 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11622 insn_op->mode))
11623 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11624 }
11625
11626 switch (arg_count)
11627 {
11628 case 0:
11629 pat = GEN_FCN (icode) (op[0]);
11630 break;
11631 case 1:
11632 if (nonvoid)
11633 pat = GEN_FCN (icode) (op[0], op[1]);
11634 else
11635 pat = GEN_FCN (icode) (op[1]);
11636 break;
11637 case 2:
11638 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11639 break;
11640 case 3:
11641 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11642 break;
11643 default:
11644 gcc_unreachable ();
11645 }
11646
11647 if (!pat)
11648 return NULL_RTX;
11649
11650 emit_insn (pat);
11651
11652 return (nonvoid ? op[0] : const0_rtx);
11653 }
11654
11655 /* Return the upper 16 bits of the 8x16 multiplication. */
11656
11657 static int
11658 sparc_vis_mul8x16 (int e8, int e16)
11659 {
11660 return (e8 * e16 + 128) / 256;
11661 }
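
/* For instance, with e8 == 200 and e16 == 1000 the product is 200000 and
   (200000 + 128) / 256 == 781: the 24-bit product is scaled down by 256
   with rounding to nearest, which is the element value the folding code
   below computes.  */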
11662
11663 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11664 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11665
11666 static void
11667 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11668 tree inner_type, tree cst0, tree cst1)
11669 {
11670 unsigned i, num = VECTOR_CST_NELTS (cst0);
11671 int scale;
11672
11673 switch (fncode)
11674 {
11675 case SPARC_BUILTIN_FMUL8X16:
11676 for (i = 0; i < num; ++i)
11677 {
11678 int val
11679 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11680 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11681 n_elts->quick_push (build_int_cst (inner_type, val));
11682 }
11683 break;
11684
11685 case SPARC_BUILTIN_FMUL8X16AU:
11686 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11687
11688 for (i = 0; i < num; ++i)
11689 {
11690 int val
11691 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11692 scale);
11693 n_elts->quick_push (build_int_cst (inner_type, val));
11694 }
11695 break;
11696
11697 case SPARC_BUILTIN_FMUL8X16AL:
11698 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11699
11700 for (i = 0; i < num; ++i)
11701 {
11702 int val
11703 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11704 scale);
11705 n_elts->quick_push (build_int_cst (inner_type, val));
11706 }
11707 break;
11708
11709 default:
11710 gcc_unreachable ();
11711 }
11712 }
11713
11714 /* Implement TARGET_FOLD_BUILTIN hook.
11715
11716 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11717 result of the function call is ignored. NULL_TREE is returned if the
11718 function could not be folded. */
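
/* A concrete instance of the PDIST folding handled below: for the constant
   vectors { 1, 2, 3, 4, 5, 6, 7, 8 } and { 8, 7, 6, 5, 4, 3, 2, 1 } with a
   zero accumulator, the absolute differences are 7, 5, 3, 1, 1, 3, 5, 7 and
   the call folds to the integer constant 32.  */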
11719
11720 static tree
11721 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11722 tree *args, bool ignore)
11723 {
11724 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11725 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11726 tree arg0, arg1, arg2;
11727
11728 if (ignore)
11729 switch (code)
11730 {
11731 case SPARC_BUILTIN_LDFSR:
11732 case SPARC_BUILTIN_STFSR:
11733 case SPARC_BUILTIN_ALIGNADDR:
11734 case SPARC_BUILTIN_WRGSR:
11735 case SPARC_BUILTIN_BMASK:
11736 case SPARC_BUILTIN_CMASK8:
11737 case SPARC_BUILTIN_CMASK16:
11738 case SPARC_BUILTIN_CMASK32:
11739 break;
11740
11741 default:
11742 return build_zero_cst (rtype);
11743 }
11744
11745 switch (code)
11746 {
11747 case SPARC_BUILTIN_FEXPAND:
11748 arg0 = args[0];
11749 STRIP_NOPS (arg0);
11750
11751 if (TREE_CODE (arg0) == VECTOR_CST)
11752 {
11753 tree inner_type = TREE_TYPE (rtype);
11754 unsigned i;
11755
11756 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11757 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11758 {
11759 unsigned HOST_WIDE_INT val
11760 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11761 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11762 }
11763 return n_elts.build ();
11764 }
11765 break;
11766
11767 case SPARC_BUILTIN_FMUL8X16:
11768 case SPARC_BUILTIN_FMUL8X16AU:
11769 case SPARC_BUILTIN_FMUL8X16AL:
11770 arg0 = args[0];
11771 arg1 = args[1];
11772 STRIP_NOPS (arg0);
11773 STRIP_NOPS (arg1);
11774
11775 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11776 {
11777 tree inner_type = TREE_TYPE (rtype);
11778 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11779 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11780 return n_elts.build ();
11781 }
11782 break;
11783
11784 case SPARC_BUILTIN_FPMERGE:
11785 arg0 = args[0];
11786 arg1 = args[1];
11787 STRIP_NOPS (arg0);
11788 STRIP_NOPS (arg1);
11789
11790 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11791 {
11792 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11793 unsigned i;
11794 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11795 {
11796 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11797 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11798 }
11799
11800 return n_elts.build ();
11801 }
11802 break;
11803
11804 case SPARC_BUILTIN_PDIST:
11805 case SPARC_BUILTIN_PDISTN:
11806 arg0 = args[0];
11807 arg1 = args[1];
11808 STRIP_NOPS (arg0);
11809 STRIP_NOPS (arg1);
11810 if (code == SPARC_BUILTIN_PDIST)
11811 {
11812 arg2 = args[2];
11813 STRIP_NOPS (arg2);
11814 }
11815 else
11816 arg2 = integer_zero_node;
11817
11818 if (TREE_CODE (arg0) == VECTOR_CST
11819 && TREE_CODE (arg1) == VECTOR_CST
11820 && TREE_CODE (arg2) == INTEGER_CST)
11821 {
11822 bool overflow = false;
11823 widest_int result = wi::to_widest (arg2);
11824 widest_int tmp;
11825 unsigned i;
11826
11827 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11828 {
11829 tree e0 = VECTOR_CST_ELT (arg0, i);
11830 tree e1 = VECTOR_CST_ELT (arg1, i);
11831
11832 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11833
11834 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11835 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11836 if (wi::neg_p (tmp))
11837 tmp = wi::neg (tmp, &neg2_ovf);
11838 else
11839 neg2_ovf = false;
11840 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11841 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11842 }
11843
11844 gcc_assert (!overflow);
11845
11846 return wide_int_to_tree (rtype, result);
11847 }
11848
11849 default:
11850 break;
11851 }
11852
11853 return NULL_TREE;
11854 }
11855 \f
11856 /* ??? This duplicates information provided to the compiler by the
11857 ??? scheduler description. Some day, teach genautomata to output
11858 ??? the latencies and then CSE will just use that. */
11859
11860 static bool
11861 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11862 int opno ATTRIBUTE_UNUSED,
11863 int *total, bool speed ATTRIBUTE_UNUSED)
11864 {
11865 int code = GET_CODE (x);
11866 bool float_mode_p = FLOAT_MODE_P (mode);
11867
11868 switch (code)
11869 {
11870 case CONST_INT:
11871 if (SMALL_INT (x))
11872 *total = 0;
11873 else
11874 *total = 2;
11875 return true;
11876
11877 case CONST_WIDE_INT:
11878 *total = 0;
11879 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11880 *total += 2;
11881 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11882 *total += 2;
11883 return true;
11884
11885 case HIGH:
11886 *total = 2;
11887 return true;
11888
11889 case CONST:
11890 case LABEL_REF:
11891 case SYMBOL_REF:
11892 *total = 4;
11893 return true;
11894
11895 case CONST_DOUBLE:
11896 *total = 8;
11897 return true;
11898
11899 case MEM:
11900 /* If outer-code was a sign or zero extension, a cost
11901 of COSTS_N_INSNS (1) was already added in. This is
11902 why we are subtracting it back out. */
11903 if (outer_code == ZERO_EXTEND)
11904 {
11905 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11906 }
11907 else if (outer_code == SIGN_EXTEND)
11908 {
11909 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11910 }
11911 else if (float_mode_p)
11912 {
11913 *total = sparc_costs->float_load;
11914 }
11915 else
11916 {
11917 *total = sparc_costs->int_load;
11918 }
11919
11920 return true;
11921
11922 case PLUS:
11923 case MINUS:
11924 if (float_mode_p)
11925 *total = sparc_costs->float_plusminus;
11926 else
11927 *total = COSTS_N_INSNS (1);
11928 return false;
11929
11930 case FMA:
11931 {
11932 rtx sub;
11933
11934 gcc_assert (float_mode_p);
11935 *total = sparc_costs->float_mul;
11936
11937 sub = XEXP (x, 0);
11938 if (GET_CODE (sub) == NEG)
11939 sub = XEXP (sub, 0);
11940 *total += rtx_cost (sub, mode, FMA, 0, speed);
11941
11942 sub = XEXP (x, 2);
11943 if (GET_CODE (sub) == NEG)
11944 sub = XEXP (sub, 0);
11945 *total += rtx_cost (sub, mode, FMA, 2, speed);
11946 return true;
11947 }
11948
11949 case MULT:
11950 if (float_mode_p)
11951 *total = sparc_costs->float_mul;
11952 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11953 *total = COSTS_N_INSNS (25);
11954 else
11955 {
11956 int bit_cost;
11957
11958 bit_cost = 0;
11959 if (sparc_costs->int_mul_bit_factor)
11960 {
11961 int nbits;
11962
11963 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11964 {
11965 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11966 for (nbits = 0; value != 0; value &= value - 1)
11967 nbits++;
11968 }
11969 else
11970 nbits = 7;
11971
11972 if (nbits < 3)
11973 nbits = 3;
11974 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11975 bit_cost = COSTS_N_INSNS (bit_cost);
11976 }
11977
11978 if (mode == DImode || !TARGET_HARD_MUL)
11979 *total = sparc_costs->int_mulX + bit_cost;
11980 else
11981 *total = sparc_costs->int_mul + bit_cost;
11982 }
11983 return false;
11984
11985 case ASHIFT:
11986 case ASHIFTRT:
11987 case LSHIFTRT:
11988 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11989 return false;
11990
11991 case DIV:
11992 case UDIV:
11993 case MOD:
11994 case UMOD:
11995 if (float_mode_p)
11996 {
11997 if (mode == DFmode)
11998 *total = sparc_costs->float_div_df;
11999 else
12000 *total = sparc_costs->float_div_sf;
12001 }
12002 else
12003 {
12004 if (mode == DImode)
12005 *total = sparc_costs->int_divX;
12006 else
12007 *total = sparc_costs->int_div;
12008 }
12009 return false;
12010
12011 case NEG:
12012 if (! float_mode_p)
12013 {
12014 *total = COSTS_N_INSNS (1);
12015 return false;
12016 }
12017 /* FALLTHRU */
12018
12019 case ABS:
12020 case FLOAT:
12021 case UNSIGNED_FLOAT:
12022 case FIX:
12023 case UNSIGNED_FIX:
12024 case FLOAT_EXTEND:
12025 case FLOAT_TRUNCATE:
12026 *total = sparc_costs->float_move;
12027 return false;
12028
12029 case SQRT:
12030 if (mode == DFmode)
12031 *total = sparc_costs->float_sqrt_df;
12032 else
12033 *total = sparc_costs->float_sqrt_sf;
12034 return false;
12035
12036 case COMPARE:
12037 if (float_mode_p)
12038 *total = sparc_costs->float_cmp;
12039 else
12040 *total = COSTS_N_INSNS (1);
12041 return false;
12042
12043 case IF_THEN_ELSE:
12044 if (float_mode_p)
12045 *total = sparc_costs->float_cmove;
12046 else
12047 *total = sparc_costs->int_cmove;
12048 return false;
12049
12050 case IOR:
12051 /* Handle the NAND vector patterns. */
12052 if (sparc_vector_mode_supported_p (mode)
12053 && GET_CODE (XEXP (x, 0)) == NOT
12054 && GET_CODE (XEXP (x, 1)) == NOT)
12055 {
12056 *total = COSTS_N_INSNS (1);
12057 return true;
12058 }
12059 else
12060 return false;
12061
12062 default:
12063 return false;
12064 }
12065 }
12066
12067 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12068
12069 static inline bool
12070 general_or_i64_p (reg_class_t rclass)
12071 {
12072 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12073 }
12074
12075 /* Implement TARGET_REGISTER_MOVE_COST. */
12076
12077 static int
12078 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12079 reg_class_t from, reg_class_t to)
12080 {
12081 bool need_memory = false;
12082
12083 /* This helps postreload CSE to eliminate redundant comparisons. */
12084 if (from == NO_REGS || to == NO_REGS)
12085 return 100;
12086
12087 if (from == FPCC_REGS || to == FPCC_REGS)
12088 need_memory = true;
12089 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12090 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12091 {
12092 if (TARGET_VIS3)
12093 {
12094 int size = GET_MODE_SIZE (mode);
12095 if (size == 8 || size == 4)
12096 {
12097 if (! TARGET_ARCH32 || size == 4)
12098 return 4;
12099 else
12100 return 6;
12101 }
12102 }
12103 need_memory = true;
12104 }
12105
12106 if (need_memory)
12107 {
12108 if (sparc_cpu == PROCESSOR_ULTRASPARC
12109 || sparc_cpu == PROCESSOR_ULTRASPARC3
12110 || sparc_cpu == PROCESSOR_NIAGARA
12111 || sparc_cpu == PROCESSOR_NIAGARA2
12112 || sparc_cpu == PROCESSOR_NIAGARA3
12113 || sparc_cpu == PROCESSOR_NIAGARA4
12114 || sparc_cpu == PROCESSOR_NIAGARA7
12115 || sparc_cpu == PROCESSOR_M8)
12116 return 12;
12117
12118 return 6;
12119 }
12120
12121 return 2;
12122 }
12123
12124 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12125 This is achieved by means of a manual dynamic stack space allocation in
12126 the current frame. We make the assumption that SEQ doesn't contain any
12127 function calls, with the possible exception of calls to the GOT helper. */
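
/* Sketch of the temporary frame extension this creates, with offsets taken
   relative to %sp + SPARC_STACK_BIAS after the stack pointer decrement:

     0 .. 16*UNITS_PER_WORD - 1   register save area, left untouched
     16*UNITS_PER_WORD            spill slot for REG
     17*UNITS_PER_WORD            spill slot for REG2, if used  */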
12128
12129 static void
12130 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12131 {
12132 /* We must preserve the lowest 16 words for the register save area. */
12133 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12134 /* We really need only 2 words of fresh stack space. */
12135 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12136
12137 rtx slot
12138 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12139 SPARC_STACK_BIAS + offset));
12140
12141 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12142 emit_insn (gen_rtx_SET (slot, reg));
12143 if (reg2)
12144 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12145 reg2));
12146 emit_insn (seq);
12147 if (reg2)
12148 emit_insn (gen_rtx_SET (reg2,
12149 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12150 emit_insn (gen_rtx_SET (reg, slot));
12151 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12152 }
12153
12154 /* Output the assembler code for a thunk function. THUNK_DECL is the
12155 declaration for the thunk function itself, FUNCTION is the decl for
12156 the target function. DELTA is an immediate constant offset to be
12157 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12158 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
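
/* In rough C-like pseudocode, the insn sequence emitted below computes

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;   -- emitted as a tail call with the adjusted this

   spelled out register by register, with %g1 (and possibly %g5) used as
   scratch.  */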
12159
12160 static void
12161 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12162 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12163 tree function)
12164 {
12165 rtx this_rtx, funexp;
12166 rtx_insn *insn;
12167 unsigned int int_arg_first;
12168
12169 reload_completed = 1;
12170 epilogue_completed = 1;
12171
12172 emit_note (NOTE_INSN_PROLOGUE_END);
12173
12174 if (TARGET_FLAT)
12175 {
12176 sparc_leaf_function_p = 1;
12177
12178 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12179 }
12180 else if (flag_delayed_branch)
12181 {
12182 /* We will emit a regular sibcall below, so we need to instruct
12183 output_sibcall that we are in a leaf function. */
12184 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12185
12186 /* This will cause final.c to invoke leaf_renumber_regs so we
12187 must behave as if we were in a not-yet-leafified function. */
12188 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12189 }
12190 else
12191 {
12192 /* We will emit the sibcall manually below, so we will need to
12193 manually spill non-leaf registers. */
12194 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12195
12196 /* We really are in a leaf function. */
12197 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12198 }
12199
12200 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12201 returns a structure, the structure return pointer is there instead. */
12202 if (TARGET_ARCH64
12203 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12204 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12205 else
12206 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12207
12208 /* Add DELTA. When possible use a plain add, otherwise load it into
12209 a register first. */
12210 if (delta)
12211 {
12212 rtx delta_rtx = GEN_INT (delta);
12213
12214 if (! SPARC_SIMM13_P (delta))
12215 {
12216 rtx scratch = gen_rtx_REG (Pmode, 1);
12217 emit_move_insn (scratch, delta_rtx);
12218 delta_rtx = scratch;
12219 }
12220
12221 /* THIS_RTX += DELTA. */
12222 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12223 }
12224
12225 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12226 if (vcall_offset)
12227 {
12228 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12229 rtx scratch = gen_rtx_REG (Pmode, 1);
12230
12231 gcc_assert (vcall_offset < 0);
12232
12233 /* SCRATCH = *THIS_RTX. */
12234 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12235
12236 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12237 may not have any available scratch register at this point. */
12238 if (SPARC_SIMM13_P (vcall_offset))
12239 ;
12240 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12241 else if (! fixed_regs[5]
12242 /* The below sequence is made up of at least 2 insns,
12243 while the default method may need only one. */
12244 && vcall_offset < -8192)
12245 {
12246 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12247 emit_move_insn (scratch2, vcall_offset_rtx);
12248 vcall_offset_rtx = scratch2;
12249 }
12250 else
12251 {
12252 rtx increment = GEN_INT (-4096);
12253
12254 /* VCALL_OFFSET is a negative number whose typical range can be
12255 estimated as -32768..0 in 32-bit mode. In almost all cases
12256 it is therefore cheaper to emit multiple add insns than
12257 spilling and loading the constant into a register (at least
12258 6 insns). */
12259 while (! SPARC_SIMM13_P (vcall_offset))
12260 {
12261 emit_insn (gen_add2_insn (scratch, increment));
12262 vcall_offset += 4096;
12263 }
12264 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12265 }
12266
12267 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12268 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12269 gen_rtx_PLUS (Pmode,
12270 scratch,
12271 vcall_offset_rtx)));
12272
12273 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12274 emit_insn (gen_add2_insn (this_rtx, scratch));
12275 }
12276
12277 /* Generate a tail call to the target function. */
12278 if (! TREE_USED (function))
12279 {
12280 assemble_external (function);
12281 TREE_USED (function) = 1;
12282 }
12283 funexp = XEXP (DECL_RTL (function), 0);
12284
12285 if (flag_delayed_branch)
12286 {
12287 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12288 insn = emit_call_insn (gen_sibcall (funexp));
12289 SIBLING_CALL_P (insn) = 1;
12290 }
12291 else
12292 {
12293 /* The hoops we have to jump through in order to generate a sibcall
12294 without using delay slots... */
12295 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12296
12297 if (flag_pic)
12298 {
12299 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12300 start_sequence ();
12301 load_got_register (); /* clobbers %o7 */
12302 scratch = sparc_legitimize_pic_address (funexp, scratch);
12303 seq = get_insns ();
12304 end_sequence ();
12305 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12306 }
12307 else if (TARGET_ARCH32)
12308 {
12309 emit_insn (gen_rtx_SET (scratch,
12310 gen_rtx_HIGH (SImode, funexp)));
12311 emit_insn (gen_rtx_SET (scratch,
12312 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12313 }
12314 else /* TARGET_ARCH64 */
12315 {
12316 switch (sparc_cmodel)
12317 {
12318 case CM_MEDLOW:
12319 case CM_MEDMID:
12320 /* The destination can serve as a temporary. */
12321 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12322 break;
12323
12324 case CM_MEDANY:
12325 case CM_EMBMEDANY:
12326 /* The destination cannot serve as a temporary. */
12327 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12328 start_sequence ();
12329 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12330 seq = get_insns ();
12331 end_sequence ();
12332 emit_and_preserve (seq, spill_reg, 0);
12333 break;
12334
12335 default:
12336 gcc_unreachable ();
12337 }
12338 }
12339
12340 emit_jump_insn (gen_indirect_jump (scratch));
12341 }
12342
12343 emit_barrier ();
12344
12345 /* Run just enough of rest_of_compilation to get the insns emitted.
12346 There's not really enough bulk here to make other passes such as
12347 instruction scheduling worthwhile. Note that use_thunk calls
12348 assemble_start_function and assemble_end_function. */
12349 insn = get_insns ();
12350 shorten_branches (insn);
12351 final_start_function (insn, file, 1);
12352 final (insn, file, 1);
12353 final_end_function ();
12354
12355 reload_completed = 0;
12356 epilogue_completed = 0;
12357 }
12358
12359 /* Return true if sparc_output_mi_thunk would be able to output the
12360 assembler code for the thunk function specified by the arguments
12361 it is passed, and false otherwise. */
12362 static bool
12363 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12364 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12365 HOST_WIDE_INT vcall_offset,
12366 const_tree function ATTRIBUTE_UNUSED)
12367 {
12368 /* Bound the loop used in the default method above. */
12369 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12370 }
12371
12372 /* How to allocate a 'struct machine_function'. */
12373
12374 static struct machine_function *
12375 sparc_init_machine_status (void)
12376 {
12377 return ggc_cleared_alloc<machine_function> ();
12378 }
12379
12380 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12381 We need to emit DTP-relative relocations. */
12382
12383 static void
12384 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12385 {
12386 switch (size)
12387 {
12388 case 4:
12389 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12390 break;
12391 case 8:
12392 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12393 break;
12394 default:
12395 gcc_unreachable ();
12396 }
12397 output_addr_const (file, x);
12398 fputs (")", file);
12399 }
12400
12401 /* Do whatever processing is required at the end of a file. */
12402
12403 static void
12404 sparc_file_end (void)
12405 {
12406 /* If we need to emit the special GOT helper function, do so now. */
12407 if (got_helper_rtx)
12408 {
12409 const char *name = XSTR (got_helper_rtx, 0);
12410 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12411 #ifdef DWARF2_UNWIND_INFO
12412 bool do_cfi;
12413 #endif
12414
12415 if (USE_HIDDEN_LINKONCE)
12416 {
12417 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12418 get_identifier (name),
12419 build_function_type_list (void_type_node,
12420 NULL_TREE));
12421 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12422 NULL_TREE, void_type_node);
12423 TREE_PUBLIC (decl) = 1;
12424 TREE_STATIC (decl) = 1;
12425 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12426 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12427 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12428 resolve_unique_section (decl, 0, flag_function_sections);
12429 allocate_struct_function (decl, true);
12430 cfun->is_thunk = 1;
12431 current_function_decl = decl;
12432 init_varasm_status ();
12433 assemble_start_function (decl, name);
12434 }
12435 else
12436 {
12437 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12438 switch_to_section (text_section);
12439 if (align > 0)
12440 ASM_OUTPUT_ALIGN (asm_out_file, align);
12441 ASM_OUTPUT_LABEL (asm_out_file, name);
12442 }
12443
12444 #ifdef DWARF2_UNWIND_INFO
12445 do_cfi = dwarf2out_do_cfi_asm ();
12446 if (do_cfi)
12447 fprintf (asm_out_file, "\t.cfi_startproc\n");
12448 #endif
12449 if (flag_delayed_branch)
12450 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12451 reg_name, reg_name);
12452 else
12453 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12454 reg_name, reg_name);
12455 #ifdef DWARF2_UNWIND_INFO
12456 if (do_cfi)
12457 fprintf (asm_out_file, "\t.cfi_endproc\n");
12458 #endif
12459 }
12460
12461 if (NEED_INDICATE_EXEC_STACK)
12462 file_end_indicate_exec_stack ();
12463
12464 #ifdef TARGET_SOLARIS
12465 solaris_file_end ();
12466 #endif
12467 }
12468
12469 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12470 /* Implement TARGET_MANGLE_TYPE. */
12471
12472 static const char *
12473 sparc_mangle_type (const_tree type)
12474 {
12475 if (TARGET_ARCH32
12476 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12477 && TARGET_LONG_DOUBLE_128)
12478 return "g";
12479
12480 /* For all other types, use normal C++ mangling. */
12481 return NULL;
12482 }
12483 #endif
12484
12485 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12486 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12487 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
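
/* For example, sparc_expand_compare_and_swap below brackets the operation
   with

     sparc_emit_membar_for_model (model, 3, 1);  -- loads+stores, before
     ... the compare-and-swap itself ...
     sparc_emit_membar_for_model (model, 3, 2);  -- loads+stores, after

   i.e. LOAD_STORE == 3 covers both kinds of access and BEFORE_AFTER selects
   which side of the operation is being fenced.  */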
12488
12489 void
12490 sparc_emit_membar_for_model (enum memmodel model,
12491 int load_store, int before_after)
12492 {
12493 /* Bits for the MEMBAR mmask field. */
12494 const int LoadLoad = 1;
12495 const int StoreLoad = 2;
12496 const int LoadStore = 4;
12497 const int StoreStore = 8;
12498
12499 int mm = 0, implied = 0;
12500
12501 switch (sparc_memory_model)
12502 {
12503 case SMM_SC:
12504 /* Sequential Consistency. All memory transactions are immediately
12505 visible in sequential execution order. No barriers needed. */
12506 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12507 break;
12508
12509 case SMM_TSO:
12510 /* Total Store Ordering: all memory transactions with store semantics
12511 are followed by an implied StoreStore. */
12512 implied |= StoreStore;
12513
12514 /* If we're not looking for a raw barrier (before+after), then atomic
12515 operations get the benefit of being both load and store. */
12516 if (load_store == 3 && before_after == 1)
12517 implied |= StoreLoad;
12518 /* FALLTHRU */
12519
12520 case SMM_PSO:
12521 /* Partial Store Ordering: all memory transactions with load semantics
12522 are followed by an implied LoadLoad | LoadStore. */
12523 implied |= LoadLoad | LoadStore;
12524
12525 /* If we're not looking for a raw barrier (before+after), then atomic
12526 operations get the benefit of being both load and store. */
12527 if (load_store == 3 && before_after == 2)
12528 implied |= StoreLoad | StoreStore;
12529 /* FALLTHRU */
12530
12531 case SMM_RMO:
12532 /* Relaxed Memory Ordering: no implicit bits. */
12533 break;
12534
12535 default:
12536 gcc_unreachable ();
12537 }
12538
12539 if (before_after & 1)
12540 {
12541 if (is_mm_release (model) || is_mm_acq_rel (model)
12542 || is_mm_seq_cst (model))
12543 {
12544 if (load_store & 1)
12545 mm |= LoadLoad | StoreLoad;
12546 if (load_store & 2)
12547 mm |= LoadStore | StoreStore;
12548 }
12549 }
12550 if (before_after & 2)
12551 {
12552 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12553 || is_mm_seq_cst (model))
12554 {
12555 if (load_store & 1)
12556 mm |= LoadLoad | LoadStore;
12557 if (load_store & 2)
12558 mm |= StoreLoad | StoreStore;
12559 }
12560 }
12561
12562 /* Remove the bits implied by the system memory model. */
12563 mm &= ~implied;
12564
12565 /* For raw barriers (before+after), always emit a barrier.
12566 This will become a compile-time barrier if needed. */
12567 if (mm || before_after == 3)
12568 emit_insn (gen_membar (GEN_INT (mm)));
12569 }
12570
12571 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12572 compare and swap on the word containing the byte or half-word. */
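
/* Worked example for a QImode access at byte address 0x1002: the aligned
   word address is 0x1002 & -4 == 0x1000, off becomes (0x1002 & 3) ^ 3 == 1
   and is then scaled to a shift count of 8, so mask == 0xff << 8 ==
   0x0000ff00, exactly the bits the byte occupies within the big-endian
   SImode word.  */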
12573
12574 static void
12575 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12576 rtx oldval, rtx newval)
12577 {
12578 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12579 rtx addr = gen_reg_rtx (Pmode);
12580 rtx off = gen_reg_rtx (SImode);
12581 rtx oldv = gen_reg_rtx (SImode);
12582 rtx newv = gen_reg_rtx (SImode);
12583 rtx oldvalue = gen_reg_rtx (SImode);
12584 rtx newvalue = gen_reg_rtx (SImode);
12585 rtx res = gen_reg_rtx (SImode);
12586 rtx resv = gen_reg_rtx (SImode);
12587 rtx memsi, val, mask, cc;
12588
12589 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12590
12591 if (Pmode != SImode)
12592 addr1 = gen_lowpart (SImode, addr1);
12593 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12594
12595 memsi = gen_rtx_MEM (SImode, addr);
12596 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12597 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12598
12599 val = copy_to_reg (memsi);
12600
12601 emit_insn (gen_rtx_SET (off,
12602 gen_rtx_XOR (SImode, off,
12603 GEN_INT (GET_MODE (mem) == QImode
12604 ? 3 : 2))));
12605
12606 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12607
12608 if (GET_MODE (mem) == QImode)
12609 mask = force_reg (SImode, GEN_INT (0xff));
12610 else
12611 mask = force_reg (SImode, GEN_INT (0xffff));
12612
12613 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12614
12615 emit_insn (gen_rtx_SET (val,
12616 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12617 val)));
12618
12619 oldval = gen_lowpart (SImode, oldval);
12620 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12621
12622 newval = gen_lowpart_common (SImode, newval);
12623 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12624
12625 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12626
12627 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12628
12629 rtx_code_label *end_label = gen_label_rtx ();
12630 rtx_code_label *loop_label = gen_label_rtx ();
12631 emit_label (loop_label);
12632
12633 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12634
12635 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12636
12637 emit_move_insn (bool_result, const1_rtx);
12638
12639 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12640
12641 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12642
12643 emit_insn (gen_rtx_SET (resv,
12644 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12645 res)));
12646
12647 emit_move_insn (bool_result, const0_rtx);
12648
12649 cc = gen_compare_reg_1 (NE, resv, val);
12650 emit_insn (gen_rtx_SET (val, resv));
12651
12652 /* Use cbranchcc4 to separate the compare and branch! */
12653 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12654 cc, const0_rtx, loop_label));
12655
12656 emit_label (end_label);
12657
12658 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12659
12660 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12661
12662 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12663 }
12664
12665 /* Expand code to perform a compare-and-swap. */
12666
12667 void
12668 sparc_expand_compare_and_swap (rtx operands[])
12669 {
12670 rtx bval, retval, mem, oldval, newval;
12671 machine_mode mode;
12672 enum memmodel model;
12673
12674 bval = operands[0];
12675 retval = operands[1];
12676 mem = operands[2];
12677 oldval = operands[3];
12678 newval = operands[4];
12679 model = (enum memmodel) INTVAL (operands[6]);
12680 mode = GET_MODE (mem);
12681
12682 sparc_emit_membar_for_model (model, 3, 1);
12683
12684 if (reg_overlap_mentioned_p (retval, oldval))
12685 oldval = copy_to_reg (oldval);
12686
12687 if (mode == QImode || mode == HImode)
12688 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12689 else
12690 {
12691 rtx (*gen) (rtx, rtx, rtx, rtx);
12692 rtx x;
12693
12694 if (mode == SImode)
12695 gen = gen_atomic_compare_and_swapsi_1;
12696 else
12697 gen = gen_atomic_compare_and_swapdi_1;
12698 emit_insn (gen (retval, mem, oldval, newval));
12699
12700 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12701 if (x != bval)
12702 convert_move (bval, x, 1);
12703 }
12704
12705 sparc_emit_membar_for_model (model, 3, 2);
12706 }
12707
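/* Expand code to compute the GSR byte mask for a vector permutation:
   SEL holds the element selector for vector mode VMODE and is compacted
   into the byte-granular selector expected by the VIS2 BSHUFFLE insn,
   which the trailing BMASK insn installs in the %gsr register.  The
   comments in each arm sketch the layout of the selector bits as they
   are compacted.  */
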
12708 void
12709 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12710 {
12711 rtx t_1, t_2, t_3;
12712
12713 sel = gen_lowpart (DImode, sel);
12714 switch (vmode)
12715 {
12716 case E_V2SImode:
12717 /* inp = xxxxxxxAxxxxxxxB */
12718 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12719 NULL_RTX, 1, OPTAB_DIRECT);
12720 /* t_1 = ....xxxxxxxAxxx. */
12721 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12722 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12723 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12724 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12725 /* sel = .......B */
12726 /* t_1 = ...A.... */
12727 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12728 /* sel = ...A...B */
12729 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12730 /* sel = AAAABBBB * 4 */
12731 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12732 /* sel = { A*4, A*4+1, A*4+2, ... } */
12733 break;
12734
12735 case E_V4HImode:
12736 /* inp = xxxAxxxBxxxCxxxD */
12737 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12738 NULL_RTX, 1, OPTAB_DIRECT);
12739 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12740 NULL_RTX, 1, OPTAB_DIRECT);
12741 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12742 NULL_RTX, 1, OPTAB_DIRECT);
12743 /* t_1 = ..xxxAxxxBxxxCxx */
12744 /* t_2 = ....xxxAxxxBxxxC */
12745 /* t_3 = ......xxxAxxxBxx */
12746 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12747 GEN_INT (0x07),
12748 NULL_RTX, 1, OPTAB_DIRECT);
12749 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12750 GEN_INT (0x0700),
12751 NULL_RTX, 1, OPTAB_DIRECT);
12752 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12753 GEN_INT (0x070000),
12754 NULL_RTX, 1, OPTAB_DIRECT);
12755 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12756 GEN_INT (0x07000000),
12757 NULL_RTX, 1, OPTAB_DIRECT);
12758 /* sel = .......D */
12759 /* t_1 = .....C.. */
12760 /* t_2 = ...B.... */
12761 /* t_3 = .A...... */
12762 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12763 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12764 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12765 /* sel = .A.B.C.D */
12766 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12767 /* sel = AABBCCDD * 2 */
12768 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12769 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12770 break;
12771
12772 case E_V8QImode:
12773 /* input = xAxBxCxDxExFxGxH */
12774 sel = expand_simple_binop (DImode, AND, sel,
12775 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12776 | 0x0f0f0f0f),
12777 NULL_RTX, 1, OPTAB_DIRECT);
12778 /* sel = .A.B.C.D.E.F.G.H */
12779 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12780 NULL_RTX, 1, OPTAB_DIRECT);
12781 /* t_1 = ..A.B.C.D.E.F.G. */
12782 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12783 NULL_RTX, 1, OPTAB_DIRECT);
12784 /* sel = .AABBCCDDEEFFGGH */
12785 sel = expand_simple_binop (DImode, AND, sel,
12786 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12787 | 0xff00ff),
12788 NULL_RTX, 1, OPTAB_DIRECT);
12789 /* sel = ..AB..CD..EF..GH */
12790 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12791 NULL_RTX, 1, OPTAB_DIRECT);
12792 /* t_1 = ....AB..CD..EF.. */
12793 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12794 NULL_RTX, 1, OPTAB_DIRECT);
12795 /* sel = ..ABABCDCDEFEFGH */
12796 sel = expand_simple_binop (DImode, AND, sel,
12797 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12798 NULL_RTX, 1, OPTAB_DIRECT);
12799 /* sel = ....ABCD....EFGH */
12800 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12801 NULL_RTX, 1, OPTAB_DIRECT);
12802 /* t_1 = ........ABCD.... */
12803 sel = gen_lowpart (SImode, sel);
12804 t_1 = gen_lowpart (SImode, t_1);
12805 break;
12806
12807 default:
12808 gcc_unreachable ();
12809 }
12810
12811 /* Always perform the final addition/merge within the bmask insn. */
12812 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12813 }
12814
12815 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12816
12817 static bool
12818 sparc_frame_pointer_required (void)
12819 {
12820 /* If the stack pointer is dynamically modified in the function, it cannot
12821 serve as the frame pointer. */
12822 if (cfun->calls_alloca)
12823 return true;
12824
12825 /* If the function receives nonlocal gotos, it needs to save the frame
12826 pointer in the nonlocal_goto_save_area object. */
12827 if (cfun->has_nonlocal_label)
12828 return true;
12829
12830 /* In flat mode, that's it. */
12831 if (TARGET_FLAT)
12832 return false;
12833
12834 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12835 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12836 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12837 }
12838
12839 /* The way this is structured, we can't eliminate SFP in favor of SP
12840 if the frame pointer is required: we want to use the SFP->HFP elimination
12841 in that case. But the test in update_eliminables doesn't know we are
12842 assuming below that we only do the former elimination. */
12843
12844 static bool
12845 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12846 {
12847 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12848 }
12849
12850 /* Return the hard frame pointer directly to bypass the stack bias. */
12851
12852 static rtx
12853 sparc_builtin_setjmp_frame_value (void)
12854 {
12855 return hard_frame_pointer_rtx;
12856 }
12857
12858 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12859 they won't be allocated. */
12860
12861 static void
12862 sparc_conditional_register_usage (void)
12863 {
12864 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12865 {
12866 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12867 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12868 }
12869 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12870 then honor it. */
12871 if (TARGET_ARCH32 && fixed_regs[5])
12872 fixed_regs[5] = 1;
12873 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12874 fixed_regs[5] = 0;
12875 if (! TARGET_V9)
12876 {
12877 int regno;
12878 for (regno = SPARC_FIRST_V9_FP_REG;
12879 regno <= SPARC_LAST_V9_FP_REG;
12880 regno++)
12881 fixed_regs[regno] = 1;
12882 /* %fcc0 is used by v8 and v9. */
12883 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12884 regno <= SPARC_LAST_V9_FCC_REG;
12885 regno++)
12886 fixed_regs[regno] = 1;
12887 }
12888 if (! TARGET_FPU)
12889 {
12890 int regno;
12891 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12892 fixed_regs[regno] = 1;
12893 }
12894 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12895 then honor it. Likewise with g3 and g4. */
12896 if (fixed_regs[2] == 2)
12897 fixed_regs[2] = ! TARGET_APP_REGS;
12898 if (fixed_regs[3] == 2)
12899 fixed_regs[3] = ! TARGET_APP_REGS;
12900 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12901 fixed_regs[4] = ! TARGET_APP_REGS;
12902 else if (TARGET_CM_EMBMEDANY)
12903 fixed_regs[4] = 1;
12904 else if (fixed_regs[4] == 2)
12905 fixed_regs[4] = 0;
12906 if (TARGET_FLAT)
12907 {
12908 int regno;
12909 /* Disable leaf functions. */
12910 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12911 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12912 leaf_reg_remap [regno] = regno;
12913 }
12914 if (TARGET_VIS)
12915 global_regs[SPARC_GSR_REG] = 1;
12916 }
12917
12918 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12919
12920 - We can't load constants into FP registers.
12921 - We can't load FP constants into integer registers when soft-float,
12922 because there is no soft-float pattern with a r/F constraint.
12923 - We can't load FP constants into integer registers for TFmode unless
12924 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12925 - Try and reload integer constants (symbolic or otherwise) back into
12926 registers directly, rather than having them dumped to memory. */
12927
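/* For instance, a floating-point constant asked to go into FP_REGS is
   answered with NO_REGS, which forces it to memory, whereas an integer
   constant is steered back into GENERAL_REGS instead of being spilled.  */
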
12928 static reg_class_t
12929 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12930 {
12931 machine_mode mode = GET_MODE (x);
12932 if (CONSTANT_P (x))
12933 {
12934 if (FP_REG_CLASS_P (rclass)
12935 || rclass == GENERAL_OR_FP_REGS
12936 || rclass == GENERAL_OR_EXTRA_FP_REGS
12937 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12938 || (mode == TFmode && ! const_zero_operand (x, mode)))
12939 return NO_REGS;
12940
12941 if (GET_MODE_CLASS (mode) == MODE_INT)
12942 return GENERAL_REGS;
12943
12944 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12945 {
12946 if (! FP_REG_CLASS_P (rclass)
12947 || !(const_zero_operand (x, mode)
12948 || const_all_ones_operand (x, mode)))
12949 return NO_REGS;
12950 }
12951 }
12952
12953 if (TARGET_VIS3
12954 && ! TARGET_ARCH64
12955 && (rclass == EXTRA_FP_REGS
12956 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12957 {
12958 int regno = true_regnum (x);
12959
12960 if (SPARC_INT_REG_P (regno))
12961 return (rclass == EXTRA_FP_REGS
12962 ? FP_REGS : GENERAL_OR_FP_REGS);
12963 }
12964
12965 return rclass;
12966 }
12967
12968 /* Return true if we use LRA instead of reload pass. */
12969
12970 static bool
12971 sparc_lra_p (void)
12972 {
12973 return TARGET_LRA;
12974 }
12975
12976 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12977 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12978
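/* For the register/register alternative with distinct 64-bit operands, the
   emitted sequence is, after optionally zero-extending the low words with
   "srl":

 	sllx	%H1, 32, %3
 	sllx	%H2, 32, %4
 	or	%L1, %3, %3
 	or	%L2, %4, %4
 	<opcode>	%3, %4, %3
 	srlx	%3, 32, %H0
 	mov	%3, %L0

   i.e. the two halves of each operand are first glued back into single
   64-bit registers before the multiply.  */
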
12979 const char *
12980 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12981 {
12982 char mulstr[32];
12983
12984 gcc_assert (! TARGET_ARCH64);
12985
12986 if (sparc_check_64 (operands[1], insn) <= 0)
12987 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12988 if (which_alternative == 1)
12989 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12990 if (GET_CODE (operands[2]) == CONST_INT)
12991 {
12992 if (which_alternative == 1)
12993 {
12994 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12995 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12996 output_asm_insn (mulstr, operands);
12997 return "srlx\t%L0, 32, %H0";
12998 }
12999 else
13000 {
13001 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13002 output_asm_insn ("or\t%L1, %3, %3", operands);
13003 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13004 output_asm_insn (mulstr, operands);
13005 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13006 return "mov\t%3, %L0";
13007 }
13008 }
13009 else if (rtx_equal_p (operands[1], operands[2]))
13010 {
13011 if (which_alternative == 1)
13012 {
13013 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13014 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13015 output_asm_insn (mulstr, operands);
13016 return "srlx\t%L0, 32, %H0";
13017 }
13018 else
13019 {
13020 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13021 output_asm_insn ("or\t%L1, %3, %3", operands);
13022 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13023 output_asm_insn (mulstr, operands);
13024 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13025 return "mov\t%3, %L0";
13026 }
13027 }
13028 if (sparc_check_64 (operands[2], insn) <= 0)
13029 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13030 if (which_alternative == 1)
13031 {
13032 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13033 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13034 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13035 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13036 output_asm_insn (mulstr, operands);
13037 return "srlx\t%L0, 32, %H0";
13038 }
13039 else
13040 {
13041 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13042 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13043 output_asm_insn ("or\t%L1, %3, %3", operands);
13044 output_asm_insn ("or\t%L2, %4, %4", operands);
13045 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13046 output_asm_insn (mulstr, operands);
13047 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13048 return "mov\t%3, %L0";
13049 }
13050 }
13051
13052 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13053 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
13054 and INNER_MODE are the modes describing TARGET. */
13055
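/* The BMASK constants used below encode, nibble by nibble, which byte of
   the {T1,T1} concatenation each result byte of the BSHUFFLE takes.  ELT
   is moved into the low 32 bits of T1, so on this big-endian target it
   ends up in the trailing bytes of T1; 0x45674567, 0x67676767 and
   0x77777777 therefore replicate the trailing word, half-word or byte
   across the whole vector.  */
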
13056 static void
13057 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13058 machine_mode inner_mode)
13059 {
13060 rtx t1, final_insn, sel;
13061 int bmask;
13062
13063 t1 = gen_reg_rtx (mode);
13064
13065 elt = convert_modes (SImode, inner_mode, elt, true);
13066 emit_move_insn (gen_lowpart (SImode, t1), elt);
13067
13068 switch (mode)
13069 {
13070 case E_V2SImode:
13071 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13072 bmask = 0x45674567;
13073 break;
13074 case E_V4HImode:
13075 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13076 bmask = 0x67676767;
13077 break;
13078 case E_V8QImode:
13079 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13080 bmask = 0x77777777;
13081 break;
13082 default:
13083 gcc_unreachable ();
13084 }
13085
13086 sel = force_reg (SImode, GEN_INT (bmask));
13087 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13088 emit_insn (final_insn);
13089 }
13090
13091 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13092 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
13093
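/* Each FPMERGE interleaves the bytes of its two 4-byte inputs, so starting
   from ELT in the last byte of T1 the number of trailing copies of ELT
   doubles on every step: 1 in T1, 2 in T2, 4 in T3 and finally 8 in
   TARGET.  */
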
13094 static void
13095 vector_init_fpmerge (rtx target, rtx elt)
13096 {
13097 rtx t1, t2, t2_low, t3, t3_low;
13098
13099 t1 = gen_reg_rtx (V4QImode);
13100 elt = convert_modes (SImode, QImode, elt, true);
13101 emit_move_insn (gen_lowpart (SImode, t1), elt);
13102
13103 t2 = gen_reg_rtx (V8QImode);
13104 t2_low = gen_lowpart (V4QImode, t2);
13105 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13106
13107 t3 = gen_reg_rtx (V8QImode);
13108 t3_low = gen_lowpart (V4QImode, t3);
13109 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13110
13111 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13112 }
13113
13114 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13115 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
13116
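/* The ALIGNADDR with address 6 sets the GSR alignment offset to 6, so each
   FALIGNDATA extracts bytes 6..13 of the {T1,TARGET} concatenation: the
   half-word holding ELT followed by the first three half-words of TARGET.
   Four iterations therefore shift a fresh copy of ELT into every slot.  */
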
13117 static void
13118 vector_init_faligndata (rtx target, rtx elt)
13119 {
13120 rtx t1 = gen_reg_rtx (V4HImode);
13121 int i;
13122
13123 elt = convert_modes (SImode, HImode, elt, true);
13124 emit_move_insn (gen_lowpart (SImode, t1), elt);
13125
13126 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13127 force_reg (SImode, GEN_INT (6)),
13128 const0_rtx));
13129
13130 for (i = 0; i < 4; i++)
13131 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13132 }
13133
13134 /* Emit code to initialize TARGET to values for individual fields VALS. */
13135
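/* The strategy, in decreasing order of preference: a fully constant vector
   becomes a CONST_VECTOR move; a single-element vector is a plain scalar
   move; a vector made of two word-sized elements is two word moves; a
   uniform 8-byte vector is built with the VIS routines above; anything
   else is assembled element by element in a stack temporary.  */
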
13136 void
13137 sparc_expand_vector_init (rtx target, rtx vals)
13138 {
13139 const machine_mode mode = GET_MODE (target);
13140 const machine_mode inner_mode = GET_MODE_INNER (mode);
13141 const int n_elts = GET_MODE_NUNITS (mode);
13142 int i, n_var = 0;
13143 bool all_same = true;
13144 rtx mem;
13145
13146 for (i = 0; i < n_elts; i++)
13147 {
13148 rtx x = XVECEXP (vals, 0, i);
13149 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13150 n_var++;
13151
13152 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13153 all_same = false;
13154 }
13155
13156 if (n_var == 0)
13157 {
13158 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13159 return;
13160 }
13161
13162 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13163 {
13164 if (GET_MODE_SIZE (inner_mode) == 4)
13165 {
13166 emit_move_insn (gen_lowpart (SImode, target),
13167 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13168 return;
13169 }
13170 else if (GET_MODE_SIZE (inner_mode) == 8)
13171 {
13172 emit_move_insn (gen_lowpart (DImode, target),
13173 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13174 return;
13175 }
13176 }
13177 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13178 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13179 {
13180 emit_move_insn (gen_highpart (word_mode, target),
13181 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13182 emit_move_insn (gen_lowpart (word_mode, target),
13183 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13184 return;
13185 }
13186
13187 if (all_same && GET_MODE_SIZE (mode) == 8)
13188 {
13189 if (TARGET_VIS2)
13190 {
13191 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13192 return;
13193 }
13194 if (mode == V8QImode)
13195 {
13196 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13197 return;
13198 }
13199 if (mode == V4HImode)
13200 {
13201 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13202 return;
13203 }
13204 }
13205
13206 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13207 for (i = 0; i < n_elts; i++)
13208 emit_move_insn (adjust_address_nv (mem, inner_mode,
13209 i * GET_MODE_SIZE (inner_mode)),
13210 XVECEXP (vals, 0, i));
13211 emit_move_insn (target, mem);
13212 }
13213
13214 /* Implement TARGET_SECONDARY_RELOAD. */
13215
13216 static reg_class_t
13217 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13218 machine_mode mode, secondary_reload_info *sri)
13219 {
13220 enum reg_class rclass = (enum reg_class) rclass_i;
13221
13222 sri->icode = CODE_FOR_nothing;
13223 sri->extra_cost = 0;
13224
13225 /* We need a temporary when loading/storing a HImode/QImode value
13226 between memory and the FPU registers. This can happen when combine puts
13227 a paradoxical subreg in a float/fix conversion insn. */
13228 if (FP_REG_CLASS_P (rclass)
13229 && (mode == HImode || mode == QImode)
13230 && (GET_CODE (x) == MEM
13231 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13232 && true_regnum (x) == -1)))
13233 return GENERAL_REGS;
13234
13235 /* On 32-bit we need a temporary when loading/storing a DFmode value
13236 between unaligned memory and the upper FPU registers. */
13237 if (TARGET_ARCH32
13238 && rclass == EXTRA_FP_REGS
13239 && mode == DFmode
13240 && GET_CODE (x) == MEM
13241 && ! mem_min_alignment (x, 8))
13242 return FP_REGS;
13243
13244 if (((TARGET_CM_MEDANY
13245 && symbolic_operand (x, mode))
13246 || (TARGET_CM_EMBMEDANY
13247 && text_segment_operand (x, mode)))
13248 && ! flag_pic)
13249 {
13250 if (in_p)
13251 sri->icode = direct_optab_handler (reload_in_optab, mode);
13252 else
13253 sri->icode = direct_optab_handler (reload_out_optab, mode);
13254 return NO_REGS;
13255 }
13256
13257 if (TARGET_VIS3 && TARGET_ARCH32)
13258 {
13259 int regno = true_regnum (x);
13260
13261 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13262 to move 8-byte values in 4-byte pieces. This only works via
13263 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13264 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13265 an FP_REGS intermediate move. */
13266 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13267 || ((general_or_i64_p (rclass)
13268 || rclass == GENERAL_OR_FP_REGS)
13269 && SPARC_FP_REG_P (regno)))
13270 {
13271 sri->extra_cost = 2;
13272 return FP_REGS;
13273 }
13274 }
13275
13276 return NO_REGS;
13277 }
13278
13279 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13280
13281 On SPARC when not VIS3 it is not possible to directly move data
13282 between GENERAL_REGS and FP_REGS. */
13283
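/* For example, copying a DImode value between %o0 and %f0 on pre-VIS3
   hardware has to bounce through a stack slot, whereas the VIS3 fp<-->int
   move instructions make the memory intermediary unnecessary for 4- and
   8-byte modes.  */
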
13284 static bool
13285 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13286 reg_class_t class2)
13287 {
13288 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13289 && (! TARGET_VIS3
13290 || GET_MODE_SIZE (mode) > 8
13291 || GET_MODE_SIZE (mode) < 4));
13292 }
13293
13294 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13295
13296 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13297 because the movsi and movsf patterns don't handle r/f moves.
13298 For v8 we copy the default definition. */
13299
13300 static machine_mode
13301 sparc_secondary_memory_needed_mode (machine_mode mode)
13302 {
13303 if (TARGET_ARCH64)
13304 {
13305 if (GET_MODE_BITSIZE (mode) < 32)
13306 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13307 return mode;
13308 }
13309 else
13310 {
13311 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13312 return mode_for_size (BITS_PER_WORD,
13313 GET_MODE_CLASS (mode), 0).require ();
13314 return mode;
13315 }
13316 }
13317
13318 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13319 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13320
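/* When the expansion succeeds it consists of at most three parts: an
   optional copy of OPERANDS[3] into the destination, a compare producing
   a CC register (or, for a V9 register comparison against zero, no compare
   at all), and a single conditional-move insn selecting OPERANDS[2].  */
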
13321 bool
13322 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13323 {
13324 enum rtx_code rc = GET_CODE (operands[1]);
13325 machine_mode cmp_mode;
13326 rtx cc_reg, dst, cmp;
13327
13328 cmp = operands[1];
13329 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13330 return false;
13331
13332 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13333 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13334
13335 cmp_mode = GET_MODE (XEXP (cmp, 0));
13336 rc = GET_CODE (cmp);
13337
13338 dst = operands[0];
13339 if (! rtx_equal_p (operands[2], dst)
13340 && ! rtx_equal_p (operands[3], dst))
13341 {
13342 if (reg_overlap_mentioned_p (dst, cmp))
13343 dst = gen_reg_rtx (mode);
13344
13345 emit_move_insn (dst, operands[3]);
13346 }
13347 else if (operands[2] == dst)
13348 {
13349 operands[2] = operands[3];
13350
13351 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13352 rc = reverse_condition_maybe_unordered (rc);
13353 else
13354 rc = reverse_condition (rc);
13355 }
13356
13357 if (XEXP (cmp, 1) == const0_rtx
13358 && GET_CODE (XEXP (cmp, 0)) == REG
13359 && cmp_mode == DImode
13360 && v9_regcmp_p (rc))
13361 cc_reg = XEXP (cmp, 0);
13362 else
13363 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13364
13365 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13366
13367 emit_insn (gen_rtx_SET (dst,
13368 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13369
13370 if (dst != operands[0])
13371 emit_move_insn (operands[0], dst);
13372
13373 return true;
13374 }
13375
13376 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13377 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13378 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13379 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13380 code to be used for the condition mask. */
13381
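/* The expansion is three instructions: a VIS compare (FCODE) that writes a
   per-element condition mask into an integer register, a CMASK operation
   (CCODE) that converts it into the byte-shuffle mask held in the %gsr
   register, and a BSHUFFLE that merges OPERANDS[1] and OPERANDS[2] under
   that mask.  */
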
13382 void
13383 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13384 {
13385 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13386 enum rtx_code code = GET_CODE (operands[3]);
13387
13388 mask = gen_reg_rtx (Pmode);
13389 cop0 = operands[4];
13390 cop1 = operands[5];
13391 if (code == LT || code == GE)
13392 {
13393 rtx t;
13394
13395 code = swap_condition (code);
13396 t = cop0; cop0 = cop1; cop1 = t;
13397 }
13398
13399 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13400
13401 fcmp = gen_rtx_UNSPEC (Pmode,
13402 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13403 fcode);
13404
13405 cmask = gen_rtx_UNSPEC (DImode,
13406 gen_rtvec (2, mask, gsr),
13407 ccode);
13408
13409 bshuf = gen_rtx_UNSPEC (mode,
13410 gen_rtvec (3, operands[1], operands[2], gsr),
13411 UNSPEC_BSHUFFLE);
13412
13413 emit_insn (gen_rtx_SET (mask, fcmp));
13414 emit_insn (gen_rtx_SET (gsr, cmask));
13415
13416 emit_insn (gen_rtx_SET (operands[0], bshuf));
13417 }
13418
13419 /* On sparc, any mode which naturally allocates into the float
13420 registers should return 4 here. */
13421
13422 unsigned int
13423 sparc_regmode_natural_size (machine_mode mode)
13424 {
13425 int size = UNITS_PER_WORD;
13426
13427 if (TARGET_ARCH64)
13428 {
13429 enum mode_class mclass = GET_MODE_CLASS (mode);
13430
13431 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13432 size = 4;
13433 }
13434
13435 return size;
13436 }
13437
13438 /* Implement TARGET_HARD_REGNO_NREGS.
13439
13440 On SPARC, ordinary registers hold 32 bits worth; this means both
13441 integer and floating point registers. On v9, integer regs hold 64
13442 bits worth; floating point regs hold 32 bits worth (this includes the
13443 new fp regs as even the odd ones are included in the hard register
13444 count). */
13445
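/* For example, a DImode value therefore needs a single integer register but
   two floating-point registers on V9, and two registers of either kind on
   32-bit SPARC.  */
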
13446 static unsigned int
13447 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13448 {
13449 if (regno == SPARC_GSR_REG)
13450 return 1;
13451 if (TARGET_ARCH64)
13452 {
13453 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13454 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13455 return CEIL (GET_MODE_SIZE (mode), 4);
13456 }
13457 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13458 }
13459
13460 /* Implement TARGET_HARD_REGNO_MODE_OK.
13461
13462 ??? Because of the funny way we pass parameters we should allow certain
13463 ??? types of float/complex values to be in integer registers during
13464 ??? RTL generation. This only matters on arch32. */
13465
13466 static bool
13467 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13468 {
13469 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13470 }
13471
13472 /* Implement TARGET_MODES_TIEABLE_P.
13473
13474 For V9 we have to deal with the fact that only the lower 32 floating
13475 point registers are 32-bit addressable. */
13476
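/* For example, SFmode does not tie with DFmode on V9: a DFmode value may
   live in %f32 and above, where no 4-byte access exists.  */
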
13477 static bool
13478 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13479 {
13480 enum mode_class mclass1, mclass2;
13481 unsigned short size1, size2;
13482
13483 if (mode1 == mode2)
13484 return true;
13485
13486 mclass1 = GET_MODE_CLASS (mode1);
13487 mclass2 = GET_MODE_CLASS (mode2);
13488 if (mclass1 != mclass2)
13489 return false;
13490
13491 if (! TARGET_V9)
13492 return true;
13493
13494 /* Classes are the same and we are V9 so we have to deal with upper
13495 vs. lower floating point registers. If one of the modes is a
13496 4-byte mode, and the other is not, we have to mark them as not
13497 tieable because only the lower 32 floating point registers are
13498 addressable 32-bits at a time.
13499
13500 We can't just test explicitly for SFmode, otherwise we won't
13501 cover the vector mode cases properly. */
13502
13503 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13504 return true;
13505
13506 size1 = GET_MODE_SIZE (mode1);
13507 size2 = GET_MODE_SIZE (mode2);
13508 if ((size1 > 4 && size2 == 4)
13509 || (size2 > 4 && size1 == 4))
13510 return false;
13511
13512 return true;
13513 }
13514
13515 /* Implement TARGET_CSTORE_MODE. */
13516
13517 static scalar_int_mode
13518 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13519 {
13520 return (TARGET_ARCH64 ? DImode : SImode);
13521 }
13522
13523 /* Return the compound expression made of T1 and T2. */
13524
13525 static inline tree
13526 compound_expr (tree t1, tree t2)
13527 {
13528 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13529 }
13530
13531 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13532
13533 static void
13534 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13535 {
13536 if (!TARGET_FPU)
13537 return;
13538
13539 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13540 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13541
13542 /* We generate the equivalent of feholdexcept (&fenv_var):
13543
13544 unsigned int fenv_var;
13545 __builtin_store_fsr (&fenv_var);
13546
13547 unsigned int tmp1_var;
13548 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13549
13550 __builtin_load_fsr (&tmp1_var); */
13551
13552 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13553 TREE_ADDRESSABLE (fenv_var) = 1;
13554 tree fenv_addr = build_fold_addr_expr (fenv_var);
13555 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13556 tree hold_stfsr
13557 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13558 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13559
13560 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13561 TREE_ADDRESSABLE (tmp1_var) = 1;
13562 tree masked_fenv_var
13563 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13564 build_int_cst (unsigned_type_node,
13565 ~(accrued_exception_mask | trap_enable_mask)));
13566 tree hold_mask
13567 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13568 NULL_TREE, NULL_TREE);
13569
13570 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13571 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13572 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13573
13574 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13575
13576 /* We reload the value of tmp1_var to clear the exceptions:
13577
13578 __builtin_load_fsr (&tmp1_var); */
13579
13580 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13581
13582 /* We generate the equivalent of feupdateenv (&fenv_var):
13583
13584 unsigned int tmp2_var;
13585 __builtin_store_fsr (&tmp2_var);
13586
13587 __builtin_load_fsr (&fenv_var);
13588
13589 if (SPARC_LOW_FE_EXCEPT_VALUES)
13590 tmp2_var >>= 5;
13591 __atomic_feraiseexcept ((int) tmp2_var); */
13592
13593 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13594 TREE_ADDRESSABLE (tmp2_var) = 1;
13595 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13596 tree update_stfsr
13597 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13598 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13599
13600 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13601
13602 tree atomic_feraiseexcept
13603 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13604 tree update_call
13605 = build_call_expr (atomic_feraiseexcept, 1,
13606 fold_convert (integer_type_node, tmp2_var));
13607
13608 if (SPARC_LOW_FE_EXCEPT_VALUES)
13609 {
13610 tree shifted_tmp2_var
13611 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13612 build_int_cst (unsigned_type_node, 5));
13613 tree update_shift
13614 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13615 update_call = compound_expr (update_shift, update_call);
13616 }
13617
13618 *update
13619 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13620 }
13621
13622 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13623
13624 SImode loads to floating-point registers are not zero-extended.
13625 The definition for LOAD_EXTEND_OP specifies that integer loads
13626 narrower than BITS_PER_WORD will be zero-extended. As a result,
13627 we inhibit changes from SImode unless they are to a mode that is
13628 identical in size.
13629
13630 Likewise for SFmode, since word-mode paradoxical subregs are
13631 problematic on big-endian architectures. */
13632
13633 static bool
13634 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13635 reg_class_t rclass)
13636 {
13637 if (TARGET_ARCH64
13638 && GET_MODE_SIZE (from) == 4
13639 && GET_MODE_SIZE (to) != 4)
13640 return !reg_classes_intersect_p (rclass, FP_REGS);
13641 return true;
13642 }
13643
13644 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13645
13646 static HOST_WIDE_INT
13647 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13648 {
13649 if (TREE_CODE (exp) == STRING_CST)
13650 return MAX (align, FASTEST_ALIGNMENT);
13651 return align;
13652 }
13653
13654 #include "gt-sparc.h"