1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "common/common-target.h"
55 #include "gimplify.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "params.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63
64 /* This file should be included last. */
65 #include "target-def.h"
66
67 /* Processor costs */
68
69 struct processor_costs {
70 /* Integer load */
71 const int int_load;
72
73 /* Integer signed load */
74 const int int_sload;
75
76 /* Integer zeroed load */
77 const int int_zload;
78
79 /* Float load */
80 const int float_load;
81
82 /* fmov, fneg, fabs */
83 const int float_move;
84
85 /* fadd, fsub */
86 const int float_plusminus;
87
88 /* fcmp */
89 const int float_cmp;
90
91 /* fmov, fmovr */
92 const int float_cmove;
93
94 /* fmul */
95 const int float_mul;
96
97 /* fdivs */
98 const int float_div_sf;
99
100 /* fdivd */
101 const int float_div_df;
102
103 /* fsqrts */
104 const int float_sqrt_sf;
105
106 /* fsqrtd */
107 const int float_sqrt_df;
108
109 /* umul/smul */
110 const int int_mul;
111
112 /* mulX */
113 const int int_mulX;
114
115 /* integer multiply cost for each bit set past the most
116 significant 3, so the formula for multiply cost becomes:
117
118 if (rs1 < 0)
119 highest_bit = highest_clear_bit(rs1);
120 else
121 highest_bit = highest_set_bit(rs1);
122 if (highest_bit < 3)
123 highest_bit = 3;
124 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
125
126 A value of zero indicates that the multiply cost is fixed
127 and not variable. */
128 const int int_mul_bit_factor;
129
130 /* udiv/sdiv */
131 const int int_div;
132
133 /* divX */
134 const int int_divX;
135
136 /* movcc, movr */
137 const int int_cmove;
138
139 /* penalty for shifts, due to scheduling rules etc. */
140 const int shift_penalty;
141 };
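/* As a worked illustration of the cost formula above (a sketch only, using
the ultrasparc entry defined later in this file, where int_mul is
COSTS_N_INSNS (4) and int_mul_bit_factor is 2): a multiply whose operand
has its highest set bit at position 11 would be costed as

COSTS_N_INSNS (4) + (11 - 3) / 2 == COSTS_N_INSNS (4) + 4

whereas a table with int_mul_bit_factor of zero charges the flat int_mul
(or int_mulX) cost regardless of the operand value. */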
142
143 static const
144 struct processor_costs cypress_costs = {
145 COSTS_N_INSNS (2), /* int load */
146 COSTS_N_INSNS (2), /* int signed load */
147 COSTS_N_INSNS (2), /* int zeroed load */
148 COSTS_N_INSNS (2), /* float load */
149 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
150 COSTS_N_INSNS (5), /* fadd, fsub */
151 COSTS_N_INSNS (1), /* fcmp */
152 COSTS_N_INSNS (1), /* fmov, fmovr */
153 COSTS_N_INSNS (7), /* fmul */
154 COSTS_N_INSNS (37), /* fdivs */
155 COSTS_N_INSNS (37), /* fdivd */
156 COSTS_N_INSNS (63), /* fsqrts */
157 COSTS_N_INSNS (63), /* fsqrtd */
158 COSTS_N_INSNS (1), /* imul */
159 COSTS_N_INSNS (1), /* imulX */
160 0, /* imul bit factor */
161 COSTS_N_INSNS (1), /* idiv */
162 COSTS_N_INSNS (1), /* idivX */
163 COSTS_N_INSNS (1), /* movcc/movr */
164 0, /* shift penalty */
165 };
166
167 static const
168 struct processor_costs supersparc_costs = {
169 COSTS_N_INSNS (1), /* int load */
170 COSTS_N_INSNS (1), /* int signed load */
171 COSTS_N_INSNS (1), /* int zeroed load */
172 COSTS_N_INSNS (0), /* float load */
173 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
174 COSTS_N_INSNS (3), /* fadd, fsub */
175 COSTS_N_INSNS (3), /* fcmp */
176 COSTS_N_INSNS (1), /* fmov, fmovr */
177 COSTS_N_INSNS (3), /* fmul */
178 COSTS_N_INSNS (6), /* fdivs */
179 COSTS_N_INSNS (9), /* fdivd */
180 COSTS_N_INSNS (12), /* fsqrts */
181 COSTS_N_INSNS (12), /* fsqrtd */
182 COSTS_N_INSNS (4), /* imul */
183 COSTS_N_INSNS (4), /* imulX */
184 0, /* imul bit factor */
185 COSTS_N_INSNS (4), /* idiv */
186 COSTS_N_INSNS (4), /* idivX */
187 COSTS_N_INSNS (1), /* movcc/movr */
188 1, /* shift penalty */
189 };
190
191 static const
192 struct processor_costs hypersparc_costs = {
193 COSTS_N_INSNS (1), /* int load */
194 COSTS_N_INSNS (1), /* int signed load */
195 COSTS_N_INSNS (1), /* int zeroed load */
196 COSTS_N_INSNS (1), /* float load */
197 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
198 COSTS_N_INSNS (1), /* fadd, fsub */
199 COSTS_N_INSNS (1), /* fcmp */
200 COSTS_N_INSNS (1), /* fmov, fmovr */
201 COSTS_N_INSNS (1), /* fmul */
202 COSTS_N_INSNS (8), /* fdivs */
203 COSTS_N_INSNS (12), /* fdivd */
204 COSTS_N_INSNS (17), /* fsqrts */
205 COSTS_N_INSNS (17), /* fsqrtd */
206 COSTS_N_INSNS (17), /* imul */
207 COSTS_N_INSNS (17), /* imulX */
208 0, /* imul bit factor */
209 COSTS_N_INSNS (17), /* idiv */
210 COSTS_N_INSNS (17), /* idivX */
211 COSTS_N_INSNS (1), /* movcc/movr */
212 0, /* shift penalty */
213 };
214
215 static const
216 struct processor_costs leon_costs = {
217 COSTS_N_INSNS (1), /* int load */
218 COSTS_N_INSNS (1), /* int signed load */
219 COSTS_N_INSNS (1), /* int zeroed load */
220 COSTS_N_INSNS (1), /* float load */
221 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
222 COSTS_N_INSNS (1), /* fadd, fsub */
223 COSTS_N_INSNS (1), /* fcmp */
224 COSTS_N_INSNS (1), /* fmov, fmovr */
225 COSTS_N_INSNS (1), /* fmul */
226 COSTS_N_INSNS (15), /* fdivs */
227 COSTS_N_INSNS (15), /* fdivd */
228 COSTS_N_INSNS (23), /* fsqrts */
229 COSTS_N_INSNS (23), /* fsqrtd */
230 COSTS_N_INSNS (5), /* imul */
231 COSTS_N_INSNS (5), /* imulX */
232 0, /* imul bit factor */
233 COSTS_N_INSNS (5), /* idiv */
234 COSTS_N_INSNS (5), /* idivX */
235 COSTS_N_INSNS (1), /* movcc/movr */
236 0, /* shift penalty */
237 };
238
239 static const
240 struct processor_costs leon3_costs = {
241 COSTS_N_INSNS (1), /* int load */
242 COSTS_N_INSNS (1), /* int signed load */
243 COSTS_N_INSNS (1), /* int zeroed load */
244 COSTS_N_INSNS (1), /* float load */
245 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
246 COSTS_N_INSNS (1), /* fadd, fsub */
247 COSTS_N_INSNS (1), /* fcmp */
248 COSTS_N_INSNS (1), /* fmov, fmovr */
249 COSTS_N_INSNS (1), /* fmul */
250 COSTS_N_INSNS (14), /* fdivs */
251 COSTS_N_INSNS (15), /* fdivd */
252 COSTS_N_INSNS (22), /* fsqrts */
253 COSTS_N_INSNS (23), /* fsqrtd */
254 COSTS_N_INSNS (5), /* imul */
255 COSTS_N_INSNS (5), /* imulX */
256 0, /* imul bit factor */
257 COSTS_N_INSNS (35), /* idiv */
258 COSTS_N_INSNS (35), /* idivX */
259 COSTS_N_INSNS (1), /* movcc/movr */
260 0, /* shift penalty */
261 };
262
263 static const
264 struct processor_costs sparclet_costs = {
265 COSTS_N_INSNS (3), /* int load */
266 COSTS_N_INSNS (3), /* int signed load */
267 COSTS_N_INSNS (1), /* int zeroed load */
268 COSTS_N_INSNS (1), /* float load */
269 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
270 COSTS_N_INSNS (1), /* fadd, fsub */
271 COSTS_N_INSNS (1), /* fcmp */
272 COSTS_N_INSNS (1), /* fmov, fmovr */
273 COSTS_N_INSNS (1), /* fmul */
274 COSTS_N_INSNS (1), /* fdivs */
275 COSTS_N_INSNS (1), /* fdivd */
276 COSTS_N_INSNS (1), /* fsqrts */
277 COSTS_N_INSNS (1), /* fsqrtd */
278 COSTS_N_INSNS (5), /* imul */
279 COSTS_N_INSNS (5), /* imulX */
280 0, /* imul bit factor */
281 COSTS_N_INSNS (5), /* idiv */
282 COSTS_N_INSNS (5), /* idivX */
283 COSTS_N_INSNS (1), /* movcc/movr */
284 0, /* shift penalty */
285 };
286
287 static const
288 struct processor_costs ultrasparc_costs = {
289 COSTS_N_INSNS (2), /* int load */
290 COSTS_N_INSNS (3), /* int signed load */
291 COSTS_N_INSNS (2), /* int zeroed load */
292 COSTS_N_INSNS (2), /* float load */
293 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
294 COSTS_N_INSNS (4), /* fadd, fsub */
295 COSTS_N_INSNS (1), /* fcmp */
296 COSTS_N_INSNS (2), /* fmov, fmovr */
297 COSTS_N_INSNS (4), /* fmul */
298 COSTS_N_INSNS (13), /* fdivs */
299 COSTS_N_INSNS (23), /* fdivd */
300 COSTS_N_INSNS (13), /* fsqrts */
301 COSTS_N_INSNS (23), /* fsqrtd */
302 COSTS_N_INSNS (4), /* imul */
303 COSTS_N_INSNS (4), /* imulX */
304 2, /* imul bit factor */
305 COSTS_N_INSNS (37), /* idiv */
306 COSTS_N_INSNS (68), /* idivX */
307 COSTS_N_INSNS (2), /* movcc/movr */
308 2, /* shift penalty */
309 };
310
311 static const
312 struct processor_costs ultrasparc3_costs = {
313 COSTS_N_INSNS (2), /* int load */
314 COSTS_N_INSNS (3), /* int signed load */
315 COSTS_N_INSNS (3), /* int zeroed load */
316 COSTS_N_INSNS (2), /* float load */
317 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
318 COSTS_N_INSNS (4), /* fadd, fsub */
319 COSTS_N_INSNS (5), /* fcmp */
320 COSTS_N_INSNS (3), /* fmov, fmovr */
321 COSTS_N_INSNS (4), /* fmul */
322 COSTS_N_INSNS (17), /* fdivs */
323 COSTS_N_INSNS (20), /* fdivd */
324 COSTS_N_INSNS (20), /* fsqrts */
325 COSTS_N_INSNS (29), /* fsqrtd */
326 COSTS_N_INSNS (6), /* imul */
327 COSTS_N_INSNS (6), /* imulX */
328 0, /* imul bit factor */
329 COSTS_N_INSNS (40), /* idiv */
330 COSTS_N_INSNS (71), /* idivX */
331 COSTS_N_INSNS (2), /* movcc/movr */
332 0, /* shift penalty */
333 };
334
335 static const
336 struct processor_costs niagara_costs = {
337 COSTS_N_INSNS (3), /* int load */
338 COSTS_N_INSNS (3), /* int signed load */
339 COSTS_N_INSNS (3), /* int zeroed load */
340 COSTS_N_INSNS (9), /* float load */
341 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
342 COSTS_N_INSNS (8), /* fadd, fsub */
343 COSTS_N_INSNS (26), /* fcmp */
344 COSTS_N_INSNS (8), /* fmov, fmovr */
345 COSTS_N_INSNS (29), /* fmul */
346 COSTS_N_INSNS (54), /* fdivs */
347 COSTS_N_INSNS (83), /* fdivd */
348 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
349 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
350 COSTS_N_INSNS (11), /* imul */
351 COSTS_N_INSNS (11), /* imulX */
352 0, /* imul bit factor */
353 COSTS_N_INSNS (72), /* idiv */
354 COSTS_N_INSNS (72), /* idivX */
355 COSTS_N_INSNS (1), /* movcc/movr */
356 0, /* shift penalty */
357 };
358
359 static const
360 struct processor_costs niagara2_costs = {
361 COSTS_N_INSNS (3), /* int load */
362 COSTS_N_INSNS (3), /* int signed load */
363 COSTS_N_INSNS (3), /* int zeroed load */
364 COSTS_N_INSNS (3), /* float load */
365 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
366 COSTS_N_INSNS (6), /* fadd, fsub */
367 COSTS_N_INSNS (6), /* fcmp */
368 COSTS_N_INSNS (6), /* fmov, fmovr */
369 COSTS_N_INSNS (6), /* fmul */
370 COSTS_N_INSNS (19), /* fdivs */
371 COSTS_N_INSNS (33), /* fdivd */
372 COSTS_N_INSNS (19), /* fsqrts */
373 COSTS_N_INSNS (33), /* fsqrtd */
374 COSTS_N_INSNS (5), /* imul */
375 COSTS_N_INSNS (5), /* imulX */
376 0, /* imul bit factor */
377 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
378 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
379 COSTS_N_INSNS (1), /* movcc/movr */
380 0, /* shift penalty */
381 };
382
383 static const
384 struct processor_costs niagara3_costs = {
385 COSTS_N_INSNS (3), /* int load */
386 COSTS_N_INSNS (3), /* int signed load */
387 COSTS_N_INSNS (3), /* int zeroed load */
388 COSTS_N_INSNS (3), /* float load */
389 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
390 COSTS_N_INSNS (9), /* fadd, fsub */
391 COSTS_N_INSNS (9), /* fcmp */
392 COSTS_N_INSNS (9), /* fmov, fmovr */
393 COSTS_N_INSNS (9), /* fmul */
394 COSTS_N_INSNS (23), /* fdivs */
395 COSTS_N_INSNS (37), /* fdivd */
396 COSTS_N_INSNS (23), /* fsqrts */
397 COSTS_N_INSNS (37), /* fsqrtd */
398 COSTS_N_INSNS (9), /* imul */
399 COSTS_N_INSNS (9), /* imulX */
400 0, /* imul bit factor */
401 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
402 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
403 COSTS_N_INSNS (1), /* movcc/movr */
404 0, /* shift penalty */
405 };
406
407 static const
408 struct processor_costs niagara4_costs = {
409 COSTS_N_INSNS (5), /* int load */
410 COSTS_N_INSNS (5), /* int signed load */
411 COSTS_N_INSNS (5), /* int zeroed load */
412 COSTS_N_INSNS (5), /* float load */
413 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
414 COSTS_N_INSNS (11), /* fadd, fsub */
415 COSTS_N_INSNS (11), /* fcmp */
416 COSTS_N_INSNS (11), /* fmov, fmovr */
417 COSTS_N_INSNS (11), /* fmul */
418 COSTS_N_INSNS (24), /* fdivs */
419 COSTS_N_INSNS (37), /* fdivd */
420 COSTS_N_INSNS (24), /* fsqrts */
421 COSTS_N_INSNS (37), /* fsqrtd */
422 COSTS_N_INSNS (12), /* imul */
423 COSTS_N_INSNS (12), /* imulX */
424 0, /* imul bit factor */
425 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
426 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
427 COSTS_N_INSNS (1), /* movcc/movr */
428 0, /* shift penalty */
429 };
430
431 static const
432 struct processor_costs niagara7_costs = {
433 COSTS_N_INSNS (5), /* int load */
434 COSTS_N_INSNS (5), /* int signed load */
435 COSTS_N_INSNS (5), /* int zeroed load */
436 COSTS_N_INSNS (5), /* float load */
437 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
438 COSTS_N_INSNS (11), /* fadd, fsub */
439 COSTS_N_INSNS (11), /* fcmp */
440 COSTS_N_INSNS (11), /* fmov, fmovr */
441 COSTS_N_INSNS (11), /* fmul */
442 COSTS_N_INSNS (24), /* fdivs */
443 COSTS_N_INSNS (37), /* fdivd */
444 COSTS_N_INSNS (24), /* fsqrts */
445 COSTS_N_INSNS (37), /* fsqrtd */
446 COSTS_N_INSNS (12), /* imul */
447 COSTS_N_INSNS (12), /* imulX */
448 0, /* imul bit factor */
449 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
450 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
451 COSTS_N_INSNS (1), /* movcc/movr */
452 0, /* shift penalty */
453 };
454
455 static const
456 struct processor_costs m8_costs = {
457 COSTS_N_INSNS (3), /* int load */
458 COSTS_N_INSNS (3), /* int signed load */
459 COSTS_N_INSNS (3), /* int zeroed load */
460 COSTS_N_INSNS (3), /* float load */
461 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
462 COSTS_N_INSNS (9), /* fadd, fsub */
463 COSTS_N_INSNS (9), /* fcmp */
464 COSTS_N_INSNS (9), /* fmov, fmovr */
465 COSTS_N_INSNS (9), /* fmul */
466 COSTS_N_INSNS (26), /* fdivs */
467 COSTS_N_INSNS (30), /* fdivd */
468 COSTS_N_INSNS (33), /* fsqrts */
469 COSTS_N_INSNS (41), /* fsqrtd */
470 COSTS_N_INSNS (12), /* imul */
471 COSTS_N_INSNS (10), /* imulX */
472 0, /* imul bit factor */
473 COSTS_N_INSNS (57), /* udiv/sdiv */
474 COSTS_N_INSNS (30), /* udivx/sdivx */
475 COSTS_N_INSNS (1), /* movcc/movr */
476 0, /* shift penalty */
477 };
478
479 static const struct processor_costs *sparc_costs = &cypress_costs;
480
481 #ifdef HAVE_AS_RELAX_OPTION
482 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
483 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
484 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
485 whether somebody branches between the sethi and jmp. */
486 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
487 #else
488 #define LEAF_SIBCALL_SLOT_RESERVED_P \
489 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
490 #endif
491
492 /* Vector to say how input registers are mapped to output registers.
493 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
494 eliminate it. You must use -fomit-frame-pointer to get that. */
495 char leaf_reg_remap[] =
496 { 0, 1, 2, 3, 4, 5, 6, 7,
497 -1, -1, -1, -1, -1, -1, 14, -1,
498 -1, -1, -1, -1, -1, -1, -1, -1,
499 8, 9, 10, 11, 12, 13, -1, 15,
500
501 32, 33, 34, 35, 36, 37, 38, 39,
502 40, 41, 42, 43, 44, 45, 46, 47,
503 48, 49, 50, 51, 52, 53, 54, 55,
504 56, 57, 58, 59, 60, 61, 62, 63,
505 64, 65, 66, 67, 68, 69, 70, 71,
506 72, 73, 74, 75, 76, 77, 78, 79,
507 80, 81, 82, 83, 84, 85, 86, 87,
508 88, 89, 90, 91, 92, 93, 94, 95,
509 96, 97, 98, 99, 100, 101, 102};
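/* For instance (reading the table above): in a leaf function the incoming
argument registers %i0-%i5 (hard regs 24-29) are remapped to the outgoing
registers %o0-%o5 (hard regs 8-13), %i7 (31) to %o7 (15), and %sp (14) to
itself, while the locals %l0-%l7 and the frame pointer %i6 have no mapping
(-1); sparc_leaf_regs below lists which registers such a function may use
in the first place. */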
510
511 /* Vector, indexed by hard register number, which contains 1
512 for a register that is allowable in a candidate for leaf
513 function treatment. */
514 char sparc_leaf_regs[] =
515 { 1, 1, 1, 1, 1, 1, 1, 1,
516 0, 0, 0, 0, 0, 0, 1, 0,
517 0, 0, 0, 0, 0, 0, 0, 0,
518 1, 1, 1, 1, 1, 1, 0, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1, 1,
525 1, 1, 1, 1, 1, 1, 1, 1,
526 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1};
528
529 struct GTY(()) machine_function
530 {
531 /* Size of the frame of the function. */
532 HOST_WIDE_INT frame_size;
533
534 /* Size of the frame of the function minus the register window save area
535 and the outgoing argument area. */
536 HOST_WIDE_INT apparent_frame_size;
537
538 /* Register we pretend the frame pointer is allocated to. Normally, this
539 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
540 record "offset" separately as it may be too big for (reg + disp). */
541 rtx frame_base_reg;
542 HOST_WIDE_INT frame_base_offset;
543
544 /* Number of global or FP registers to be saved (as 4-byte quantities). */
545 int n_global_fp_regs;
546
547 /* True if the current function is leaf and uses only leaf regs,
548 so that the SPARC leaf function optimization can be applied.
549 Private version of crtl->uses_only_leaf_regs, see
550 sparc_expand_prologue for the rationale. */
551 int leaf_function_p;
552
553 /* True if the prologue saves local or in registers. */
554 bool save_local_in_regs_p;
555
556 /* True if the data calculated by sparc_expand_prologue are valid. */
557 bool prologue_data_valid_p;
558 };
559
560 #define sparc_frame_size cfun->machine->frame_size
561 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
562 #define sparc_frame_base_reg cfun->machine->frame_base_reg
563 #define sparc_frame_base_offset cfun->machine->frame_base_offset
564 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
565 #define sparc_leaf_function_p cfun->machine->leaf_function_p
566 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
567 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
568
569 /* 1 if the next opcode is to be specially indented. */
570 int sparc_indent_opcode = 0;
571
572 static void sparc_option_override (void);
573 static void sparc_init_modes (void);
574 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
575 const_tree, bool, bool, int *, int *);
576
577 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
578 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
579
580 static void sparc_emit_set_const32 (rtx, rtx);
581 static void sparc_emit_set_const64 (rtx, rtx);
582 static void sparc_output_addr_vec (rtx);
583 static void sparc_output_addr_diff_vec (rtx);
584 static void sparc_output_deferred_case_vectors (void);
585 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
586 static bool sparc_legitimate_constant_p (machine_mode, rtx);
587 static rtx sparc_builtin_saveregs (void);
588 static int epilogue_renumber (rtx *, int);
589 static bool sparc_assemble_integer (rtx, unsigned int, int);
590 static int set_extends (rtx_insn *);
591 static void sparc_asm_function_prologue (FILE *);
592 static void sparc_asm_function_epilogue (FILE *);
593 #ifdef TARGET_SOLARIS
594 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
595 tree) ATTRIBUTE_UNUSED;
596 #endif
597 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
598 static int sparc_issue_rate (void);
599 static void sparc_sched_init (FILE *, int, int);
600 static int sparc_use_sched_lookahead (void);
601
602 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
603 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
604 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
605 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
606 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
607
608 static bool sparc_function_ok_for_sibcall (tree, tree);
609 static void sparc_init_libfuncs (void);
610 static void sparc_init_builtins (void);
611 static void sparc_fpu_init_builtins (void);
612 static void sparc_vis_init_builtins (void);
613 static tree sparc_builtin_decl (unsigned, bool);
614 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
615 static tree sparc_fold_builtin (tree, int, tree *, bool);
616 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
617 HOST_WIDE_INT, tree);
618 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
619 HOST_WIDE_INT, const_tree);
620 static struct machine_function * sparc_init_machine_status (void);
621 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
622 static rtx sparc_tls_get_addr (void);
623 static rtx sparc_tls_got (void);
624 static int sparc_register_move_cost (machine_mode,
625 reg_class_t, reg_class_t);
626 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
627 static rtx sparc_function_value (const_tree, const_tree, bool);
628 static rtx sparc_libcall_value (machine_mode, const_rtx);
629 static bool sparc_function_value_regno_p (const unsigned int);
630 static rtx sparc_struct_value_rtx (tree, int);
631 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
632 int *, const_tree, int);
633 static bool sparc_return_in_memory (const_tree, const_tree);
634 static bool sparc_strict_argument_naming (cumulative_args_t);
635 static void sparc_va_start (tree, rtx);
636 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
637 static bool sparc_vector_mode_supported_p (machine_mode);
638 static bool sparc_tls_referenced_p (rtx);
639 static rtx sparc_legitimize_tls_address (rtx);
640 static rtx sparc_legitimize_pic_address (rtx, rtx);
641 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
642 static rtx sparc_delegitimize_address (rtx);
643 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
644 static bool sparc_pass_by_reference (cumulative_args_t,
645 machine_mode, const_tree, bool);
646 static void sparc_function_arg_advance (cumulative_args_t,
647 machine_mode, const_tree, bool);
648 static rtx sparc_function_arg_1 (cumulative_args_t,
649 machine_mode, const_tree, bool, bool);
650 static rtx sparc_function_arg (cumulative_args_t,
651 machine_mode, const_tree, bool);
652 static rtx sparc_function_incoming_arg (cumulative_args_t,
653 machine_mode, const_tree, bool);
654 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
655 static unsigned int sparc_function_arg_boundary (machine_mode,
656 const_tree);
657 static int sparc_arg_partial_bytes (cumulative_args_t,
658 machine_mode, tree, bool);
659 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
660 static void sparc_file_end (void);
661 static bool sparc_frame_pointer_required (void);
662 static bool sparc_can_eliminate (const int, const int);
663 static rtx sparc_builtin_setjmp_frame_value (void);
664 static void sparc_conditional_register_usage (void);
665 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
666 static const char *sparc_mangle_type (const_tree);
667 #endif
668 static void sparc_trampoline_init (rtx, tree, rtx);
669 static machine_mode sparc_preferred_simd_mode (scalar_mode);
670 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
671 static bool sparc_lra_p (void);
672 static bool sparc_print_operand_punct_valid_p (unsigned char);
673 static void sparc_print_operand (FILE *, rtx, int);
674 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
675 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
676 machine_mode,
677 secondary_reload_info *);
678 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
679 reg_class_t);
680 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
681 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
682 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
683 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
684 static unsigned int sparc_min_arithmetic_precision (void);
685 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
686 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
687 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
688 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
689 reg_class_t);
690 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
691 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
692 const vec_perm_indices &);
693 \f
694 #ifdef SUBTARGET_ATTRIBUTE_TABLE
695 /* Table of valid machine attributes. */
696 static const struct attribute_spec sparc_attribute_table[] =
697 {
698 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
699 do_diagnostic, handler, exclude } */
700 SUBTARGET_ATTRIBUTE_TABLE,
701 { NULL, 0, 0, false, false, false, false, NULL, NULL }
702 };
703 #endif
704 \f
705 /* Option handling. */
706
707 /* Parsed value. */
708 enum cmodel sparc_cmodel;
709
710 char sparc_hard_reg_printed[8];
711
712 /* Initialize the GCC target structure. */
713
714 /* The default is to use .half rather than .short for aligned HI objects. */
715 #undef TARGET_ASM_ALIGNED_HI_OP
716 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
717
718 #undef TARGET_ASM_UNALIGNED_HI_OP
719 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
720 #undef TARGET_ASM_UNALIGNED_SI_OP
721 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
722 #undef TARGET_ASM_UNALIGNED_DI_OP
723 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
724
725 /* The target hook has to handle DI-mode values. */
726 #undef TARGET_ASM_INTEGER
727 #define TARGET_ASM_INTEGER sparc_assemble_integer
728
729 #undef TARGET_ASM_FUNCTION_PROLOGUE
730 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
731 #undef TARGET_ASM_FUNCTION_EPILOGUE
732 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
733
734 #undef TARGET_SCHED_ADJUST_COST
735 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
736 #undef TARGET_SCHED_ISSUE_RATE
737 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
738 #undef TARGET_SCHED_INIT
739 #define TARGET_SCHED_INIT sparc_sched_init
740 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
741 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
742
743 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
744 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
745
746 #undef TARGET_INIT_LIBFUNCS
747 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
748
749 #undef TARGET_LEGITIMIZE_ADDRESS
750 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
751 #undef TARGET_DELEGITIMIZE_ADDRESS
752 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
753 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
754 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
755
756 #undef TARGET_INIT_BUILTINS
757 #define TARGET_INIT_BUILTINS sparc_init_builtins
758 #undef TARGET_BUILTIN_DECL
759 #define TARGET_BUILTIN_DECL sparc_builtin_decl
760 #undef TARGET_EXPAND_BUILTIN
761 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
762 #undef TARGET_FOLD_BUILTIN
763 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
764
765 #if TARGET_TLS
766 #undef TARGET_HAVE_TLS
767 #define TARGET_HAVE_TLS true
768 #endif
769
770 #undef TARGET_CANNOT_FORCE_CONST_MEM
771 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
772
773 #undef TARGET_ASM_OUTPUT_MI_THUNK
774 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
775 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
776 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
777
778 #undef TARGET_RTX_COSTS
779 #define TARGET_RTX_COSTS sparc_rtx_costs
780 #undef TARGET_ADDRESS_COST
781 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
782 #undef TARGET_REGISTER_MOVE_COST
783 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
784
785 #undef TARGET_PROMOTE_FUNCTION_MODE
786 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
787
788 #undef TARGET_FUNCTION_VALUE
789 #define TARGET_FUNCTION_VALUE sparc_function_value
790 #undef TARGET_LIBCALL_VALUE
791 #define TARGET_LIBCALL_VALUE sparc_libcall_value
792 #undef TARGET_FUNCTION_VALUE_REGNO_P
793 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
794
795 #undef TARGET_STRUCT_VALUE_RTX
796 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
797 #undef TARGET_RETURN_IN_MEMORY
798 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
799 #undef TARGET_MUST_PASS_IN_STACK
800 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
801 #undef TARGET_PASS_BY_REFERENCE
802 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
803 #undef TARGET_ARG_PARTIAL_BYTES
804 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
805 #undef TARGET_FUNCTION_ARG_ADVANCE
806 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
807 #undef TARGET_FUNCTION_ARG
808 #define TARGET_FUNCTION_ARG sparc_function_arg
809 #undef TARGET_FUNCTION_INCOMING_ARG
810 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
811 #undef TARGET_FUNCTION_ARG_PADDING
812 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
813 #undef TARGET_FUNCTION_ARG_BOUNDARY
814 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
815
816 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
817 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
818 #undef TARGET_STRICT_ARGUMENT_NAMING
819 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
820
821 #undef TARGET_EXPAND_BUILTIN_VA_START
822 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
823 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
824 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
825
826 #undef TARGET_VECTOR_MODE_SUPPORTED_P
827 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
828
829 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
830 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
831
832 #ifdef SUBTARGET_INSERT_ATTRIBUTES
833 #undef TARGET_INSERT_ATTRIBUTES
834 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
835 #endif
836
837 #ifdef SUBTARGET_ATTRIBUTE_TABLE
838 #undef TARGET_ATTRIBUTE_TABLE
839 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
840 #endif
841
842 #undef TARGET_OPTION_OVERRIDE
843 #define TARGET_OPTION_OVERRIDE sparc_option_override
844
845 #ifdef TARGET_THREAD_SSP_OFFSET
846 #undef TARGET_STACK_PROTECT_GUARD
847 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
848 #endif
849
850 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
851 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
852 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
853 #endif
854
855 #undef TARGET_ASM_FILE_END
856 #define TARGET_ASM_FILE_END sparc_file_end
857
858 #undef TARGET_FRAME_POINTER_REQUIRED
859 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
860
861 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
862 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
863
864 #undef TARGET_CAN_ELIMINATE
865 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
866
867 #undef TARGET_PREFERRED_RELOAD_CLASS
868 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
869
870 #undef TARGET_SECONDARY_RELOAD
871 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
872 #undef TARGET_SECONDARY_MEMORY_NEEDED
873 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
874 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
875 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
876
877 #undef TARGET_CONDITIONAL_REGISTER_USAGE
878 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
879
880 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
881 #undef TARGET_MANGLE_TYPE
882 #define TARGET_MANGLE_TYPE sparc_mangle_type
883 #endif
884
885 #undef TARGET_LRA_P
886 #define TARGET_LRA_P sparc_lra_p
887
888 #undef TARGET_LEGITIMATE_ADDRESS_P
889 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
890
891 #undef TARGET_LEGITIMATE_CONSTANT_P
892 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
893
894 #undef TARGET_TRAMPOLINE_INIT
895 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
896
897 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
898 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
899 #undef TARGET_PRINT_OPERAND
900 #define TARGET_PRINT_OPERAND sparc_print_operand
901 #undef TARGET_PRINT_OPERAND_ADDRESS
902 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
903
904 /* The value stored by LDSTUB. */
905 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
906 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
907
908 #undef TARGET_CSTORE_MODE
909 #define TARGET_CSTORE_MODE sparc_cstore_mode
910
911 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
912 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
913
914 #undef TARGET_FIXED_CONDITION_CODE_REGS
915 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
916
917 #undef TARGET_MIN_ARITHMETIC_PRECISION
918 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
919
920 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
921 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
922
923 #undef TARGET_HARD_REGNO_NREGS
924 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
925 #undef TARGET_HARD_REGNO_MODE_OK
926 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
927
928 #undef TARGET_MODES_TIEABLE_P
929 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
930
931 #undef TARGET_CAN_CHANGE_MODE_CLASS
932 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
933
934 #undef TARGET_CONSTANT_ALIGNMENT
935 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
936
937 #undef TARGET_VECTORIZE_VEC_PERM_CONST
938 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
939
940 struct gcc_target targetm = TARGET_INITIALIZER;
941
942 /* Return the memory reference contained in X if any, zero otherwise. */
943
944 static rtx
945 mem_ref (rtx x)
946 {
947 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
948 x = XEXP (x, 0);
949
950 if (MEM_P (x))
951 return x;
952
953 return NULL_RTX;
954 }
955
956 /* True if any of INSN's source register(s) is REG. */
957
958 static bool
959 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
960 {
961 extract_insn (insn);
962 return ((REG_P (recog_data.operand[1])
963 && REGNO (recog_data.operand[1]) == reg)
964 || (recog_data.n_operands == 3
965 && REG_P (recog_data.operand[2])
966 && REGNO (recog_data.operand[2]) == reg));
967 }
968
969 /* True if INSN is a floating-point division or square-root. */
970
971 static bool
972 div_sqrt_insn_p (rtx_insn *insn)
973 {
974 if (GET_CODE (PATTERN (insn)) != SET)
975 return false;
976
977 switch (get_attr_type (insn))
978 {
979 case TYPE_FPDIVS:
980 case TYPE_FPSQRTS:
981 case TYPE_FPDIVD:
982 case TYPE_FPSQRTD:
983 return true;
984 default:
985 return false;
986 }
987 }
988
989 /* True if INSN is a floating-point instruction. */
990
991 static bool
992 fpop_insn_p (rtx_insn *insn)
993 {
994 if (GET_CODE (PATTERN (insn)) != SET)
995 return false;
996
997 switch (get_attr_type (insn))
998 {
999 case TYPE_FPMOVE:
1000 case TYPE_FPCMOVE:
1001 case TYPE_FP:
1002 case TYPE_FPCMP:
1003 case TYPE_FPMUL:
1004 case TYPE_FPDIVS:
1005 case TYPE_FPSQRTS:
1006 case TYPE_FPDIVD:
1007 case TYPE_FPSQRTD:
1008 return true;
1009 default:
1010 return false;
1011 }
1012 }
1013
1014 /* True if INSN is an atomic instruction. */
1015
1016 static bool
1017 atomic_insn_for_leon3_p (rtx_insn *insn)
1018 {
1019 switch (INSN_CODE (insn))
1020 {
1021 case CODE_FOR_swapsi:
1022 case CODE_FOR_ldstub:
1023 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1024 return true;
1025 default:
1026 return false;
1027 }
1028 }
1029
1030 /* We use a machine specific pass to enable workarounds for errata.
1031
1032 We need to have the (essentially) final form of the insn stream in order
1033 to properly detect the various hazards. Therefore, this machine specific
1034 pass runs as late as possible. */
1035
1036 /* True if INSN is a md pattern or asm statement. */
1037 #define USEFUL_INSN_P(INSN) \
1038 (NONDEBUG_INSN_P (INSN) \
1039 && GET_CODE (PATTERN (INSN)) != USE \
1040 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1041
1042 static unsigned int
1043 sparc_do_work_around_errata (void)
1044 {
1045 rtx_insn *insn, *next;
1046
1047 /* Force all instructions to be split into their final form. */
1048 split_all_insns_noflow ();
1049
1050 /* Now look for specific patterns in the insn stream. */
1051 for (insn = get_insns (); insn; insn = next)
1052 {
1053 bool insert_nop = false;
1054 rtx set;
1055 rtx_insn *jump;
1056 rtx_sequence *seq;
1057
1058 /* Look into the instruction in a delay slot. */
1059 if (NONJUMP_INSN_P (insn)
1060 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1061 {
1062 jump = seq->insn (0);
1063 insn = seq->insn (1);
1064 }
1065 else if (JUMP_P (insn))
1066 jump = insn;
1067 else
1068 jump = NULL;
1069
1070 /* Place a NOP at the branch target of an integer branch if it is a
1071 floating-point operation or a floating-point branch. */
1072 if (sparc_fix_gr712rc
1073 && jump
1074 && jump_to_label_p (jump)
1075 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1076 {
1077 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1078 if (target
1079 && (fpop_insn_p (target)
1080 || (JUMP_P (target)
1081 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1082 emit_insn_before (gen_nop (), target);
1083 }
1084
1085 /* Insert a NOP between load instruction and atomic instruction. Insert
1086 a NOP at branch target if there is a load in delay slot and an atomic
1087 instruction at branch target. */
1088 if (sparc_fix_ut700
1089 && NONJUMP_INSN_P (insn)
1090 && (set = single_set (insn)) != NULL_RTX
1091 && mem_ref (SET_SRC (set))
1092 && REG_P (SET_DEST (set)))
1093 {
1094 if (jump && jump_to_label_p (jump))
1095 {
1096 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1097 if (target && atomic_insn_for_leon3_p (target))
1098 emit_insn_before (gen_nop (), target);
1099 }
1100
1101 next = next_active_insn (insn);
1102 if (!next)
1103 break;
1104
1105 if (atomic_insn_for_leon3_p (next))
1106 insert_nop = true;
1107 }
1108
1109 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1110 ends with another fdiv or fsqrt instruction with no dependencies on
1111 the former, along with an appropriate pattern in between. */
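/* A minimal sketch of such a problematic sequence (illustrative only,
register choices are arbitrary):

fdivd %f0, %f2, %f4     ! first div/sqrt
faddd %f8, %f10, %f12   ! unrelated FP operation
ld    [%o0], %f16       ! unrelated FP load
fdivd %f6, %f8, %f10    ! second div/sqrt, independent of %f4

The loop below scans a window of up to four instructions after the first
div/sqrt for at least two such FP operations or loads followed by another
div/sqrt, and inserts NOPs to break the sequence up. */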
1112 if (sparc_fix_lost_divsqrt
1113 && NONJUMP_INSN_P (insn)
1114 && div_sqrt_insn_p (insn))
1115 {
1116 int i;
1117 int fp_found = 0;
1118 rtx_insn *after;
1119
1120 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1121
1122 next = next_active_insn (insn);
1123 if (!next)
1124 break;
1125
1126 for (after = next, i = 0; i < 4; i++)
1127 {
1128 /* Count floating-point operations. */
1129 if (i != 3 && fpop_insn_p (after))
1130 {
1131 /* If the insn uses the destination register of
1132 the div/sqrt, then it cannot be problematic. */
1133 if (insn_uses_reg_p (after, dest_reg))
1134 break;
1135 fp_found++;
1136 }
1137
1138 /* Count floating-point loads. */
1139 if (i != 3
1140 && (set = single_set (after)) != NULL_RTX
1141 && REG_P (SET_DEST (set))
1142 && REGNO (SET_DEST (set)) > 31)
1143 {
1144 /* If the insn uses the destination register of
1145 the div/sqrt, then it cannot be problematic. */
1146 if (REGNO (SET_DEST (set)) == dest_reg)
1147 break;
1148 fp_found++;
1149 }
1150
1151 /* Check if this is a problematic sequence. */
1152 if (i > 1
1153 && fp_found >= 2
1154 && div_sqrt_insn_p (after))
1155 {
1156 /* If this is the short version of the problematic
1157 sequence we add two NOPs in a row to also prevent
1158 the long version. */
1159 if (i == 2)
1160 emit_insn_before (gen_nop (), next);
1161 insert_nop = true;
1162 break;
1163 }
1164
1165 /* No need to scan past a second div/sqrt. */
1166 if (div_sqrt_insn_p (after))
1167 break;
1168
1169 /* Insert NOP before branch. */
1170 if (i < 3
1171 && (!NONJUMP_INSN_P (after)
1172 || GET_CODE (PATTERN (after)) == SEQUENCE))
1173 {
1174 insert_nop = true;
1175 break;
1176 }
1177
1178 after = next_active_insn (after);
1179 if (!after)
1180 break;
1181 }
1182 }
1183
1184 /* Look for either of these two sequences:
1185
1186 Sequence A:
1187 1. store of word size or less (e.g. st / stb / sth / stf)
1188 2. any single instruction that is not a load or store
1189 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1190
1191 Sequence B:
1192 1. store of double word size (e.g. std / stdf)
1193 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
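/* For example (an illustrative sketch, registers chosen arbitrarily), the
following matches Sequence A:

stb %g1, [%o0]      ! 1. byte store
add %o1, 4, %o1     ! 2. single non-memory instruction
st  %o2, [%o1]      ! 3. second store

in which case the code below inserts a NOP right after the first store to
break up the back-to-back store hazard. */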
1194 if (sparc_fix_b2bst
1195 && NONJUMP_INSN_P (insn)
1196 && (set = single_set (insn)) != NULL_RTX
1197 && MEM_P (SET_DEST (set)))
1198 {
1199 /* Sequence B begins with a double-word store. */
1200 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1201 rtx_insn *after;
1202 int i;
1203
1204 next = next_active_insn (insn);
1205 if (!next)
1206 break;
1207
1208 for (after = next, i = 0; i < 2; i++)
1209 {
1210 /* Skip empty assembly statements. */
1211 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1212 || (USEFUL_INSN_P (after)
1213 && (asm_noperands (PATTERN (after))>=0)
1214 && !strcmp (decode_asm_operands (PATTERN (after),
1215 NULL, NULL, NULL,
1216 NULL, NULL), "")))
1217 after = next_active_insn (after);
1218 if (!after)
1219 break;
1220
1221 /* If the insn is a branch, then it cannot be problematic. */
1222 if (!NONJUMP_INSN_P (after)
1223 || GET_CODE (PATTERN (after)) == SEQUENCE)
1224 break;
1225
1226 /* Sequence B is only two instructions long. */
1227 if (seq_b)
1228 {
1229 /* Add NOP if followed by a store. */
1230 if ((set = single_set (after)) != NULL_RTX
1231 && MEM_P (SET_DEST (set)))
1232 insert_nop = true;
1233
1234 /* Otherwise it is ok. */
1235 break;
1236 }
1237
1238 /* If the second instruction is a load or a store,
1239 then the sequence cannot be problematic. */
1240 if (i == 0)
1241 {
1242 if ((set = single_set (after)) != NULL_RTX
1243 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1244 break;
1245
1246 after = next_active_insn (after);
1247 if (!after)
1248 break;
1249 }
1250
1251 /* Add NOP if third instruction is a store. */
1252 if (i == 1
1253 && (set = single_set (after)) != NULL_RTX
1254 && MEM_P (SET_DEST (set)))
1255 insert_nop = true;
1256 }
1257 }
1258
1259 /* Look for a single-word load into an odd-numbered FP register. */
1260 else if (sparc_fix_at697f
1261 && NONJUMP_INSN_P (insn)
1262 && (set = single_set (insn)) != NULL_RTX
1263 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1264 && mem_ref (SET_SRC (set))
1265 && REG_P (SET_DEST (set))
1266 && REGNO (SET_DEST (set)) > 31
1267 && REGNO (SET_DEST (set)) % 2 != 0)
1268 {
1269 /* The wrong dependency is on the enclosing double register. */
1270 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1271 unsigned int src1, src2, dest;
1272 int code;
1273
1274 next = next_active_insn (insn);
1275 if (!next)
1276 break;
1277 /* If the insn is a branch, then it cannot be problematic. */
1278 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1279 continue;
1280
1281 extract_insn (next);
1282 code = INSN_CODE (next);
1283
1284 switch (code)
1285 {
1286 case CODE_FOR_adddf3:
1287 case CODE_FOR_subdf3:
1288 case CODE_FOR_muldf3:
1289 case CODE_FOR_divdf3:
1290 dest = REGNO (recog_data.operand[0]);
1291 src1 = REGNO (recog_data.operand[1]);
1292 src2 = REGNO (recog_data.operand[2]);
1293 if (src1 != src2)
1294 {
1295 /* Case [1-4]:
1296 ld [address], %fx+1
1297 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1298 if ((src1 == x || src2 == x)
1299 && (dest == src1 || dest == src2))
1300 insert_nop = true;
1301 }
1302 else
1303 {
1304 /* Case 5:
1305 ld [address], %fx+1
1306 FPOPd %fx, %fx, %fx */
1307 if (src1 == x
1308 && dest == src1
1309 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1310 insert_nop = true;
1311 }
1312 break;
1313
1314 case CODE_FOR_sqrtdf2:
1315 dest = REGNO (recog_data.operand[0]);
1316 src1 = REGNO (recog_data.operand[1]);
1317 /* Case 6:
1318 ld [address], %fx+1
1319 fsqrtd %fx, %fx */
1320 if (src1 == x && dest == src1)
1321 insert_nop = true;
1322 break;
1323
1324 default:
1325 break;
1326 }
1327 }
1328
1329 /* Look for a single-word load into an integer register. */
1330 else if (sparc_fix_ut699
1331 && NONJUMP_INSN_P (insn)
1332 && (set = single_set (insn)) != NULL_RTX
1333 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1334 && (mem_ref (SET_SRC (set)) != NULL_RTX
1335 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1336 && REG_P (SET_DEST (set))
1337 && REGNO (SET_DEST (set)) < 32)
1338 {
1339 /* There is no problem if the second memory access has a data
1340 dependency on the first single-cycle load. */
1341 rtx x = SET_DEST (set);
1342
1343 next = next_active_insn (insn);
1344 if (!next)
1345 break;
1346 /* If the insn is a branch, then it cannot be problematic. */
1347 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1348 continue;
1349
1350 /* Look for a second memory access to/from an integer register. */
1351 if ((set = single_set (next)) != NULL_RTX)
1352 {
1353 rtx src = SET_SRC (set);
1354 rtx dest = SET_DEST (set);
1355 rtx mem;
1356
1357 /* LDD is affected. */
1358 if ((mem = mem_ref (src)) != NULL_RTX
1359 && REG_P (dest)
1360 && REGNO (dest) < 32
1361 && !reg_mentioned_p (x, XEXP (mem, 0)))
1362 insert_nop = true;
1363
1364 /* STD is *not* affected. */
1365 else if (MEM_P (dest)
1366 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1367 && (src == CONST0_RTX (GET_MODE (dest))
1368 || (REG_P (src)
1369 && REGNO (src) < 32
1370 && REGNO (src) != REGNO (x)))
1371 && !reg_mentioned_p (x, XEXP (dest, 0)))
1372 insert_nop = true;
1373
1374 /* GOT accesses use LD. */
1375 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1376 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1377 insert_nop = true;
1378 }
1379 }
1380
1381 /* Look for a single-word load/operation into an FP register. */
1382 else if (sparc_fix_ut699
1383 && NONJUMP_INSN_P (insn)
1384 && (set = single_set (insn)) != NULL_RTX
1385 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1386 && REG_P (SET_DEST (set))
1387 && REGNO (SET_DEST (set)) > 31)
1388 {
1389 /* Number of instructions in the problematic window. */
1390 const int n_insns = 4;
1391 /* The problematic combination is with the sibling FP register. */
1392 const unsigned int x = REGNO (SET_DEST (set));
1393 const unsigned int y = x ^ 1;
1394 rtx_insn *after;
1395 int i;
1396
1397 next = next_active_insn (insn);
1398 if (!next)
1399 break;
1400 /* If the insn is a branch, then it cannot be problematic. */
1401 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1402 continue;
1403
1404 /* Look for a second load/operation into the sibling FP register. */
1405 if (!((set = single_set (next)) != NULL_RTX
1406 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1407 && REG_P (SET_DEST (set))
1408 && REGNO (SET_DEST (set)) == y))
1409 continue;
1410
1411 /* Look for a (possible) store from the FP register in the next N
1412 instructions, but bail out if it is again modified or if there
1413 is a store from the sibling FP register before this store. */
1414 for (after = next, i = 0; i < n_insns; i++)
1415 {
1416 bool branch_p;
1417
1418 after = next_active_insn (after);
1419 if (!after)
1420 break;
1421
1422 /* This is a branch with an empty delay slot. */
1423 if (!NONJUMP_INSN_P (after))
1424 {
1425 if (++i == n_insns)
1426 break;
1427 branch_p = true;
1428 after = NULL;
1429 }
1430 /* This is a branch with a filled delay slot. */
1431 else if (rtx_sequence *seq =
1432 dyn_cast <rtx_sequence *> (PATTERN (after)))
1433 {
1434 if (++i == n_insns)
1435 break;
1436 branch_p = true;
1437 after = seq->insn (1);
1438 }
1439 /* This is a regular instruction. */
1440 else
1441 branch_p = false;
1442
1443 if (after && (set = single_set (after)) != NULL_RTX)
1444 {
1445 const rtx src = SET_SRC (set);
1446 const rtx dest = SET_DEST (set);
1447 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1448
1449 /* If the FP register is again modified before the store,
1450 then the store isn't affected. */
1451 if (REG_P (dest)
1452 && (REGNO (dest) == x
1453 || (REGNO (dest) == y && size == 8)))
1454 break;
1455
1456 if (MEM_P (dest) && REG_P (src))
1457 {
1458 /* If there is a store from the sibling FP register
1459 before the store, then the store is not affected. */
1460 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1461 break;
1462
1463 /* Otherwise, the store is affected. */
1464 if (REGNO (src) == x && size == 4)
1465 {
1466 insert_nop = true;
1467 break;
1468 }
1469 }
1470 }
1471
1472 /* If we have a branch in the first M instructions, then we
1473 cannot see the (M+2)th instruction so we play safe. */
1474 if (branch_p && i <= (n_insns - 2))
1475 {
1476 insert_nop = true;
1477 break;
1478 }
1479 }
1480 }
1481
1482 else
1483 next = NEXT_INSN (insn);
1484
1485 if (insert_nop)
1486 emit_insn_before (gen_nop (), next);
1487 }
1488
1489 return 0;
1490 }
1491
1492 namespace {
1493
1494 const pass_data pass_data_work_around_errata =
1495 {
1496 RTL_PASS, /* type */
1497 "errata", /* name */
1498 OPTGROUP_NONE, /* optinfo_flags */
1499 TV_MACH_DEP, /* tv_id */
1500 0, /* properties_required */
1501 0, /* properties_provided */
1502 0, /* properties_destroyed */
1503 0, /* todo_flags_start */
1504 0, /* todo_flags_finish */
1505 };
1506
1507 class pass_work_around_errata : public rtl_opt_pass
1508 {
1509 public:
1510 pass_work_around_errata(gcc::context *ctxt)
1511 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1512 {}
1513
1514 /* opt_pass methods: */
1515 virtual bool gate (function *)
1516 {
1517 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
1518 || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
1519 }
1520
1521 virtual unsigned int execute (function *)
1522 {
1523 return sparc_do_work_around_errata ();
1524 }
1525
1526 }; // class pass_work_around_errata
1527
1528 } // anon namespace
1529
1530 rtl_opt_pass *
1531 make_pass_work_around_errata (gcc::context *ctxt)
1532 {
1533 return new pass_work_around_errata (ctxt);
1534 }
1535
1536 /* Helpers for TARGET_DEBUG_OPTIONS. */
1537 static void
1538 dump_target_flag_bits (const int flags)
1539 {
1540 if (flags & MASK_64BIT)
1541 fprintf (stderr, "64BIT ");
1542 if (flags & MASK_APP_REGS)
1543 fprintf (stderr, "APP_REGS ");
1544 if (flags & MASK_FASTER_STRUCTS)
1545 fprintf (stderr, "FASTER_STRUCTS ");
1546 if (flags & MASK_FLAT)
1547 fprintf (stderr, "FLAT ");
1548 if (flags & MASK_FMAF)
1549 fprintf (stderr, "FMAF ");
1550 if (flags & MASK_FSMULD)
1551 fprintf (stderr, "FSMULD ");
1552 if (flags & MASK_FPU)
1553 fprintf (stderr, "FPU ");
1554 if (flags & MASK_HARD_QUAD)
1555 fprintf (stderr, "HARD_QUAD ");
1556 if (flags & MASK_POPC)
1557 fprintf (stderr, "POPC ");
1558 if (flags & MASK_PTR64)
1559 fprintf (stderr, "PTR64 ");
1560 if (flags & MASK_STACK_BIAS)
1561 fprintf (stderr, "STACK_BIAS ");
1562 if (flags & MASK_UNALIGNED_DOUBLES)
1563 fprintf (stderr, "UNALIGNED_DOUBLES ");
1564 if (flags & MASK_V8PLUS)
1565 fprintf (stderr, "V8PLUS ");
1566 if (flags & MASK_VIS)
1567 fprintf (stderr, "VIS ");
1568 if (flags & MASK_VIS2)
1569 fprintf (stderr, "VIS2 ");
1570 if (flags & MASK_VIS3)
1571 fprintf (stderr, "VIS3 ");
1572 if (flags & MASK_VIS4)
1573 fprintf (stderr, "VIS4 ");
1574 if (flags & MASK_VIS4B)
1575 fprintf (stderr, "VIS4B ");
1576 if (flags & MASK_CBCOND)
1577 fprintf (stderr, "CBCOND ");
1578 if (flags & MASK_DEPRECATED_V8_INSNS)
1579 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1580 if (flags & MASK_SPARCLET)
1581 fprintf (stderr, "SPARCLET ");
1582 if (flags & MASK_SPARCLITE)
1583 fprintf (stderr, "SPARCLITE ");
1584 if (flags & MASK_V8)
1585 fprintf (stderr, "V8 ");
1586 if (flags & MASK_V9)
1587 fprintf (stderr, "V9 ");
1588 }
1589
1590 static void
1591 dump_target_flags (const char *prefix, const int flags)
1592 {
1593 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1594 dump_target_flag_bits (flags);
1595 fprintf(stderr, "]\n");
1596 }
1597
1598 /* Validate and override various options, and do some machine dependent
1599 initialization. */
1600
1601 static void
1602 sparc_option_override (void)
1603 {
1604 static struct code_model {
1605 const char *const name;
1606 const enum cmodel value;
1607 } const cmodels[] = {
1608 { "32", CM_32 },
1609 { "medlow", CM_MEDLOW },
1610 { "medmid", CM_MEDMID },
1611 { "medany", CM_MEDANY },
1612 { "embmedany", CM_EMBMEDANY },
1613 { NULL, (enum cmodel) 0 }
1614 };
1615 const struct code_model *cmodel;
1616 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1617 static struct cpu_default {
1618 const int cpu;
1619 const enum processor_type processor;
1620 } const cpu_default[] = {
1621 /* There must be one entry here for each TARGET_CPU value. */
1622 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1623 { TARGET_CPU_v8, PROCESSOR_V8 },
1624 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1625 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1626 { TARGET_CPU_leon, PROCESSOR_LEON },
1627 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1628 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1629 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1630 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1631 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1632 { TARGET_CPU_v9, PROCESSOR_V9 },
1633 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1634 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1635 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1636 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1637 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1638 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1639 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1640 { TARGET_CPU_m8, PROCESSOR_M8 },
1641 { -1, PROCESSOR_V7 }
1642 };
1643 const struct cpu_default *def;
1644 /* Table of values for -m{cpu,tune}=. This must match the order of
1645 the enum processor_type in sparc-opts.h. */
1646 static struct cpu_table {
1647 const char *const name;
1648 const int disable;
1649 const int enable;
1650 } const cpu_table[] = {
1651 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1652 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1653 { "v8", MASK_ISA, MASK_V8 },
1654 /* TI TMS390Z55 supersparc */
1655 { "supersparc", MASK_ISA, MASK_V8 },
1656 { "hypersparc", MASK_ISA, MASK_V8 },
1657 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1658 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1659 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1660 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1661 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1662 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1663 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1664 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1665 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1666 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1667 /* TEMIC sparclet */
1668 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1669 { "v9", MASK_ISA, MASK_V9 },
1670 /* UltraSPARC I, II, IIi */
1671 { "ultrasparc", MASK_ISA,
1672 /* Although insns using %y are deprecated, it is a clear win. */
1673 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1674 /* UltraSPARC III */
1675 /* ??? Check if %y issue still holds true. */
1676 { "ultrasparc3", MASK_ISA,
1677 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1678 /* UltraSPARC T1 */
1679 { "niagara", MASK_ISA,
1680 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1681 /* UltraSPARC T2 */
1682 { "niagara2", MASK_ISA,
1683 MASK_V9|MASK_POPC|MASK_VIS2 },
1684 /* UltraSPARC T3 */
1685 { "niagara3", MASK_ISA,
1686 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1687 /* UltraSPARC T4 */
1688 { "niagara4", MASK_ISA,
1689 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1690 /* UltraSPARC M7 */
1691 { "niagara7", MASK_ISA,
1692 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1693 /* UltraSPARC M8 */
1694 { "m8", MASK_ISA,
1695 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1696 };
1697 const struct cpu_table *cpu;
1698 unsigned int i;
1699
1700 if (sparc_debug_string != NULL)
1701 {
1702 const char *q;
1703 char *p;
1704
1705 p = ASTRDUP (sparc_debug_string);
1706 while ((q = strtok (p, ",")) != NULL)
1707 {
1708 bool invert;
1709 int mask;
1710
1711 p = NULL;
1712 if (*q == '!')
1713 {
1714 invert = true;
1715 q++;
1716 }
1717 else
1718 invert = false;
1719
1720 if (! strcmp (q, "all"))
1721 mask = MASK_DEBUG_ALL;
1722 else if (! strcmp (q, "options"))
1723 mask = MASK_DEBUG_OPTIONS;
1724 else
1725 error ("unknown -mdebug-%s switch", q);
1726
1727 if (invert)
1728 sparc_debug &= ~mask;
1729 else
1730 sparc_debug |= mask;
1731 }
1732 }
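
/* For example, with the loop above -mdebug=all,!options first sets every
   debug bit and then clears MASK_DEBUG_OPTIONS again, leaving all other
   debug bits enabled.  */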
1733
1734 /* Enable the FsMULd instruction by default if not explicitly specified by
1735 the user. It may be later disabled by the CPU (explicitly or not). */
1736 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1737 target_flags |= MASK_FSMULD;
1738
1739 if (TARGET_DEBUG_OPTIONS)
1740 {
1741 dump_target_flags("Initial target_flags", target_flags);
1742 dump_target_flags("target_flags_explicit", target_flags_explicit);
1743 }
1744
1745 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1746 SUBTARGET_OVERRIDE_OPTIONS;
1747 #endif
1748
1749 #ifndef SPARC_BI_ARCH
1750 /* Check for unsupported architecture size. */
1751 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1752 error ("%s is not supported by this configuration",
1753 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1754 #endif
1755
1756 /* We force all 64-bit archs to use a 128-bit long double. */
1757 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1758 {
1759 error ("-mlong-double-64 not allowed with -m64");
1760 target_flags |= MASK_LONG_DOUBLE_128;
1761 }
1762
1763 /* Code model selection. */
1764 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1765
1766 #ifdef SPARC_BI_ARCH
1767 if (TARGET_ARCH32)
1768 sparc_cmodel = CM_32;
1769 #endif
1770
1771 if (sparc_cmodel_string != NULL)
1772 {
1773 if (TARGET_ARCH64)
1774 {
1775 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1776 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1777 break;
1778 if (cmodel->name == NULL)
1779 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1780 else
1781 sparc_cmodel = cmodel->value;
1782 }
1783 else
1784 error ("-mcmodel= is not supported on 32-bit systems");
1785 }
1786
1787 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1788 for (i = 8; i < 16; i++)
1789 if (!call_used_regs [i])
1790 {
1791 error ("-fcall-saved-REG is not supported for out registers");
1792 call_used_regs [i] = 1;
1793 }
1794
1795 /* Set the default CPU if no -mcpu option was specified. */
1796 if (!global_options_set.x_sparc_cpu_and_features)
1797 {
1798 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1799 if (def->cpu == TARGET_CPU_DEFAULT)
1800 break;
1801 gcc_assert (def->cpu != -1);
1802 sparc_cpu_and_features = def->processor;
1803 }
1804
1805 /* Set the default CPU if no -mtune option was specified. */
1806 if (!global_options_set.x_sparc_cpu)
1807 sparc_cpu = sparc_cpu_and_features;
1808
1809 cpu = &cpu_table[(int) sparc_cpu_and_features];
1810
1811 if (TARGET_DEBUG_OPTIONS)
1812 {
1813 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1814 dump_target_flags ("cpu->disable", cpu->disable);
1815 dump_target_flags ("cpu->enable", cpu->enable);
1816 }
1817
1818 target_flags &= ~cpu->disable;
1819 target_flags |= (cpu->enable
1820 #ifndef HAVE_AS_FMAF_HPC_VIS3
1821 & ~(MASK_FMAF | MASK_VIS3)
1822 #endif
1823 #ifndef HAVE_AS_SPARC4
1824 & ~MASK_CBCOND
1825 #endif
1826 #ifndef HAVE_AS_SPARC5_VIS4
1827 & ~(MASK_VIS4 | MASK_SUBXC)
1828 #endif
1829 #ifndef HAVE_AS_SPARC6
1830 & ~(MASK_VIS4B)
1831 #endif
1832 #ifndef HAVE_AS_LEON
1833 & ~(MASK_LEON | MASK_LEON3)
1834 #endif
1835 & ~(target_flags_explicit & MASK_FEATURES)
1836 );
1837
1838 /* -mvis2 implies -mvis. */
1839 if (TARGET_VIS2)
1840 target_flags |= MASK_VIS;
1841
1842 /* -mvis3 implies -mvis2 and -mvis. */
1843 if (TARGET_VIS3)
1844 target_flags |= MASK_VIS2 | MASK_VIS;
1845
1846 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1847 if (TARGET_VIS4)
1848 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1849
1850 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1851 if (TARGET_VIS4B)
1852 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1853
1854 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1855 FPU is disabled. */
1856 if (!TARGET_FPU)
1857 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1858 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
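
/* As an informal sketch of the combined effect of the implications above:
   -mvis4b alone ends up turning on VIS4, VIS3, VIS2 and VIS as well, while
   adding -mno-fpu clears the whole VIS/FMAF/FsMULd group again.  */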
1859
1860 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1861 are available; -m64 also implies v9. */
1862 if (TARGET_VIS || TARGET_ARCH64)
1863 {
1864 target_flags |= MASK_V9;
1865 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1866 }
1867
1868 /* -mvis also implies -mv8plus on 32-bit. */
1869 if (TARGET_VIS && !TARGET_ARCH64)
1870 target_flags |= MASK_V8PLUS;
1871
1872 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1873 if (TARGET_V9 && TARGET_ARCH32)
1874 target_flags |= MASK_DEPRECATED_V8_INSNS;
1875
1876 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1877 if (!TARGET_V9 || TARGET_ARCH64)
1878 target_flags &= ~MASK_V8PLUS;
1879
1880 /* Don't use stack biasing in 32-bit mode. */
1881 if (TARGET_ARCH32)
1882 target_flags &= ~MASK_STACK_BIAS;
1883
1884 /* Use LRA instead of reload, unless otherwise instructed. */
1885 if (!(target_flags_explicit & MASK_LRA))
1886 target_flags |= MASK_LRA;
1887
1888 /* Enable applicable errata workarounds for LEON3FT. */
1889 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1890 {
1891 sparc_fix_b2bst = 1;
1892 sparc_fix_lost_divsqrt = 1;
1893 }
1894
1895 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1896 if (sparc_fix_ut699)
1897 target_flags &= ~MASK_FSMULD;
1898
1899 /* Supply a default value for align_functions. */
1900 if (align_functions == 0)
1901 {
1902 if (sparc_cpu == PROCESSOR_ULTRASPARC
1903 || sparc_cpu == PROCESSOR_ULTRASPARC3
1904 || sparc_cpu == PROCESSOR_NIAGARA
1905 || sparc_cpu == PROCESSOR_NIAGARA2
1906 || sparc_cpu == PROCESSOR_NIAGARA3
1907 || sparc_cpu == PROCESSOR_NIAGARA4)
1908 align_functions = 32;
1909 else if (sparc_cpu == PROCESSOR_NIAGARA7
1910 || sparc_cpu == PROCESSOR_M8)
1911 align_functions = 64;
1912 }
1913
1914 /* Validate PCC_STRUCT_RETURN. */
1915 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1916 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1917
1918 /* Only use .uaxword when compiling for a 64-bit target. */
1919 if (!TARGET_ARCH64)
1920 targetm.asm_out.unaligned_op.di = NULL;
1921
1922 /* Do various machine dependent initializations. */
1923 sparc_init_modes ();
1924
1925 /* Set up function hooks. */
1926 init_machine_status = sparc_init_machine_status;
1927
1928 switch (sparc_cpu)
1929 {
1930 case PROCESSOR_V7:
1931 case PROCESSOR_CYPRESS:
1932 sparc_costs = &cypress_costs;
1933 break;
1934 case PROCESSOR_V8:
1935 case PROCESSOR_SPARCLITE:
1936 case PROCESSOR_SUPERSPARC:
1937 sparc_costs = &supersparc_costs;
1938 break;
1939 case PROCESSOR_F930:
1940 case PROCESSOR_F934:
1941 case PROCESSOR_HYPERSPARC:
1942 case PROCESSOR_SPARCLITE86X:
1943 sparc_costs = &hypersparc_costs;
1944 break;
1945 case PROCESSOR_LEON:
1946 sparc_costs = &leon_costs;
1947 break;
1948 case PROCESSOR_LEON3:
1949 case PROCESSOR_LEON3V7:
1950 sparc_costs = &leon3_costs;
1951 break;
1952 case PROCESSOR_SPARCLET:
1953 case PROCESSOR_TSC701:
1954 sparc_costs = &sparclet_costs;
1955 break;
1956 case PROCESSOR_V9:
1957 case PROCESSOR_ULTRASPARC:
1958 sparc_costs = &ultrasparc_costs;
1959 break;
1960 case PROCESSOR_ULTRASPARC3:
1961 sparc_costs = &ultrasparc3_costs;
1962 break;
1963 case PROCESSOR_NIAGARA:
1964 sparc_costs = &niagara_costs;
1965 break;
1966 case PROCESSOR_NIAGARA2:
1967 sparc_costs = &niagara2_costs;
1968 break;
1969 case PROCESSOR_NIAGARA3:
1970 sparc_costs = &niagara3_costs;
1971 break;
1972 case PROCESSOR_NIAGARA4:
1973 sparc_costs = &niagara4_costs;
1974 break;
1975 case PROCESSOR_NIAGARA7:
1976 sparc_costs = &niagara7_costs;
1977 break;
1978 case PROCESSOR_M8:
1979 sparc_costs = &m8_costs;
1980 break;
1981 case PROCESSOR_NATIVE:
1982 gcc_unreachable ();
1983 };
1984
1985 if (sparc_memory_model == SMM_DEFAULT)
1986 {
1987 /* Choose the memory model for the operating system. */
1988 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1989 if (os_default != SMM_DEFAULT)
1990 sparc_memory_model = os_default;
1991 /* Choose the most relaxed model for the processor. */
1992 else if (TARGET_V9)
1993 sparc_memory_model = SMM_RMO;
1994 else if (TARGET_LEON3)
1995 sparc_memory_model = SMM_TSO;
1996 else if (TARGET_LEON)
1997 sparc_memory_model = SMM_SC;
1998 else if (TARGET_V8)
1999 sparc_memory_model = SMM_PSO;
2000 else
2001 sparc_memory_model = SMM_SC;
2002 }
2003
2004 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
2005 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
2006 target_flags |= MASK_LONG_DOUBLE_128;
2007 #endif
2008
2009 if (TARGET_DEBUG_OPTIONS)
2010 dump_target_flags ("Final target_flags", target_flags);
2011
2012 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2013 can run at the same time. More important, it is the threshold
2014 defining when additional prefetches will be dropped by the
2015 hardware.
2016
2017 The UltraSPARC-III features a documented prefetch queue with a
2018 size of 8. Additional prefetches issued in the cpu are
2019 dropped.
2020
2021 Niagara processors are different. In these processors prefetches
2022 are handled much like regular loads. The L1 miss buffer is 32
2023 entries, but prefetches start getting affected when 30 entries
2024 become occupied. That occupation could be a mix of regular loads
2025 and prefetches though. And that buffer is shared by all threads.
2026 Once the threshold is reached, if the core is running a single
2027 thread the prefetch will retry. If more than one thread is
2028 running, the prefetch will be dropped.
2029
2030 All this makes it very difficult to determine how many
2031 prefetches can be issued simultaneously, even in a
2032 single-threaded program. Experimental results show that setting
2033 this parameter to 32 works well when the number of threads is not
2034 high. */
2035 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2036 ((sparc_cpu == PROCESSOR_ULTRASPARC
2037 || sparc_cpu == PROCESSOR_NIAGARA
2038 || sparc_cpu == PROCESSOR_NIAGARA2
2039 || sparc_cpu == PROCESSOR_NIAGARA3
2040 || sparc_cpu == PROCESSOR_NIAGARA4)
2041 ? 2
2042 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2043 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2044 || sparc_cpu == PROCESSOR_M8)
2045 ? 32 : 3))),
2046 global_options.x_param_values,
2047 global_options_set.x_param_values);
2048
2049 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2050 bytes.
2051
2052 The Oracle SPARC Architecture (previously the UltraSPARC
2053 Architecture) specification states that when a PREFETCH[A]
2054 instruction is executed an implementation-specific amount of data
2055 is prefetched, and that it is at least 64 bytes long (aligned to
2056 at least 64 bytes).
2057
2058 However, this is not correct. The M7 (and implementations prior
2059 to that) does not guarantee a 64B prefetch into a cache if the
2060 line size is smaller. A single cache line is all that is ever
2061 prefetched. So for the M7, where the L1D$ has 32B lines and the
2062 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2063 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2064 is a read_n prefetch, which is the only type which allocates to
2065 the L1.) */
2066 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2067 (sparc_cpu == PROCESSOR_M8
2068 ? 64 : 32),
2069 global_options.x_param_values,
2070 global_options_set.x_param_values);
2071
2072 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2073 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2074 Niagara processors feature an L1D$ of 16KB. */
2075 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2076 ((sparc_cpu == PROCESSOR_ULTRASPARC
2077 || sparc_cpu == PROCESSOR_ULTRASPARC3
2078 || sparc_cpu == PROCESSOR_NIAGARA
2079 || sparc_cpu == PROCESSOR_NIAGARA2
2080 || sparc_cpu == PROCESSOR_NIAGARA3
2081 || sparc_cpu == PROCESSOR_NIAGARA4
2082 || sparc_cpu == PROCESSOR_NIAGARA7
2083 || sparc_cpu == PROCESSOR_M8)
2084 ? 16 : 64),
2085 global_options.x_param_values,
2086 global_options_set.x_param_values);
2087
2088
2089 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
2090 that 512 is the default in params.def. */
2091 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2092 ((sparc_cpu == PROCESSOR_NIAGARA4
2093 || sparc_cpu == PROCESSOR_M8)
2094 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2095 ? 256 : 512)),
2096 global_options.x_param_values,
2097 global_options_set.x_param_values);
2098
2099
2100 /* Disable save slot sharing for call-clobbered registers by default.
2101 The IRA sharing algorithm works on single registers only and this
2102 pessimizes for double floating-point registers. */
2103 if (!global_options_set.x_flag_ira_share_save_slots)
2104 flag_ira_share_save_slots = 0;
2105
2106 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2107 redundant 32-to-64-bit extensions. */
2108 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2109 flag_ree = 0;
2110 }
2111 \f
2112 /* Miscellaneous utilities. */
2113
2114 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2115 or branch on register contents instructions. */
2116
2117 int
2118 v9_regcmp_p (enum rtx_code code)
2119 {
2120 return (code == EQ || code == NE || code == GE || code == LT
2121 || code == LE || code == GT);
2122 }
2123
2124 /* Nonzero if OP is a floating point constant which can
2125 be loaded into an integer register using a single
2126 sethi instruction. */
2127
2128 int
2129 fp_sethi_p (rtx op)
2130 {
2131 if (GET_CODE (op) == CONST_DOUBLE)
2132 {
2133 long i;
2134
2135 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2136 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2137 }
2138
2139 return 0;
2140 }
2141
2142 /* Nonzero if OP is a floating point constant which can
2143 be loaded into an integer register using a single
2144 mov instruction. */
2145
2146 int
2147 fp_mov_p (rtx op)
2148 {
2149 if (GET_CODE (op) == CONST_DOUBLE)
2150 {
2151 long i;
2152
2153 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2154 return SPARC_SIMM13_P (i);
2155 }
2156
2157 return 0;
2158 }
2159
2160 /* Nonzero if OP is a floating point constant which can
2161 be loaded into an integer register using a high/losum
2162 instruction sequence. */
2163
2164 int
2165 fp_high_losum_p (rtx op)
2166 {
2167 /* The constraints calling this should only be in
2168 SFmode move insns, so any constant which cannot
2169 be moved using a single insn will do. */
2170 if (GET_CODE (op) == CONST_DOUBLE)
2171 {
2172 long i;
2173
2174 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2175 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2176 }
2177
2178 return 0;
2179 }
2180
2181 /* Return true if the address of LABEL can be loaded by means of the
2182 mov{si,di}_pic_label_ref patterns in PIC mode. */
2183
2184 static bool
2185 can_use_mov_pic_label_ref (rtx label)
2186 {
2187 /* VxWorks does not impose a fixed gap between segments; the run-time
2188 gap can be different from the object-file gap. We therefore can't
2189 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2190 are absolutely sure that X is in the same segment as the GOT.
2191 Unfortunately, the flexibility of linker scripts means that we
2192 can't be sure of that in general, so assume that GOT-relative
2193 accesses are never valid on VxWorks. */
2194 if (TARGET_VXWORKS_RTP)
2195 return false;
2196
2197 /* Similarly, if the label is non-local, it might end up being placed
2198 in a different section than the current one; now mov_pic_label_ref
2199 requires the label and the code to be in the same section. */
2200 if (LABEL_REF_NONLOCAL_P (label))
2201 return false;
2202
2203 /* Finally, if we are reordering basic blocks and partition into hot
2204 and cold sections, this might happen for any label. */
2205 if (flag_reorder_blocks_and_partition)
2206 return false;
2207
2208 return true;
2209 }
2210
2211 /* Expand a move instruction. Return true if all work is done. */
2212
2213 bool
2214 sparc_expand_move (machine_mode mode, rtx *operands)
2215 {
2216 /* Handle sets of MEM first. */
2217 if (GET_CODE (operands[0]) == MEM)
2218 {
2219 /* 0 is a register (or a pair of registers) on SPARC. */
2220 if (register_or_zero_operand (operands[1], mode))
2221 return false;
2222
2223 if (!reload_in_progress)
2224 {
2225 operands[0] = validize_mem (operands[0]);
2226 operands[1] = force_reg (mode, operands[1]);
2227 }
2228 }
2229
2230 /* Fixup TLS cases. */
2231 if (TARGET_HAVE_TLS
2232 && CONSTANT_P (operands[1])
2233 && sparc_tls_referenced_p (operands [1]))
2234 {
2235 operands[1] = sparc_legitimize_tls_address (operands[1]);
2236 return false;
2237 }
2238
2239 /* Fixup PIC cases. */
2240 if (flag_pic && CONSTANT_P (operands[1]))
2241 {
2242 if (pic_address_needs_scratch (operands[1]))
2243 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2244
2245 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2246 if (GET_CODE (operands[1]) == LABEL_REF
2247 && can_use_mov_pic_label_ref (operands[1]))
2248 {
2249 if (mode == SImode)
2250 {
2251 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2252 return true;
2253 }
2254
2255 if (mode == DImode)
2256 {
2257 gcc_assert (TARGET_ARCH64);
2258 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2259 return true;
2260 }
2261 }
2262
2263 if (symbolic_operand (operands[1], mode))
2264 {
2265 operands[1]
2266 = sparc_legitimize_pic_address (operands[1],
2267 reload_in_progress
2268 ? operands[0] : NULL_RTX);
2269 return false;
2270 }
2271 }
2272
2273 /* If we are trying to toss an integer constant into FP registers,
2274 or loading a FP or vector constant, force it into memory. */
2275 if (CONSTANT_P (operands[1])
2276 && REG_P (operands[0])
2277 && (SPARC_FP_REG_P (REGNO (operands[0]))
2278 || SCALAR_FLOAT_MODE_P (mode)
2279 || VECTOR_MODE_P (mode)))
2280 {
2281 /* emit_group_store will send such bogosity to us when it is
2282 not storing directly into memory. So fix this up to avoid
2283 crashes in output_constant_pool. */
2284 if (operands [1] == const0_rtx)
2285 operands[1] = CONST0_RTX (mode);
2286
2287 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2288 always other regs. */
2289 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2290 && (const_zero_operand (operands[1], mode)
2291 || const_all_ones_operand (operands[1], mode)))
2292 return false;
2293
2294 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2295 /* We are able to build any SF constant in integer registers
2296 with at most 2 instructions. */
2297 && (mode == SFmode
2298 /* And any DF constant in integer registers if needed. */
2299 || (mode == DFmode && !can_create_pseudo_p ())))
2300 return false;
2301
2302 operands[1] = force_const_mem (mode, operands[1]);
2303 if (!reload_in_progress)
2304 operands[1] = validize_mem (operands[1]);
2305 return false;
2306 }
2307
2308 /* Accept non-constants and valid constants unmodified. */
2309 if (!CONSTANT_P (operands[1])
2310 || GET_CODE (operands[1]) == HIGH
2311 || input_operand (operands[1], mode))
2312 return false;
2313
2314 switch (mode)
2315 {
2316 case E_QImode:
2317 /* All QImode constants require only one insn, so proceed. */
2318 break;
2319
2320 case E_HImode:
2321 case E_SImode:
2322 sparc_emit_set_const32 (operands[0], operands[1]);
2323 return true;
2324
2325 case E_DImode:
2326 /* input_operand should have filtered out 32-bit mode. */
2327 sparc_emit_set_const64 (operands[0], operands[1]);
2328 return true;
2329
2330 case E_TImode:
2331 {
2332 rtx high, low;
2333 /* TImode isn't available in 32-bit mode. */
2334 split_double (operands[1], &high, &low);
2335 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2336 high));
2337 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2338 low));
2339 }
2340 return true;
2341
2342 default:
2343 gcc_unreachable ();
2344 }
2345
2346 return false;
2347 }
2348
2349 /* Load OP1, a 32-bit constant, into OP0, a register.
2350 We know it can't be done in one insn when we get
2351 here; the move expander guarantees this. */
2352
2353 static void
2354 sparc_emit_set_const32 (rtx op0, rtx op1)
2355 {
2356 machine_mode mode = GET_MODE (op0);
2357 rtx temp = op0;
2358
2359 if (can_create_pseudo_p ())
2360 temp = gen_reg_rtx (mode);
2361
2362 if (GET_CODE (op1) == CONST_INT)
2363 {
2364 gcc_assert (!small_int_operand (op1, mode)
2365 && !const_high_operand (op1, mode));
2366
2367 /* Emit them as real moves instead of a HIGH/LO_SUM,
2368 this way CSE can see everything and reuse intermediate
2369 values if it wants. */
2370 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2371 & ~(HOST_WIDE_INT) 0x3ff)));
2372
2373 emit_insn (gen_rtx_SET (op0,
2374 gen_rtx_IOR (mode, temp,
2375 GEN_INT (INTVAL (op1) & 0x3ff))));
2376 }
2377 else
2378 {
2379 /* A symbol, emit in the traditional way. */
2380 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2381 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2382 }
2383 }
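
/* A worked example of the CONST_INT path above, for illustration only:
   with op1 = 0x12345678 the two sets are temp = 0x12345400 (the HIGH-like
   part with the low 10 bits masked off) and op0 = temp | 0x278, which is
   normally rendered as a sethi/or pair.  */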
2384
2385 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2386 If TEMP is nonzero, we are forbidden to use any other scratch
2387 registers. Otherwise, we are allowed to generate them as needed.
2388
2389 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2390 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2391
2392 void
2393 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2394 {
2395 rtx cst, temp1, temp2, temp3, temp4, temp5;
2396 rtx ti_temp = 0;
2397
2398 /* Deal with too large offsets. */
2399 if (GET_CODE (op1) == CONST
2400 && GET_CODE (XEXP (op1, 0)) == PLUS
2401 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2402 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2403 {
2404 gcc_assert (!temp);
2405 temp1 = gen_reg_rtx (DImode);
2406 temp2 = gen_reg_rtx (DImode);
2407 sparc_emit_set_const64 (temp2, cst);
2408 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2409 NULL_RTX);
2410 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2411 return;
2412 }
2413
2414 if (temp && GET_MODE (temp) == TImode)
2415 {
2416 ti_temp = temp;
2417 temp = gen_rtx_REG (DImode, REGNO (temp));
2418 }
2419
2420 /* SPARC-V9 code-model support. */
2421 switch (sparc_cmodel)
2422 {
2423 case CM_MEDLOW:
2424 /* The range spanned by all instructions in the object is less
2425 than 2^31 bytes (2GB) and the distance from any instruction
2426 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2427 than 2^31 bytes (2GB).
2428
2429 The executable must be in the low 4TB of the virtual address
2430 space.
2431
2432 sethi %hi(symbol), %temp1
2433 or %temp1, %lo(symbol), %reg */
2434 if (temp)
2435 temp1 = temp; /* op0 is allowed. */
2436 else
2437 temp1 = gen_reg_rtx (DImode);
2438
2439 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2440 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2441 break;
2442
2443 case CM_MEDMID:
2444 /* The range spanned by all instructions in the object is less
2445 than 2^31 bytes (2GB) and the distance from any instruction
2446 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2447 than 2^31 bytes (2GB).
2448
2449 The executable must be in the low 16TB of the virtual address
2450 space.
2451
2452 sethi %h44(symbol), %temp1
2453 or %temp1, %m44(symbol), %temp2
2454 sllx %temp2, 12, %temp3
2455 or %temp3, %l44(symbol), %reg */
2456 if (temp)
2457 {
2458 temp1 = op0;
2459 temp2 = op0;
2460 temp3 = temp; /* op0 is allowed. */
2461 }
2462 else
2463 {
2464 temp1 = gen_reg_rtx (DImode);
2465 temp2 = gen_reg_rtx (DImode);
2466 temp3 = gen_reg_rtx (DImode);
2467 }
2468
2469 emit_insn (gen_seth44 (temp1, op1));
2470 emit_insn (gen_setm44 (temp2, temp1, op1));
2471 emit_insn (gen_rtx_SET (temp3,
2472 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2473 emit_insn (gen_setl44 (op0, temp3, op1));
2474 break;
2475
2476 case CM_MEDANY:
2477 /* The range spanned by all instructions in the object is less
2478 than 2^31 bytes (2GB) and the distance from any instruction
2479 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2480 than 2^31 bytes (2GB).
2481
2482 The executable can be placed anywhere in the virtual address
2483 space.
2484
2485 sethi %hh(symbol), %temp1
2486 sethi %lm(symbol), %temp2
2487 or %temp1, %hm(symbol), %temp3
2488 sllx %temp3, 32, %temp4
2489 or %temp4, %temp2, %temp5
2490 or %temp5, %lo(symbol), %reg */
2491 if (temp)
2492 {
2493 /* It is possible that one of the registers we got for operands[2]
2494 might coincide with that of operands[0] (which is why we made
2495 it TImode). Pick the other one to use as our scratch. */
2496 if (rtx_equal_p (temp, op0))
2497 {
2498 gcc_assert (ti_temp);
2499 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2500 }
2501 temp1 = op0;
2502 temp2 = temp; /* op0 is _not_ allowed, see above. */
2503 temp3 = op0;
2504 temp4 = op0;
2505 temp5 = op0;
2506 }
2507 else
2508 {
2509 temp1 = gen_reg_rtx (DImode);
2510 temp2 = gen_reg_rtx (DImode);
2511 temp3 = gen_reg_rtx (DImode);
2512 temp4 = gen_reg_rtx (DImode);
2513 temp5 = gen_reg_rtx (DImode);
2514 }
2515
2516 emit_insn (gen_sethh (temp1, op1));
2517 emit_insn (gen_setlm (temp2, op1));
2518 emit_insn (gen_sethm (temp3, temp1, op1));
2519 emit_insn (gen_rtx_SET (temp4,
2520 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2521 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2522 emit_insn (gen_setlo (op0, temp5, op1));
2523 break;
2524
2525 case CM_EMBMEDANY:
2526 /* Old old old backwards compatibility cruft here.
2527 Essentially it is MEDLOW with a fixed 64-bit
2528 virtual base added to all data segment addresses.
2529 Text-segment stuff is computed like MEDANY, we can't
2530 reuse the code above because the relocation knobs
2531 look different.
2532
2533 Data segment: sethi %hi(symbol), %temp1
2534 add %temp1, EMBMEDANY_BASE_REG, %temp2
2535 or %temp2, %lo(symbol), %reg */
2536 if (data_segment_operand (op1, GET_MODE (op1)))
2537 {
2538 if (temp)
2539 {
2540 temp1 = temp; /* op0 is allowed. */
2541 temp2 = op0;
2542 }
2543 else
2544 {
2545 temp1 = gen_reg_rtx (DImode);
2546 temp2 = gen_reg_rtx (DImode);
2547 }
2548
2549 emit_insn (gen_embmedany_sethi (temp1, op1));
2550 emit_insn (gen_embmedany_brsum (temp2, temp1));
2551 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2552 }
2553
2554 /* Text segment: sethi %uhi(symbol), %temp1
2555 sethi %hi(symbol), %temp2
2556 or %temp1, %ulo(symbol), %temp3
2557 sllx %temp3, 32, %temp4
2558 or %temp4, %temp2, %temp5
2559 or %temp5, %lo(symbol), %reg */
2560 else
2561 {
2562 if (temp)
2563 {
2564 /* It is possible that one of the registers we got for operands[2]
2565 might coincide with that of operands[0] (which is why we made
2566 it TImode). Pick the other one to use as our scratch. */
2567 if (rtx_equal_p (temp, op0))
2568 {
2569 gcc_assert (ti_temp);
2570 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2571 }
2572 temp1 = op0;
2573 temp2 = temp; /* op0 is _not_ allowed, see above. */
2574 temp3 = op0;
2575 temp4 = op0;
2576 temp5 = op0;
2577 }
2578 else
2579 {
2580 temp1 = gen_reg_rtx (DImode);
2581 temp2 = gen_reg_rtx (DImode);
2582 temp3 = gen_reg_rtx (DImode);
2583 temp4 = gen_reg_rtx (DImode);
2584 temp5 = gen_reg_rtx (DImode);
2585 }
2586
2587 emit_insn (gen_embmedany_textuhi (temp1, op1));
2588 emit_insn (gen_embmedany_texthi (temp2, op1));
2589 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2590 emit_insn (gen_rtx_SET (temp4,
2591 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2592 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2593 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2594 }
2595 break;
2596
2597 default:
2598 gcc_unreachable ();
2599 }
2600 }
2601
2602 /* These avoid problems when cross compiling. If we do not
2603 go through all this hair then the optimizer will see
2604 invalid REG_EQUAL notes or in some cases none at all. */
2605 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2606 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2607 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2608 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2609
2610 /* The optimizer is not to assume anything about exactly
2611 which bits are set for a HIGH; they are unspecified.
2612 Unfortunately this leads to many missed optimizations
2613 during CSE. We mask out the non-HIGH bits and match
2614 a plain movdi to alleviate this problem. */
2615 static rtx
2616 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2617 {
2618 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2619 }
2620
2621 static rtx
2622 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2623 {
2624 return gen_rtx_SET (dest, GEN_INT (val));
2625 }
2626
2627 static rtx
2628 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2629 {
2630 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2631 }
2632
2633 static rtx
2634 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2635 {
2636 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2637 }
2638
2639 /* Worker routines for 64-bit constant formation on arch64.
2640 One of the key things to do in these emissions is
2641 to create as many temp REGs as possible. This makes it
2642 possible for half-built constants to be used later when
2643 such values are similar to something required later on.
2644 Without doing this, the optimizer cannot see such
2645 opportunities. */
2646
2647 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2648 unsigned HOST_WIDE_INT, int);
2649
2650 static void
2651 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2652 unsigned HOST_WIDE_INT low_bits, int is_neg)
2653 {
2654 unsigned HOST_WIDE_INT high_bits;
2655
2656 if (is_neg)
2657 high_bits = (~low_bits) & 0xffffffff;
2658 else
2659 high_bits = low_bits;
2660
2661 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2662 if (!is_neg)
2663 {
2664 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2665 }
2666 else
2667 {
2668 /* If we are XOR'ing with -1, then we should emit a one's complement
2669 instead. This way the combiner will notice logical operations
2670 such as ANDN later on and substitute. */
2671 if ((low_bits & 0x3ff) == 0x3ff)
2672 {
2673 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2674 }
2675 else
2676 {
2677 emit_insn (gen_rtx_SET (op0,
2678 gen_safe_XOR64 (temp,
2679 (-(HOST_WIDE_INT)0x400
2680 | (low_bits & 0x3ff)))));
2681 }
2682 }
2683 }
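
/* Illustrative trace of the is_neg path above: for low_bits = 0x80001234
   we get high_bits = ~low_bits = 0x7fffedcb, temp is set to 0x7fffec00,
   and op0 = temp ^ (-0x400 | 0x234) = 0xffffffff80001234, i.e. the
   sign-extended 32-bit value, in two instructions.  */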
2684
2685 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2686 unsigned HOST_WIDE_INT, int);
2687
2688 static void
2689 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2690 unsigned HOST_WIDE_INT high_bits,
2691 unsigned HOST_WIDE_INT low_immediate,
2692 int shift_count)
2693 {
2694 rtx temp2 = op0;
2695
2696 if ((high_bits & 0xfffffc00) != 0)
2697 {
2698 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2699 if ((high_bits & ~0xfffffc00) != 0)
2700 emit_insn (gen_rtx_SET (op0,
2701 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2702 else
2703 temp2 = temp;
2704 }
2705 else
2706 {
2707 emit_insn (gen_safe_SET64 (temp, high_bits));
2708 temp2 = temp;
2709 }
2710
2711 /* Now shift it up into place. */
2712 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2713 GEN_INT (shift_count))));
2714
2715 /* If there is a low immediate part piece, finish up by
2716 putting that in as well. */
2717 if (low_immediate != 0)
2718 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2719 }
2720
2721 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2722 unsigned HOST_WIDE_INT);
2723
2724 /* Full 64-bit constant decomposition. Even though this is the
2725 'worst' case, we still optimize a few things away. */
2726 static void
2727 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2728 unsigned HOST_WIDE_INT high_bits,
2729 unsigned HOST_WIDE_INT low_bits)
2730 {
2731 rtx sub_temp = op0;
2732
2733 if (can_create_pseudo_p ())
2734 sub_temp = gen_reg_rtx (DImode);
2735
2736 if ((high_bits & 0xfffffc00) != 0)
2737 {
2738 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2739 if ((high_bits & ~0xfffffc00) != 0)
2740 emit_insn (gen_rtx_SET (sub_temp,
2741 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2742 else
2743 sub_temp = temp;
2744 }
2745 else
2746 {
2747 emit_insn (gen_safe_SET64 (temp, high_bits));
2748 sub_temp = temp;
2749 }
2750
2751 if (can_create_pseudo_p ())
2752 {
2753 rtx temp2 = gen_reg_rtx (DImode);
2754 rtx temp3 = gen_reg_rtx (DImode);
2755 rtx temp4 = gen_reg_rtx (DImode);
2756
2757 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2758 GEN_INT (32))));
2759
2760 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2761 if ((low_bits & ~0xfffffc00) != 0)
2762 {
2763 emit_insn (gen_rtx_SET (temp3,
2764 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2765 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2766 }
2767 else
2768 {
2769 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2770 }
2771 }
2772 else
2773 {
2774 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2775 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2776 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2777 int to_shift = 12;
2778
2779 /* We are in the middle of reload, so this is really
2780 painful. However we do still make an attempt to
2781 avoid emitting truly stupid code. */
2782 if (low1 != const0_rtx)
2783 {
2784 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2785 GEN_INT (to_shift))));
2786 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2787 sub_temp = op0;
2788 to_shift = 12;
2789 }
2790 else
2791 {
2792 to_shift += 12;
2793 }
2794 if (low2 != const0_rtx)
2795 {
2796 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2797 GEN_INT (to_shift))));
2798 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2799 sub_temp = op0;
2800 to_shift = 8;
2801 }
2802 else
2803 {
2804 to_shift += 8;
2805 }
2806 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2807 GEN_INT (to_shift))));
2808 if (low3 != const0_rtx)
2809 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2810 /* phew... */
2811 }
2812 }
2813
2814 /* Analyze a 64-bit constant for certain properties. */
2815 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2816 unsigned HOST_WIDE_INT,
2817 int *, int *, int *);
2818
2819 static void
2820 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2821 unsigned HOST_WIDE_INT low_bits,
2822 int *hbsp, int *lbsp, int *abbasp)
2823 {
2824 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2825 int i;
2826
2827 lowest_bit_set = highest_bit_set = -1;
2828 i = 0;
2829 do
2830 {
2831 if ((lowest_bit_set == -1)
2832 && ((low_bits >> i) & 1))
2833 lowest_bit_set = i;
2834 if ((highest_bit_set == -1)
2835 && ((high_bits >> (32 - i - 1)) & 1))
2836 highest_bit_set = (64 - i - 1);
2837 }
2838 while (++i < 32
2839 && ((highest_bit_set == -1)
2840 || (lowest_bit_set == -1)));
2841 if (i == 32)
2842 {
2843 i = 0;
2844 do
2845 {
2846 if ((lowest_bit_set == -1)
2847 && ((high_bits >> i) & 1))
2848 lowest_bit_set = i + 32;
2849 if ((highest_bit_set == -1)
2850 && ((low_bits >> (32 - i - 1)) & 1))
2851 highest_bit_set = 32 - i - 1;
2852 }
2853 while (++i < 32
2854 && ((highest_bit_set == -1)
2855 || (lowest_bit_set == -1)));
2856 }
2857 /* If there are no bits set this should have gone out
2858 as one instruction! */
2859 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2860 all_bits_between_are_set = 1;
2861 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2862 {
2863 if (i < 32)
2864 {
2865 if ((low_bits & (1 << i)) != 0)
2866 continue;
2867 }
2868 else
2869 {
2870 if ((high_bits & (1 << (i - 32))) != 0)
2871 continue;
2872 }
2873 all_bits_between_are_set = 0;
2874 break;
2875 }
2876 *hbsp = highest_bit_set;
2877 *lbsp = lowest_bit_set;
2878 *abbasp = all_bits_between_are_set;
2879 }
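
/* For illustration: with high_bits = 0x00000003 and low_bits = 0xfc000000
   (the constant 0x3fc000000) the routine above reports lowest_bit_set = 26,
   highest_bit_set = 33 and all_bits_between_are_set = 1.  */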
2880
2881 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2882
2883 static int
2884 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2885 unsigned HOST_WIDE_INT low_bits)
2886 {
2887 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2888
2889 if (high_bits == 0
2890 || high_bits == 0xffffffff)
2891 return 1;
2892
2893 analyze_64bit_constant (high_bits, low_bits,
2894 &highest_bit_set, &lowest_bit_set,
2895 &all_bits_between_are_set);
2896
2897 if ((highest_bit_set == 63
2898 || lowest_bit_set == 0)
2899 && all_bits_between_are_set != 0)
2900 return 1;
2901
2902 if ((highest_bit_set - lowest_bit_set) < 21)
2903 return 1;
2904
2905 return 0;
2906 }
2907
2908 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2909 unsigned HOST_WIDE_INT,
2910 int, int);
2911
2912 static unsigned HOST_WIDE_INT
2913 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2914 unsigned HOST_WIDE_INT low_bits,
2915 int lowest_bit_set, int shift)
2916 {
2917 HOST_WIDE_INT hi, lo;
2918
2919 if (lowest_bit_set < 32)
2920 {
2921 lo = (low_bits >> lowest_bit_set) << shift;
2922 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2923 }
2924 else
2925 {
2926 lo = 0;
2927 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2928 }
2929 gcc_assert (! (hi & lo));
2930 return (hi | lo);
2931 }
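
/* Continuing the example above: for high_bits = 0x3, low_bits = 0xfc000000
   and lowest_bit_set = 26, a shift of 0 yields focus bits 0xff while a
   shift of 10 yields 0x3fc00, a value sethi can materialize directly.  */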
2932
2933 /* Here we are sure to be arch64 and this is an integer constant
2934 being loaded into a register. Emit the most efficient
2935 insn sequence possible. Detection of all the 1-insn cases
2936 has been done already. */
2937 static void
2938 sparc_emit_set_const64 (rtx op0, rtx op1)
2939 {
2940 unsigned HOST_WIDE_INT high_bits, low_bits;
2941 int lowest_bit_set, highest_bit_set;
2942 int all_bits_between_are_set;
2943 rtx temp = 0;
2944
2945 /* Sanity check that we know what we are working with. */
2946 gcc_assert (TARGET_ARCH64
2947 && (GET_CODE (op0) == SUBREG
2948 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2949
2950 if (! can_create_pseudo_p ())
2951 temp = op0;
2952
2953 if (GET_CODE (op1) != CONST_INT)
2954 {
2955 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2956 return;
2957 }
2958
2959 if (! temp)
2960 temp = gen_reg_rtx (DImode);
2961
2962 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2963 low_bits = (INTVAL (op1) & 0xffffffff);
2964
2965 /* low_bits bits 0 --> 31
2966 high_bits bits 32 --> 63 */
2967
2968 analyze_64bit_constant (high_bits, low_bits,
2969 &highest_bit_set, &lowest_bit_set,
2970 &all_bits_between_are_set);
2971
2972 /* First try for a 2-insn sequence. */
2973
2974 /* These situations are preferred because the optimizer can
2975 * do more things with them:
2976 * 1) mov -1, %reg
2977 * sllx %reg, shift, %reg
2978 * 2) mov -1, %reg
2979 * srlx %reg, shift, %reg
2980 * 3) mov some_small_const, %reg
2981 * sllx %reg, shift, %reg
2982 */
2983 if (((highest_bit_set == 63
2984 || lowest_bit_set == 0)
2985 && all_bits_between_are_set != 0)
2986 || ((highest_bit_set - lowest_bit_set) < 12))
2987 {
2988 HOST_WIDE_INT the_const = -1;
2989 int shift = lowest_bit_set;
2990
2991 if ((highest_bit_set != 63
2992 && lowest_bit_set != 0)
2993 || all_bits_between_are_set == 0)
2994 {
2995 the_const =
2996 create_simple_focus_bits (high_bits, low_bits,
2997 lowest_bit_set, 0);
2998 }
2999 else if (lowest_bit_set == 0)
3000 shift = -(63 - highest_bit_set);
3001
3002 gcc_assert (SPARC_SIMM13_P (the_const));
3003 gcc_assert (shift != 0);
3004
3005 emit_insn (gen_safe_SET64 (temp, the_const));
3006 if (shift > 0)
3007 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3008 GEN_INT (shift))));
3009 else if (shift < 0)
3010 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3011 GEN_INT (-shift))));
3012 return;
3013 }
3014
3015 /* Now a range of 22 or fewer bits set somewhere.
3016 * 1) sethi %hi(focus_bits), %reg
3017 * sllx %reg, shift, %reg
3018 * 2) sethi %hi(focus_bits), %reg
3019 * srlx %reg, shift, %reg
3020 */
3021 if ((highest_bit_set - lowest_bit_set) < 21)
3022 {
3023 unsigned HOST_WIDE_INT focus_bits =
3024 create_simple_focus_bits (high_bits, low_bits,
3025 lowest_bit_set, 10);
3026
3027 gcc_assert (SPARC_SETHI_P (focus_bits));
3028 gcc_assert (lowest_bit_set != 10);
3029
3030 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3031
3032 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3033 if (lowest_bit_set < 10)
3034 emit_insn (gen_rtx_SET (op0,
3035 gen_rtx_LSHIFTRT (DImode, temp,
3036 GEN_INT (10 - lowest_bit_set))));
3037 else if (lowest_bit_set > 10)
3038 emit_insn (gen_rtx_SET (op0,
3039 gen_rtx_ASHIFT (DImode, temp,
3040 GEN_INT (lowest_bit_set - 10))));
3041 return;
3042 }
3043
3044 /* 1) sethi %hi(low_bits), %reg
3045 * or %reg, %lo(low_bits), %reg
3046 * 2) sethi %hi(~low_bits), %reg
3047 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3048 */
3049 if (high_bits == 0
3050 || high_bits == 0xffffffff)
3051 {
3052 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3053 (high_bits == 0xffffffff));
3054 return;
3055 }
3056
3057 /* Now, try 3-insn sequences. */
3058
3059 /* 1) sethi %hi(high_bits), %reg
3060 * or %reg, %lo(high_bits), %reg
3061 * sllx %reg, 32, %reg
3062 */
3063 if (low_bits == 0)
3064 {
3065 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3066 return;
3067 }
3068
3069 /* We may be able to do something quick
3070 when the constant is negated, so try that. */
3071 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3072 (~low_bits) & 0xfffffc00))
3073 {
3074 /* NOTE: The trailing bits get XOR'd so we need the
3075 non-negated bits, not the negated ones. */
3076 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3077
3078 if ((((~high_bits) & 0xffffffff) == 0
3079 && ((~low_bits) & 0x80000000) == 0)
3080 || (((~high_bits) & 0xffffffff) == 0xffffffff
3081 && ((~low_bits) & 0x80000000) != 0))
3082 {
3083 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3084
3085 if ((SPARC_SETHI_P (fast_int)
3086 && (~high_bits & 0xffffffff) == 0)
3087 || SPARC_SIMM13_P (fast_int))
3088 emit_insn (gen_safe_SET64 (temp, fast_int));
3089 else
3090 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3091 }
3092 else
3093 {
3094 rtx negated_const;
3095 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3096 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3097 sparc_emit_set_const64 (temp, negated_const);
3098 }
3099
3100 /* If we are XOR'ing with -1, then we should emit a one's complement
3101 instead. This way the combiner will notice logical operations
3102 such as ANDN later on and substitute. */
3103 if (trailing_bits == 0x3ff)
3104 {
3105 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3106 }
3107 else
3108 {
3109 emit_insn (gen_rtx_SET (op0,
3110 gen_safe_XOR64 (temp,
3111 (-0x400 | trailing_bits))));
3112 }
3113 return;
3114 }
3115
3116 /* 1) sethi %hi(xxx), %reg
3117 * or %reg, %lo(xxx), %reg
3118 * sllx %reg, yyy, %reg
3119 *
3120 * ??? This is just a generalized version of the low_bits==0
3121 * thing above, FIXME...
3122 */
3123 if ((highest_bit_set - lowest_bit_set) < 32)
3124 {
3125 unsigned HOST_WIDE_INT focus_bits =
3126 create_simple_focus_bits (high_bits, low_bits,
3127 lowest_bit_set, 0);
3128
3129 /* We can't get here in this state. */
3130 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3131
3132 /* So what we know is that the set bits straddle the
3133 middle of the 64-bit word. */
3134 sparc_emit_set_const64_quick2 (op0, temp,
3135 focus_bits, 0,
3136 lowest_bit_set);
3137 return;
3138 }
3139
3140 /* 1) sethi %hi(high_bits), %reg
3141 * or %reg, %lo(high_bits), %reg
3142 * sllx %reg, 32, %reg
3143 * or %reg, low_bits, %reg
3144 */
3145 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3146 {
3147 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3148 return;
3149 }
3150
3151 /* The easiest way when all else fails is full decomposition. */
3152 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3153 }
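
/* Rough end-to-end sketch of the 2-insn path above: loading 0x3fc000000
   finds an 8-bit run starting at bit 26, so something like
   "mov 0xff, %reg; sllx %reg, 26, %reg" is emitted instead of falling
   through to the longer sequences.  */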
3154
3155 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3156
3157 static bool
3158 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3159 {
3160 *p1 = SPARC_ICC_REG;
3161 *p2 = SPARC_FCC_REG;
3162 return true;
3163 }
3164
3165 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3166
3167 static unsigned int
3168 sparc_min_arithmetic_precision (void)
3169 {
3170 return 32;
3171 }
3172
3173 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3174 return the mode to be used for the comparison. For floating-point,
3175 CCFP[E]mode is used. CCNZmode should be used when the first operand
3176 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3177 processing is needed. */
3178
3179 machine_mode
3180 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3181 {
3182 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3183 {
3184 switch (op)
3185 {
3186 case EQ:
3187 case NE:
3188 case UNORDERED:
3189 case ORDERED:
3190 case UNLT:
3191 case UNLE:
3192 case UNGT:
3193 case UNGE:
3194 case UNEQ:
3195 case LTGT:
3196 return CCFPmode;
3197
3198 case LT:
3199 case LE:
3200 case GT:
3201 case GE:
3202 return CCFPEmode;
3203
3204 default:
3205 gcc_unreachable ();
3206 }
3207 }
3208 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3209 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3210 && y == const0_rtx)
3211 {
3212 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3213 return CCXNZmode;
3214 else
3215 return CCNZmode;
3216 }
3217 else
3218 {
3219 /* This is for the cmp<mode>_sne pattern. */
3220 if (GET_CODE (x) == NOT && y == constm1_rtx)
3221 {
3222 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3223 return CCXCmode;
3224 else
3225 return CCCmode;
3226 }
3227
3228 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3229 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3230 {
3231 if (GET_CODE (y) == UNSPEC
3232 && (XINT (y, 1) == UNSPEC_ADDV
3233 || XINT (y, 1) == UNSPEC_SUBV
3234 || XINT (y, 1) == UNSPEC_NEGV))
3235 return CCVmode;
3236 else
3237 return CCCmode;
3238 }
3239
3240 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3241 return CCXmode;
3242 else
3243 return CCmode;
3244 }
3245 }
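
/* A few illustrative cases for the selection above: an EQ or NE
   floating-point comparison gets CCFPmode while LT/LE/GT/GE get CCFPEmode;
   comparing (plus:SI x y) against zero gets CCNZmode (CCXNZmode for DImode
   on 64-bit); everything else falls back to CCmode or CCXmode.  */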
3246
3247 /* Emit the compare insn and return the CC reg for a CODE comparison
3248 with operands X and Y. */
3249
3250 static rtx
3251 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3252 {
3253 machine_mode mode;
3254 rtx cc_reg;
3255
3256 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3257 return x;
3258
3259 mode = SELECT_CC_MODE (code, x, y);
3260
3261 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3262 fcc regs (cse can't tell they're really call clobbered regs and will
3263 remove a duplicate comparison even if there is an intervening function
3264 call - it will then try to reload the cc reg via an int reg which is why
3265 we need the movcc patterns). It is possible to provide the movcc
3266 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3267 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3268 to tell cse that CCFPE mode registers (even pseudos) are call
3269 clobbered. */
3270
3271 /* ??? This is an experiment. Rather than making changes to cse which may
3272 or may not be easy/clean, we do our own cse. This is possible because
3273 we will generate hard registers. Cse knows they're call clobbered (it
3274 doesn't know the same thing about pseudos). If we guess wrong, no big
3275 deal, but if we win, great! */
3276
3277 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3278 #if 1 /* experiment */
3279 {
3280 int reg;
3281 /* We cycle through the registers to ensure they're all exercised. */
3282 static int next_fcc_reg = 0;
3283 /* Previous x,y for each fcc reg. */
3284 static rtx prev_args[4][2];
3285
3286 /* Scan prev_args for x,y. */
3287 for (reg = 0; reg < 4; reg++)
3288 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3289 break;
3290 if (reg == 4)
3291 {
3292 reg = next_fcc_reg;
3293 prev_args[reg][0] = x;
3294 prev_args[reg][1] = y;
3295 next_fcc_reg = (next_fcc_reg + 1) & 3;
3296 }
3297 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3298 }
3299 #else
3300 cc_reg = gen_reg_rtx (mode);
3301 #endif /* ! experiment */
3302 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3303 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3304 else
3305 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3306
3307 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3308 will only result in an unrecognizable insn so no point in asserting. */
3309 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3310
3311 return cc_reg;
3312 }
3313
3314
3315 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3316
3317 rtx
3318 gen_compare_reg (rtx cmp)
3319 {
3320 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3321 }
3322
3323 /* This function is used for v9 only.
3324 DEST is the target of the Scc insn.
3325 CODE is the code for an Scc's comparison.
3326 X and Y are the values we compare.
3327
3328 This function is needed to turn
3329
3330 (set (reg:SI 110)
3331 (gt (reg:CCX 100 %icc)
3332 (const_int 0)))
3333 into
3334 (set (reg:SI 110)
3335 (gt:DI (reg:CCX 100 %icc)
3336 (const_int 0)))
3337
3338 IE: The instruction recognizer needs to see the mode of the comparison to
3339 find the right instruction. We could use "gt:DI" right in the
3340 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3341
3342 static int
3343 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3344 {
3345 if (! TARGET_ARCH64
3346 && (GET_MODE (x) == DImode
3347 || GET_MODE (dest) == DImode))
3348 return 0;
3349
3350 /* Try to use the movrCC insns. */
3351 if (TARGET_ARCH64
3352 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3353 && y == const0_rtx
3354 && v9_regcmp_p (compare_code))
3355 {
3356 rtx op0 = x;
3357 rtx temp;
3358
3359 /* Special case for op0 != 0. This can be done with one instruction if
3360 dest == x. */
3361
3362 if (compare_code == NE
3363 && GET_MODE (dest) == DImode
3364 && rtx_equal_p (op0, dest))
3365 {
3366 emit_insn (gen_rtx_SET (dest,
3367 gen_rtx_IF_THEN_ELSE (DImode,
3368 gen_rtx_fmt_ee (compare_code, DImode,
3369 op0, const0_rtx),
3370 const1_rtx,
3371 dest)));
3372 return 1;
3373 }
3374
3375 if (reg_overlap_mentioned_p (dest, op0))
3376 {
3377 /* Handle the case where dest == x.
3378 We "early clobber" the result. */
3379 op0 = gen_reg_rtx (GET_MODE (x));
3380 emit_move_insn (op0, x);
3381 }
3382
3383 emit_insn (gen_rtx_SET (dest, const0_rtx));
3384 if (GET_MODE (op0) != DImode)
3385 {
3386 temp = gen_reg_rtx (DImode);
3387 convert_move (temp, op0, 0);
3388 }
3389 else
3390 temp = op0;
3391 emit_insn (gen_rtx_SET (dest,
3392 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3393 gen_rtx_fmt_ee (compare_code, DImode,
3394 temp, const0_rtx),
3395 const1_rtx,
3396 dest)));
3397 return 1;
3398 }
3399 else
3400 {
3401 x = gen_compare_reg_1 (compare_code, x, y);
3402 y = const0_rtx;
3403
3404 emit_insn (gen_rtx_SET (dest, const0_rtx));
3405 emit_insn (gen_rtx_SET (dest,
3406 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3407 gen_rtx_fmt_ee (compare_code,
3408 GET_MODE (x), x, y),
3409 const1_rtx, dest)));
3410 return 1;
3411 }
3412 }
3413
3414
3415 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3416 without jumps using the addx/subx instructions. */
3417
3418 bool
3419 emit_scc_insn (rtx operands[])
3420 {
3421 rtx tem, x, y;
3422 enum rtx_code code;
3423 machine_mode mode;
3424
3425 /* The quad-word fp compare library routines all return nonzero to indicate
3426 true, which is different from the equivalent libgcc routines, so we must
3427 handle them specially here. */
3428 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3429 {
3430 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3431 GET_CODE (operands[1]));
3432 operands[2] = XEXP (operands[1], 0);
3433 operands[3] = XEXP (operands[1], 1);
3434 }
3435
3436 code = GET_CODE (operands[1]);
3437 x = operands[2];
3438 y = operands[3];
3439 mode = GET_MODE (x);
3440
3441 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3442 more applications). The exception to this is "reg != 0" which can
3443 be done in one instruction on v9 (so we do it). */
3444 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3445 {
3446 if (y != const0_rtx)
3447 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3448
3449 rtx pat = gen_rtx_SET (operands[0],
3450 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3451 x, const0_rtx));
3452
3453 /* If we can use addx/subx or addxc, add a clobber for CC. */
3454 if (mode == SImode || (code == NE && TARGET_VIS3))
3455 {
3456 rtx clobber
3457 = gen_rtx_CLOBBER (VOIDmode,
3458 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3459 SPARC_ICC_REG));
3460 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3461 }
3462
3463 emit_insn (pat);
3464 return true;
3465 }
3466
3467 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3468 if (TARGET_ARCH64
3469 && mode == DImode
3470 && !((code == LTU || code == GTU) && TARGET_VIS3)
3471 && gen_v9_scc (operands[0], code, x, y))
3472 return true;
3473
3474 /* We can do LTU and GEU using the addx/subx instructions too. And
3475 for GTU/LEU, if both operands are registers, swap them and fall
3476 back to the easy case. */
3477 if (code == GTU || code == LEU)
3478 {
3479 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3480 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3481 {
3482 tem = x;
3483 x = y;
3484 y = tem;
3485 code = swap_condition (code);
3486 }
3487 }
3488
3489 if (code == LTU || code == GEU)
3490 {
3491 emit_insn (gen_rtx_SET (operands[0],
3492 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3493 gen_compare_reg_1 (code, x, y),
3494 const0_rtx)));
3495 return true;
3496 }
3497
3498 /* All the possibilities to use addx/subx-based sequences have been
3499 exhausted, so try for a 3-instruction sequence using v9 conditional
3500 moves. */
3501 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3502 return true;
3503
3504 /* Nope, do branches. */
3505 return false;
3506 }
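
/* Informal sketch of the carry-based lowering used above for SImode LTU:
   the comparison can come out as "subcc %x, %y, %g0" followed by
   "addx %g0, 0, %dest", so the carry bit produced by the compare lands
   directly in dest and no branch is needed.  */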
3507
3508 /* Emit a conditional jump insn for the v9 architecture using comparison code
3509 CODE and jump target LABEL.
3510 This function exists to take advantage of the v9 brxx insns. */
3511
3512 static void
3513 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3514 {
3515 emit_jump_insn (gen_rtx_SET (pc_rtx,
3516 gen_rtx_IF_THEN_ELSE (VOIDmode,
3517 gen_rtx_fmt_ee (code, GET_MODE (op0),
3518 op0, const0_rtx),
3519 gen_rtx_LABEL_REF (VOIDmode, label),
3520 pc_rtx)));
3521 }
3522
3523 /* Emit a conditional jump insn for the UA2011 architecture using
3524 comparison code CODE and jump target LABEL. This function exists
3525 to take advantage of the UA2011 Compare and Branch insns. */
3526
3527 static void
3528 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3529 {
3530 rtx if_then_else;
3531
3532 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3533 gen_rtx_fmt_ee(code, GET_MODE(op0),
3534 op0, op1),
3535 gen_rtx_LABEL_REF (VOIDmode, label),
3536 pc_rtx);
3537
3538 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3539 }
3540
3541 void
3542 emit_conditional_branch_insn (rtx operands[])
3543 {
3544 /* The quad-word fp compare library routines all return nonzero to indicate
3545 true, which is different from the equivalent libgcc routines, so we must
3546 handle them specially here. */
3547 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3548 {
3549 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3550 GET_CODE (operands[0]));
3551 operands[1] = XEXP (operands[0], 0);
3552 operands[2] = XEXP (operands[0], 1);
3553 }
3554
3555 /* If we can tell early on that the comparison is against a constant
3556 that won't fit in the 5-bit signed immediate field of a cbcond,
3557 use one of the other v9 conditional branch sequences. */
3558 if (TARGET_CBCOND
3559 && GET_CODE (operands[1]) == REG
3560 && (GET_MODE (operands[1]) == SImode
3561 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3562 && (GET_CODE (operands[2]) != CONST_INT
3563 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3564 {
3565 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3566 return;
3567 }
3568
3569 if (TARGET_ARCH64 && operands[2] == const0_rtx
3570 && GET_CODE (operands[1]) == REG
3571 && GET_MODE (operands[1]) == DImode)
3572 {
3573 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3574 return;
3575 }
3576
3577 operands[1] = gen_compare_reg (operands[0]);
3578 operands[2] = const0_rtx;
3579 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3580 operands[1], operands[2]);
3581 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3582 operands[3]));
3583 }
3584
3585
3586 /* Generate a DFmode part of a hard TFmode register.
3587 REG is the TFmode hard register, LOW is 1 for the
3588 low 64 bits of the register and 0 otherwise.
3589 */
3590 rtx
3591 gen_df_reg (rtx reg, int low)
3592 {
3593 int regno = REGNO (reg);
3594
3595 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3596 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3597 return gen_rtx_REG (DFmode, regno);
3598 }
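/* For example, on a standard big-endian configuration a TFmode value in
   %f4 has its high-order DFmode half in %f4 (LOW == 0) and its low-order
   half in %f6 (LOW == 1); for TFmode held in integer registers on 64-bit
   the step between the two halves is a single register instead of two. */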
3599 \f
3600 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3601 Unlike normal calls, TFmode operands are passed by reference. It is
3602 assumed that no more than 3 operands are required. */
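/* As a point of reference, and assuming the standard SPARC V9
   quad-precision software ABI, the _Qp_* helpers have prototypes along
   the lines of

     void _Qp_add (long double *res, const long double *a,
                   const long double *b);

   i.e. the result and every TFmode argument are passed as pointers,
   which is why operand 0 below may need a stack temporary of its own. */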
3603
3604 static void
3605 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3606 {
3607 rtx ret_slot = NULL, arg[3], func_sym;
3608 int i;
3609
3610 /* We only expect to be called for conversions, unary, and binary ops. */
3611 gcc_assert (nargs == 2 || nargs == 3);
3612
3613 for (i = 0; i < nargs; ++i)
3614 {
3615 rtx this_arg = operands[i];
3616 rtx this_slot;
3617
3618 /* TFmode arguments and return values are passed by reference. */
3619 if (GET_MODE (this_arg) == TFmode)
3620 {
3621 int force_stack_temp;
3622
3623 force_stack_temp = 0;
3624 if (TARGET_BUGGY_QP_LIB && i == 0)
3625 force_stack_temp = 1;
3626
3627 if (GET_CODE (this_arg) == MEM
3628 && ! force_stack_temp)
3629 {
3630 tree expr = MEM_EXPR (this_arg);
3631 if (expr)
3632 mark_addressable (expr);
3633 this_arg = XEXP (this_arg, 0);
3634 }
3635 else if (CONSTANT_P (this_arg)
3636 && ! force_stack_temp)
3637 {
3638 this_slot = force_const_mem (TFmode, this_arg);
3639 this_arg = XEXP (this_slot, 0);
3640 }
3641 else
3642 {
3643 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3644
3645 /* Operand 0 is the return value. We'll copy it out later. */
3646 if (i > 0)
3647 emit_move_insn (this_slot, this_arg);
3648 else
3649 ret_slot = this_slot;
3650
3651 this_arg = XEXP (this_slot, 0);
3652 }
3653 }
3654
3655 arg[i] = this_arg;
3656 }
3657
3658 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3659
3660 if (GET_MODE (operands[0]) == TFmode)
3661 {
3662 if (nargs == 2)
3663 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3664 arg[0], GET_MODE (arg[0]),
3665 arg[1], GET_MODE (arg[1]));
3666 else
3667 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3668 arg[0], GET_MODE (arg[0]),
3669 arg[1], GET_MODE (arg[1]),
3670 arg[2], GET_MODE (arg[2]));
3671
3672 if (ret_slot)
3673 emit_move_insn (operands[0], ret_slot);
3674 }
3675 else
3676 {
3677 rtx ret;
3678
3679 gcc_assert (nargs == 2);
3680
3681 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3682 GET_MODE (operands[0]),
3683 arg[1], GET_MODE (arg[1]));
3684
3685 if (ret != operands[0])
3686 emit_move_insn (operands[0], ret);
3687 }
3688 }
3689
3690 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3691
3692 static void
3693 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3694 {
3695 const char *func;
3696
3697 switch (code)
3698 {
3699 case PLUS:
3700 func = "_Qp_add";
3701 break;
3702 case MINUS:
3703 func = "_Qp_sub";
3704 break;
3705 case MULT:
3706 func = "_Qp_mul";
3707 break;
3708 case DIV:
3709 func = "_Qp_div";
3710 break;
3711 default:
3712 gcc_unreachable ();
3713 }
3714
3715 emit_soft_tfmode_libcall (func, 3, operands);
3716 }
3717
3718 static void
3719 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3720 {
3721 const char *func;
3722
3723 gcc_assert (code == SQRT);
3724 func = "_Qp_sqrt";
3725
3726 emit_soft_tfmode_libcall (func, 2, operands);
3727 }
3728
3729 static void
3730 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3731 {
3732 const char *func;
3733
3734 switch (code)
3735 {
3736 case FLOAT_EXTEND:
3737 switch (GET_MODE (operands[1]))
3738 {
3739 case E_SFmode:
3740 func = "_Qp_stoq";
3741 break;
3742 case E_DFmode:
3743 func = "_Qp_dtoq";
3744 break;
3745 default:
3746 gcc_unreachable ();
3747 }
3748 break;
3749
3750 case FLOAT_TRUNCATE:
3751 switch (GET_MODE (operands[0]))
3752 {
3753 case E_SFmode:
3754 func = "_Qp_qtos";
3755 break;
3756 case E_DFmode:
3757 func = "_Qp_qtod";
3758 break;
3759 default:
3760 gcc_unreachable ();
3761 }
3762 break;
3763
3764 case FLOAT:
3765 switch (GET_MODE (operands[1]))
3766 {
3767 case E_SImode:
3768 func = "_Qp_itoq";
3769 if (TARGET_ARCH64)
3770 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3771 break;
3772 case E_DImode:
3773 func = "_Qp_xtoq";
3774 break;
3775 default:
3776 gcc_unreachable ();
3777 }
3778 break;
3779
3780 case UNSIGNED_FLOAT:
3781 switch (GET_MODE (operands[1]))
3782 {
3783 case E_SImode:
3784 func = "_Qp_uitoq";
3785 if (TARGET_ARCH64)
3786 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3787 break;
3788 case E_DImode:
3789 func = "_Qp_uxtoq";
3790 break;
3791 default:
3792 gcc_unreachable ();
3793 }
3794 break;
3795
3796 case FIX:
3797 switch (GET_MODE (operands[0]))
3798 {
3799 case E_SImode:
3800 func = "_Qp_qtoi";
3801 break;
3802 case E_DImode:
3803 func = "_Qp_qtox";
3804 break;
3805 default:
3806 gcc_unreachable ();
3807 }
3808 break;
3809
3810 case UNSIGNED_FIX:
3811 switch (GET_MODE (operands[0]))
3812 {
3813 case E_SImode:
3814 func = "_Qp_qtoui";
3815 break;
3816 case E_DImode:
3817 func = "_Qp_qtoux";
3818 break;
3819 default:
3820 gcc_unreachable ();
3821 }
3822 break;
3823
3824 default:
3825 gcc_unreachable ();
3826 }
3827
3828 emit_soft_tfmode_libcall (func, 2, operands);
3829 }
3830
3831 /* Expand a hard-float tfmode operation. All arguments must be in
3832 registers. */
3833
3834 static void
3835 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3836 {
3837 rtx op, dest;
3838
3839 if (GET_RTX_CLASS (code) == RTX_UNARY)
3840 {
3841 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3842 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3843 }
3844 else
3845 {
3846 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3847 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3848 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3849 operands[1], operands[2]);
3850 }
3851
3852 if (register_operand (operands[0], VOIDmode))
3853 dest = operands[0];
3854 else
3855 dest = gen_reg_rtx (GET_MODE (operands[0]));
3856
3857 emit_insn (gen_rtx_SET (dest, op));
3858
3859 if (dest != operands[0])
3860 emit_move_insn (operands[0], dest);
3861 }
3862
3863 void
3864 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3865 {
3866 if (TARGET_HARD_QUAD)
3867 emit_hard_tfmode_operation (code, operands);
3868 else
3869 emit_soft_tfmode_binop (code, operands);
3870 }
3871
3872 void
3873 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3874 {
3875 if (TARGET_HARD_QUAD)
3876 emit_hard_tfmode_operation (code, operands);
3877 else
3878 emit_soft_tfmode_unop (code, operands);
3879 }
3880
3881 void
3882 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3883 {
3884 if (TARGET_HARD_QUAD)
3885 emit_hard_tfmode_operation (code, operands);
3886 else
3887 emit_soft_tfmode_cvt (code, operands);
3888 }
3889 \f
3890 /* Return nonzero if a branch/jump/call instruction will be emitting a
3891 nop into its delay slot. */
3892
3893 int
3894 empty_delay_slot (rtx_insn *insn)
3895 {
3896 rtx seq;
3897
3898 /* If no previous instruction (should not happen), return true. */
3899 if (PREV_INSN (insn) == NULL)
3900 return 1;
3901
3902 seq = NEXT_INSN (PREV_INSN (insn));
3903 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3904 return 0;
3905
3906 return 1;
3907 }
3908
3909 /* Return nonzero if we should emit a nop after a cbcond instruction.
3910 The cbcond instruction does not have a delay slot; however, there is
3911 a severe performance penalty if a control transfer appears right
3912 after a cbcond. Therefore we emit a nop when we detect this
3913 situation. */
3914
3915 int
3916 emit_cbcond_nop (rtx_insn *insn)
3917 {
3918 rtx next = next_active_insn (insn);
3919
3920 if (!next)
3921 return 1;
3922
3923 if (NONJUMP_INSN_P (next)
3924 && GET_CODE (PATTERN (next)) == SEQUENCE)
3925 next = XVECEXP (PATTERN (next), 0, 0);
3926 else if (CALL_P (next)
3927 && GET_CODE (PATTERN (next)) == PARALLEL)
3928 {
3929 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3930
3931 if (GET_CODE (delay) == RETURN)
3932 {
3933 /* It's a sibling call. Do not emit the nop if we're going
3934 to emit something other than the jump itself as the first
3935 instruction of the sibcall sequence. */
3936 if (sparc_leaf_function_p || TARGET_FLAT)
3937 return 0;
3938 }
3939 }
3940
3941 if (NONJUMP_INSN_P (next))
3942 return 0;
3943
3944 return 1;
3945 }
3946
3947 /* Return nonzero if TRIAL can go into the call delay slot. */
3948
3949 int
3950 eligible_for_call_delay (rtx_insn *trial)
3951 {
3952 rtx pat;
3953
3954 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3955 return 0;
3956
3957 /* Binutils allows
3958 call __tls_get_addr, %tgd_call (foo)
3959 add %l7, %o0, %o0, %tgd_add (foo)
3960 while Sun as/ld does not. */
3961 if (TARGET_GNU_TLS || !TARGET_TLS)
3962 return 1;
3963
3964 pat = PATTERN (trial);
3965
3966 /* We must reject tgd_add{32|64}, i.e.
3967 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3968 and tldm_add{32|64}, i.e.
3969 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3970 for Sun as/ld. */
3971 if (GET_CODE (pat) == SET
3972 && GET_CODE (SET_SRC (pat)) == PLUS)
3973 {
3974 rtx unspec = XEXP (SET_SRC (pat), 1);
3975
3976 if (GET_CODE (unspec) == UNSPEC
3977 && (XINT (unspec, 1) == UNSPEC_TLSGD
3978 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3979 return 0;
3980 }
3981
3982 return 1;
3983 }
3984
3985 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3986 instruction. RETURN_P is true if the v9 variant 'return' is to be
3987 considered in the test too.
3988
3989 TRIAL must be a SET whose destination is a REG appropriate for the
3990 'restore' instruction or, if RETURN_P is true, for the 'return'
3991 instruction. */
3992
3993 static int
3994 eligible_for_restore_insn (rtx trial, bool return_p)
3995 {
3996 rtx pat = PATTERN (trial);
3997 rtx src = SET_SRC (pat);
3998 bool src_is_freg = false;
3999 rtx src_reg;
4000
4001 /* Since we now can do moves between float and integer registers when
4002 VIS3 is enabled, we have to catch this case. We can allow such
4003 moves when doing a 'return' however. */
4004 src_reg = src;
4005 if (GET_CODE (src_reg) == SUBREG)
4006 src_reg = SUBREG_REG (src_reg);
4007 if (GET_CODE (src_reg) == REG
4008 && SPARC_FP_REG_P (REGNO (src_reg)))
4009 src_is_freg = true;
4010
4011 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4012 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4013 && arith_operand (src, GET_MODE (src))
4014 && ! src_is_freg)
4015 {
4016 if (TARGET_ARCH64)
4017 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4018 else
4019 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4020 }
4021
4022 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4023 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4024 && arith_double_operand (src, GET_MODE (src))
4025 && ! src_is_freg)
4026 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4027
4028 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4029 else if (! TARGET_FPU && register_operand (src, SFmode))
4030 return 1;
4031
4032 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4033 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4034 return 1;
4035
4036 /* If we have the 'return' instruction, anything that does not use
4037 local or output registers and can go into a delay slot wins. */
4038 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4039 return 1;
4040
4041 /* The 'restore src1,src2,dest' pattern for SImode. */
4042 else if (GET_CODE (src) == PLUS
4043 && register_operand (XEXP (src, 0), SImode)
4044 && arith_operand (XEXP (src, 1), SImode))
4045 return 1;
4046
4047 /* The 'restore src1,src2,dest' pattern for DImode. */
4048 else if (GET_CODE (src) == PLUS
4049 && register_operand (XEXP (src, 0), DImode)
4050 && arith_double_operand (XEXP (src, 1), DImode))
4051 return 1;
4052
4053 /* The 'restore src1,%lo(src2),dest' pattern. */
4054 else if (GET_CODE (src) == LO_SUM
4055 && ! TARGET_CM_MEDMID
4056 && ((register_operand (XEXP (src, 0), SImode)
4057 && immediate_operand (XEXP (src, 1), SImode))
4058 || (TARGET_ARCH64
4059 && register_operand (XEXP (src, 0), DImode)
4060 && immediate_operand (XEXP (src, 1), DImode))))
4061 return 1;
4062
4063 /* The 'restore src,src,dest' pattern. */
4064 else if (GET_CODE (src) == ASHIFT
4065 && (register_operand (XEXP (src, 0), SImode)
4066 || register_operand (XEXP (src, 0), DImode))
4067 && XEXP (src, 1) == const1_rtx)
4068 return 1;
4069
4070 return 0;
4071 }
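/* For illustration, this is what allows the final computation of a value
   returned by a function to be folded into the epilogue, e.g.

     ret                    ! return to the caller
      restore %g0, 1, %o0   ! pop the register window and set the result

   for a plain "return 1;", instead of a separate move followed by
   ret/restore. */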
4072
4073 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4074
4075 int
4076 eligible_for_return_delay (rtx_insn *trial)
4077 {
4078 int regno;
4079 rtx pat;
4080
4081 /* If the function uses __builtin_eh_return, the eh_return machinery
4082 occupies the delay slot. */
4083 if (crtl->calls_eh_return)
4084 return 0;
4085
4086 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4087 return 0;
4088
4089 /* In the case of a leaf or flat function, anything can go into the slot. */
4090 if (sparc_leaf_function_p || TARGET_FLAT)
4091 return 1;
4092
4093 if (!NONJUMP_INSN_P (trial))
4094 return 0;
4095
4096 pat = PATTERN (trial);
4097 if (GET_CODE (pat) == PARALLEL)
4098 {
4099 int i;
4100
4101 if (! TARGET_V9)
4102 return 0;
4103 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4104 {
4105 rtx expr = XVECEXP (pat, 0, i);
4106 if (GET_CODE (expr) != SET)
4107 return 0;
4108 if (GET_CODE (SET_DEST (expr)) != REG)
4109 return 0;
4110 regno = REGNO (SET_DEST (expr));
4111 if (regno >= 8 && regno < 24)
4112 return 0;
4113 }
4114 return !epilogue_renumber (&pat, 1);
4115 }
4116
4117 if (GET_CODE (pat) != SET)
4118 return 0;
4119
4120 if (GET_CODE (SET_DEST (pat)) != REG)
4121 return 0;
4122
4123 regno = REGNO (SET_DEST (pat));
4124
4125 /* Otherwise, only operations which can be done in tandem with
4126 a `restore' or `return' insn can go into the delay slot. */
4127 if (regno >= 8 && regno < 24)
4128 return 0;
4129
4130 /* If this instruction sets up a floating-point register and we have a return
4131 instruction, it can probably go in. But restore will not work
4132 with FP_REGS. */
4133 if (! SPARC_INT_REG_P (regno))
4134 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4135
4136 return eligible_for_restore_insn (trial, true);
4137 }
4138
4139 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4140
4141 int
4142 eligible_for_sibcall_delay (rtx_insn *trial)
4143 {
4144 rtx pat;
4145
4146 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4147 return 0;
4148
4149 if (!NONJUMP_INSN_P (trial))
4150 return 0;
4151
4152 pat = PATTERN (trial);
4153
4154 if (sparc_leaf_function_p || TARGET_FLAT)
4155 {
4156 /* If the tail call is done using the call instruction,
4157 we have to restore %o7 in the delay slot. */
4158 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4159 return 0;
4160
4161 /* %g1 is used to build the function address. */
4162 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4163 return 0;
4164
4165 return 1;
4166 }
4167
4168 if (GET_CODE (pat) != SET)
4169 return 0;
4170
4171 /* Otherwise, only operations which can be done in tandem with
4172 a `restore' insn can go into the delay slot. */
4173 if (GET_CODE (SET_DEST (pat)) != REG
4174 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4175 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4176 return 0;
4177
4178 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4179 in most cases. */
4180 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4181 return 0;
4182
4183 return eligible_for_restore_insn (trial, false);
4184 }
4185 \f
4186 /* Determine if it's legal to put X into the constant pool. This
4187 is not possible if X contains the address of a symbol that is
4188 not constant (TLS) or not known at final link time (PIC). */
4189
4190 static bool
4191 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4192 {
4193 switch (GET_CODE (x))
4194 {
4195 case CONST_INT:
4196 case CONST_WIDE_INT:
4197 case CONST_DOUBLE:
4198 case CONST_VECTOR:
4199 /* Accept all non-symbolic constants. */
4200 return false;
4201
4202 case LABEL_REF:
4203 /* Labels are OK iff we are non-PIC. */
4204 return flag_pic != 0;
4205
4206 case SYMBOL_REF:
4207 /* 'Naked' TLS symbol references are never OK,
4208 non-TLS symbols are OK iff we are non-PIC. */
4209 if (SYMBOL_REF_TLS_MODEL (x))
4210 return true;
4211 else
4212 return flag_pic != 0;
4213
4214 case CONST:
4215 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4216 case PLUS:
4217 case MINUS:
4218 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4219 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4220 case UNSPEC:
4221 return true;
4222 default:
4223 gcc_unreachable ();
4224 }
4225 }
4226 \f
4227 /* Global Offset Table support. */
4228 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4229 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4230
4231 /* Return the SYMBOL_REF for the Global Offset Table. */
4232
4233 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4234
4235 static rtx
4236 sparc_got (void)
4237 {
4238 if (!sparc_got_symbol)
4239 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4240
4241 return sparc_got_symbol;
4242 }
4243
4244 /* Ensure that we are not using patterns that are not OK with PIC. */
4245
4246 int
4247 check_pic (int i)
4248 {
4249 rtx op;
4250
4251 switch (flag_pic)
4252 {
4253 case 1:
4254 op = recog_data.operand[i];
4255 gcc_assert (GET_CODE (op) != SYMBOL_REF
4256 && (GET_CODE (op) != CONST
4257 || (GET_CODE (XEXP (op, 0)) == MINUS
4258 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4259 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4260 /* fallthrough */
4261 case 2:
4262 default:
4263 return 1;
4264 }
4265 }
4266
4267 /* Return true if X is an address which needs a temporary register when
4268 reloaded while generating PIC code. */
4269
4270 int
4271 pic_address_needs_scratch (rtx x)
4272 {
4273 /* An address which is a symbolic operand plus a non-SMALL_INT needs a temp reg. */
4274 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4275 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4276 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4277 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4278 return 1;
4279
4280 return 0;
4281 }
4282
4283 /* Determine if a given RTX is a valid constant. We already know this
4284 satisfies CONSTANT_P. */
4285
4286 static bool
4287 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4288 {
4289 switch (GET_CODE (x))
4290 {
4291 case CONST:
4292 case SYMBOL_REF:
4293 if (sparc_tls_referenced_p (x))
4294 return false;
4295 break;
4296
4297 case CONST_DOUBLE:
4298 /* Floating point constants are generally not ok.
4299 The only exception is 0.0 and all-ones in VIS. */
4300 if (TARGET_VIS
4301 && SCALAR_FLOAT_MODE_P (mode)
4302 && (const_zero_operand (x, mode)
4303 || const_all_ones_operand (x, mode)))
4304 return true;
4305
4306 return false;
4307
4308 case CONST_VECTOR:
4309 /* Vector constants are generally not ok.
4310 The only exception is 0 or -1 in VIS. */
4311 if (TARGET_VIS
4312 && (const_zero_operand (x, mode)
4313 || const_all_ones_operand (x, mode)))
4314 return true;
4315
4316 return false;
4317
4318 default:
4319 break;
4320 }
4321
4322 return true;
4323 }
4324
4325 /* Determine if a given RTX is a valid constant address. */
4326
4327 bool
4328 constant_address_p (rtx x)
4329 {
4330 switch (GET_CODE (x))
4331 {
4332 case LABEL_REF:
4333 case CONST_INT:
4334 case HIGH:
4335 return true;
4336
4337 case CONST:
4338 if (flag_pic && pic_address_needs_scratch (x))
4339 return false;
4340 return sparc_legitimate_constant_p (Pmode, x);
4341
4342 case SYMBOL_REF:
4343 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4344
4345 default:
4346 return false;
4347 }
4348 }
4349
4350 /* Nonzero if the constant value X is a legitimate general operand
4351 when generating PIC code. It is given that flag_pic is on and
4352 that X satisfies CONSTANT_P. */
4353
4354 bool
4355 legitimate_pic_operand_p (rtx x)
4356 {
4357 if (pic_address_needs_scratch (x))
4358 return false;
4359 if (sparc_tls_referenced_p (x))
4360 return false;
4361 return true;
4362 }
4363
4364 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4365 (CONST_INT_P (X) \
4366 && INTVAL (X) >= -0x1000 \
4367 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4368
4369 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4370 (CONST_INT_P (X) \
4371 && INTVAL (X) >= -0x1000 \
4372 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
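/* Both ranges derive from the 13-bit signed immediate field (simm13) of
   the SPARC load/store instructions, i.e. [-4096, 4095], shrunk so that
   the last byte of a MODE-sized access is still reachable. The OLO10
   variant leaves an extra 0x400 of headroom because the %lo() part of a
   LO_SUM can contribute up to 10 low-order bits (0x3ff) on top of the
   offset. */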
4373
4374 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4375
4376 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4377 ordinarily. This changes a bit when generating PIC. */
4378
4379 static bool
4380 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4381 {
4382 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4383
4384 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4385 rs1 = addr;
4386 else if (GET_CODE (addr) == PLUS)
4387 {
4388 rs1 = XEXP (addr, 0);
4389 rs2 = XEXP (addr, 1);
4390
4391 /* Canonicalize. REG comes first; if there are no regs,
4392 LO_SUM comes first. */
4393 if (!REG_P (rs1)
4394 && GET_CODE (rs1) != SUBREG
4395 && (REG_P (rs2)
4396 || GET_CODE (rs2) == SUBREG
4397 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4398 {
4399 rs1 = XEXP (addr, 1);
4400 rs2 = XEXP (addr, 0);
4401 }
4402
4403 if ((flag_pic == 1
4404 && rs1 == pic_offset_table_rtx
4405 && !REG_P (rs2)
4406 && GET_CODE (rs2) != SUBREG
4407 && GET_CODE (rs2) != LO_SUM
4408 && GET_CODE (rs2) != MEM
4409 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4410 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4411 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4412 || ((REG_P (rs1)
4413 || GET_CODE (rs1) == SUBREG)
4414 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4415 {
4416 imm1 = rs2;
4417 rs2 = NULL;
4418 }
4419 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4420 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4421 {
4422 /* We prohibit REG + REG for TFmode when there are no quad move insns
4423 and we consequently need to split. We do this because REG+REG
4424 is not an offsettable address. If we get the situation in reload
4425 where source and destination of a movtf pattern are both MEMs with
4426 REG+REG address, then only one of them gets converted to an
4427 offsettable address. */
4428 if (mode == TFmode
4429 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4430 return 0;
4431
4432 /* Likewise for TImode, but in all cases. */
4433 if (mode == TImode)
4434 return 0;
4435
4436 /* We prohibit REG + REG on ARCH32 if not optimizing for
4437 DFmode/DImode because then mem_min_alignment is likely to be zero
4438 after reload and the forced split would lack a matching splitter
4439 pattern. */
4440 if (TARGET_ARCH32 && !optimize
4441 && (mode == DFmode || mode == DImode))
4442 return 0;
4443 }
4444 else if (USE_AS_OFFSETABLE_LO10
4445 && GET_CODE (rs1) == LO_SUM
4446 && TARGET_ARCH64
4447 && ! TARGET_CM_MEDMID
4448 && RTX_OK_FOR_OLO10_P (rs2, mode))
4449 {
4450 rs2 = NULL;
4451 imm1 = XEXP (rs1, 1);
4452 rs1 = XEXP (rs1, 0);
4453 if (!CONSTANT_P (imm1)
4454 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4455 return 0;
4456 }
4457 }
4458 else if (GET_CODE (addr) == LO_SUM)
4459 {
4460 rs1 = XEXP (addr, 0);
4461 imm1 = XEXP (addr, 1);
4462
4463 if (!CONSTANT_P (imm1)
4464 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4465 return 0;
4466
4467 /* We can't allow TFmode in 32-bit mode, because an offset greater
4468 than the alignment (8) may cause the LO_SUM to overflow. */
4469 if (mode == TFmode && TARGET_ARCH32)
4470 return 0;
4471
4472 /* During reload, accept the HIGH+LO_SUM construct generated by
4473 sparc_legitimize_reload_address. */
4474 if (reload_in_progress
4475 && GET_CODE (rs1) == HIGH
4476 && XEXP (rs1, 0) == imm1)
4477 return 1;
4478 }
4479 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4480 return 1;
4481 else
4482 return 0;
4483
4484 if (GET_CODE (rs1) == SUBREG)
4485 rs1 = SUBREG_REG (rs1);
4486 if (!REG_P (rs1))
4487 return 0;
4488
4489 if (rs2)
4490 {
4491 if (GET_CODE (rs2) == SUBREG)
4492 rs2 = SUBREG_REG (rs2);
4493 if (!REG_P (rs2))
4494 return 0;
4495 }
4496
4497 if (strict)
4498 {
4499 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4500 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4501 return 0;
4502 }
4503 else
4504 {
4505 if ((! SPARC_INT_REG_P (REGNO (rs1))
4506 && REGNO (rs1) != FRAME_POINTER_REGNUM
4507 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4508 || (rs2
4509 && (! SPARC_INT_REG_P (REGNO (rs2))
4510 && REGNO (rs2) != FRAME_POINTER_REGNUM
4511 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4512 return 0;
4513 }
4514 return 1;
4515 }
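/* In concrete terms, the common addresses accepted above are the usual
   SPARC forms such as [%o0 + %o1] (REG + REG), [%i0 + 12] (REG plus a
   13-bit signed offset) and [%g1 + %lo(sym)] (LO_SUM); REG + REG is
   refused for TFmode and TImode because such accesses must be split
   into offsettable halves. */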
4516
4517 /* Return the SYMBOL_REF for the tls_get_addr function. */
4518
4519 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4520
4521 static rtx
4522 sparc_tls_get_addr (void)
4523 {
4524 if (!sparc_tls_symbol)
4525 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4526
4527 return sparc_tls_symbol;
4528 }
4529
4530 /* Return the Global Offset Table to be used in TLS mode. */
4531
4532 static rtx
4533 sparc_tls_got (void)
4534 {
4535 /* In PIC mode, this is just the PIC offset table. */
4536 if (flag_pic)
4537 {
4538 crtl->uses_pic_offset_table = 1;
4539 return pic_offset_table_rtx;
4540 }
4541
4542 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4543 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4544 if (TARGET_SUN_TLS && TARGET_ARCH32)
4545 {
4546 load_got_register ();
4547 return global_offset_table_rtx;
4548 }
4549
4550 /* In all other cases, we load a new pseudo with the GOT symbol. */
4551 return copy_to_reg (sparc_got ());
4552 }
4553
4554 /* Return true if X contains a thread-local symbol. */
4555
4556 static bool
4557 sparc_tls_referenced_p (rtx x)
4558 {
4559 if (!TARGET_HAVE_TLS)
4560 return false;
4561
4562 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4563 x = XEXP (XEXP (x, 0), 0);
4564
4565 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4566 return true;
4567
4568 /* That's all we handle in sparc_legitimize_tls_address for now. */
4569 return false;
4570 }
4571
4572 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4573 this (thread-local) address. */
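/* For reference, the global-dynamic case below expands, roughly and
   assuming 32-bit PIC code (the exact relocations come from the patterns
   in sparc.md), into the standard SPARC TLS sequence

     sethi %tgd_hi22(sym), %o1
     add   %o1, %tgd_lo10(sym), %o1
     add   %l7, %o1, %o0, %tgd_add(sym)
     call  __tls_get_addr, %tgd_call(sym)
      nop

   with the other models using correspondingly cheaper sequences. */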
4574
4575 static rtx
4576 sparc_legitimize_tls_address (rtx addr)
4577 {
4578 rtx temp1, temp2, temp3, ret, o0, got;
4579 rtx_insn *insn;
4580
4581 gcc_assert (can_create_pseudo_p ());
4582
4583 if (GET_CODE (addr) == SYMBOL_REF)
4584 switch (SYMBOL_REF_TLS_MODEL (addr))
4585 {
4586 case TLS_MODEL_GLOBAL_DYNAMIC:
4587 start_sequence ();
4588 temp1 = gen_reg_rtx (SImode);
4589 temp2 = gen_reg_rtx (SImode);
4590 ret = gen_reg_rtx (Pmode);
4591 o0 = gen_rtx_REG (Pmode, 8);
4592 got = sparc_tls_got ();
4593 emit_insn (gen_tgd_hi22 (temp1, addr));
4594 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4595 if (TARGET_ARCH32)
4596 {
4597 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4598 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4599 addr, const1_rtx));
4600 }
4601 else
4602 {
4603 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4604 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4605 addr, const1_rtx));
4606 }
4607 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4608 insn = get_insns ();
4609 end_sequence ();
4610 emit_libcall_block (insn, ret, o0, addr);
4611 break;
4612
4613 case TLS_MODEL_LOCAL_DYNAMIC:
4614 start_sequence ();
4615 temp1 = gen_reg_rtx (SImode);
4616 temp2 = gen_reg_rtx (SImode);
4617 temp3 = gen_reg_rtx (Pmode);
4618 ret = gen_reg_rtx (Pmode);
4619 o0 = gen_rtx_REG (Pmode, 8);
4620 got = sparc_tls_got ();
4621 emit_insn (gen_tldm_hi22 (temp1));
4622 emit_insn (gen_tldm_lo10 (temp2, temp1));
4623 if (TARGET_ARCH32)
4624 {
4625 emit_insn (gen_tldm_add32 (o0, got, temp2));
4626 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4627 const1_rtx));
4628 }
4629 else
4630 {
4631 emit_insn (gen_tldm_add64 (o0, got, temp2));
4632 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4633 const1_rtx));
4634 }
4635 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4636 insn = get_insns ();
4637 end_sequence ();
4638 emit_libcall_block (insn, temp3, o0,
4639 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4640 UNSPEC_TLSLD_BASE));
4641 temp1 = gen_reg_rtx (SImode);
4642 temp2 = gen_reg_rtx (SImode);
4643 emit_insn (gen_tldo_hix22 (temp1, addr));
4644 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4645 if (TARGET_ARCH32)
4646 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4647 else
4648 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4649 break;
4650
4651 case TLS_MODEL_INITIAL_EXEC:
4652 temp1 = gen_reg_rtx (SImode);
4653 temp2 = gen_reg_rtx (SImode);
4654 temp3 = gen_reg_rtx (Pmode);
4655 got = sparc_tls_got ();
4656 emit_insn (gen_tie_hi22 (temp1, addr));
4657 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4658 if (TARGET_ARCH32)
4659 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4660 else
4661 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4662 if (TARGET_SUN_TLS)
4663 {
4664 ret = gen_reg_rtx (Pmode);
4665 if (TARGET_ARCH32)
4666 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4667 temp3, addr));
4668 else
4669 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4670 temp3, addr));
4671 }
4672 else
4673 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4674 break;
4675
4676 case TLS_MODEL_LOCAL_EXEC:
4677 temp1 = gen_reg_rtx (Pmode);
4678 temp2 = gen_reg_rtx (Pmode);
4679 if (TARGET_ARCH32)
4680 {
4681 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4682 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4683 }
4684 else
4685 {
4686 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4687 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4688 }
4689 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4690 break;
4691
4692 default:
4693 gcc_unreachable ();
4694 }
4695
4696 else if (GET_CODE (addr) == CONST)
4697 {
4698 rtx base, offset;
4699
4700 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4701
4702 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4703 offset = XEXP (XEXP (addr, 0), 1);
4704
4705 base = force_operand (base, NULL_RTX);
4706 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4707 offset = force_reg (Pmode, offset);
4708 ret = gen_rtx_PLUS (Pmode, base, offset);
4709 }
4710
4711 else
4712 gcc_unreachable (); /* for now ... */
4713
4714 return ret;
4715 }
4716
4717 /* Legitimize PIC addresses. If the address is already position-independent,
4718 we return ORIG. Newly generated position-independent addresses go into a
4719 reg. This is REG if nonzero, otherwise we allocate register(s) as
4720 necessary. */
4721
4722 static rtx
4723 sparc_legitimize_pic_address (rtx orig, rtx reg)
4724 {
4725 bool gotdata_op = false;
4726
4727 if (GET_CODE (orig) == SYMBOL_REF
4728 /* See the comment in sparc_expand_move. */
4729 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4730 {
4731 rtx pic_ref, address;
4732 rtx_insn *insn;
4733
4734 if (reg == 0)
4735 {
4736 gcc_assert (can_create_pseudo_p ());
4737 reg = gen_reg_rtx (Pmode);
4738 }
4739
4740 if (flag_pic == 2)
4741 {
4742 /* If not during reload, allocate another temp reg here for loading
4743 in the address, so that these instructions can be optimized
4744 properly. */
4745 rtx temp_reg = (! can_create_pseudo_p ()
4746 ? reg : gen_reg_rtx (Pmode));
4747
4748 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4749 won't get confused into thinking that these two instructions
4750 are loading in the true address of the symbol. If in the
4751 future a PIC rtx exists, that should be used instead. */
4752 if (TARGET_ARCH64)
4753 {
4754 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4755 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4756 }
4757 else
4758 {
4759 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4760 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4761 }
4762 address = temp_reg;
4763 gotdata_op = true;
4764 }
4765 else
4766 address = orig;
4767
4768 crtl->uses_pic_offset_table = 1;
4769 if (gotdata_op)
4770 {
4771 if (TARGET_ARCH64)
4772 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4773 pic_offset_table_rtx,
4774 address, orig));
4775 else
4776 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4777 pic_offset_table_rtx,
4778 address, orig));
4779 }
4780 else
4781 {
4782 pic_ref
4783 = gen_const_mem (Pmode,
4784 gen_rtx_PLUS (Pmode,
4785 pic_offset_table_rtx, address));
4786 insn = emit_move_insn (reg, pic_ref);
4787 }
4788
4789 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4790 by loop. */
4791 set_unique_reg_note (insn, REG_EQUAL, orig);
4792 return reg;
4793 }
4794 else if (GET_CODE (orig) == CONST)
4795 {
4796 rtx base, offset;
4797
4798 if (GET_CODE (XEXP (orig, 0)) == PLUS
4799 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4800 return orig;
4801
4802 if (reg == 0)
4803 {
4804 gcc_assert (can_create_pseudo_p ());
4805 reg = gen_reg_rtx (Pmode);
4806 }
4807
4808 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4809 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4810 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4811 base == reg ? NULL_RTX : reg);
4812
4813 if (GET_CODE (offset) == CONST_INT)
4814 {
4815 if (SMALL_INT (offset))
4816 return plus_constant (Pmode, base, INTVAL (offset));
4817 else if (can_create_pseudo_p ())
4818 offset = force_reg (Pmode, offset);
4819 else
4820 /* If we reach here, then something is seriously wrong. */
4821 gcc_unreachable ();
4822 }
4823 return gen_rtx_PLUS (Pmode, base, offset);
4824 }
4825 else if (GET_CODE (orig) == LABEL_REF)
4826 /* ??? We ought to be checking that the register is live instead, in case
4827 it is eliminated. */
4828 crtl->uses_pic_offset_table = 1;
4829
4830 return orig;
4831 }
4832
4833 /* Try machine-dependent ways of modifying an illegitimate address X
4834 to be legitimate. If we find one, return the new, valid address.
4835
4836 OLDX is the address as it was before break_out_memory_refs was called.
4837 In some cases it is useful to look at this to decide what needs to be done.
4838
4839 MODE is the mode of the operand pointed to by X.
4840
4841 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4842
4843 static rtx
4844 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4845 machine_mode mode)
4846 {
4847 rtx orig_x = x;
4848
4849 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4850 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4851 force_operand (XEXP (x, 0), NULL_RTX));
4852 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4853 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4854 force_operand (XEXP (x, 1), NULL_RTX));
4855 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4856 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4857 XEXP (x, 1));
4858 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4859 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4860 force_operand (XEXP (x, 1), NULL_RTX));
4861
4862 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4863 return x;
4864
4865 if (sparc_tls_referenced_p (x))
4866 x = sparc_legitimize_tls_address (x);
4867 else if (flag_pic)
4868 x = sparc_legitimize_pic_address (x, NULL_RTX);
4869 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4870 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4871 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4872 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4873 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4874 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4875 else if (GET_CODE (x) == SYMBOL_REF
4876 || GET_CODE (x) == CONST
4877 || GET_CODE (x) == LABEL_REF)
4878 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4879
4880 return x;
4881 }
4882
4883 /* Delegitimize an address that was legitimized by the above function. */
4884
4885 static rtx
4886 sparc_delegitimize_address (rtx x)
4887 {
4888 x = delegitimize_mem_from_attrs (x);
4889
4890 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4891 switch (XINT (XEXP (x, 1), 1))
4892 {
4893 case UNSPEC_MOVE_PIC:
4894 case UNSPEC_TLSLE:
4895 x = XVECEXP (XEXP (x, 1), 0, 0);
4896 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4897 break;
4898 default:
4899 break;
4900 }
4901
4902 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4903 if (GET_CODE (x) == MINUS
4904 && REG_P (XEXP (x, 0))
4905 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4906 && GET_CODE (XEXP (x, 1)) == LO_SUM
4907 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4908 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4909 {
4910 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4911 gcc_assert (GET_CODE (x) == LABEL_REF);
4912 }
4913
4914 return x;
4915 }
4916
4917 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4918 replace the input X, or the original X if no replacement is called for.
4919 The output parameter *WIN is 1 if the calling macro should goto WIN,
4920 0 if it should not.
4921
4922 For SPARC, we wish to handle addresses by splitting them into
4923 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4924 This cuts the number of extra insns by one.
4925
4926 Do nothing when generating PIC code and the address is a symbolic
4927 operand or requires a scratch register. */
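/* Concretely, splitting a constant address this way lets reload keep the
   %lo() part inside the memory reference, e.g.

     sethi %hi(sym), %g1
     ld    [%g1 + %lo(sym)], %o0

   rather than materializing the complete address first and then issuing
   a separate load. */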
4928
4929 rtx
4930 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4931 int opnum, int type,
4932 int ind_levels ATTRIBUTE_UNUSED, int *win)
4933 {
4934 /* Decompose SImode constants into HIGH+LO_SUM. */
4935 if (CONSTANT_P (x)
4936 && (mode != TFmode || TARGET_ARCH64)
4937 && GET_MODE (x) == SImode
4938 && GET_CODE (x) != LO_SUM
4939 && GET_CODE (x) != HIGH
4940 && sparc_cmodel <= CM_MEDLOW
4941 && !(flag_pic
4942 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4943 {
4944 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4945 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4946 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4947 opnum, (enum reload_type)type);
4948 *win = 1;
4949 return x;
4950 }
4951
4952 /* We have to recognize what we have already generated above. */
4953 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4954 {
4955 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4956 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4957 opnum, (enum reload_type)type);
4958 *win = 1;
4959 return x;
4960 }
4961
4962 *win = 0;
4963 return x;
4964 }
4965
4966 /* Return true if ADDR (a legitimate address expression)
4967 has an effect that depends on the machine mode it is used for.
4968
4969 In PIC mode,
4970
4971 (mem:HI [%l7+a])
4972
4973 is not equivalent to
4974
4975 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4976
4977 because [%l7+a+1] is interpreted as the address of (a+1). */
4978
4979
4980 static bool
4981 sparc_mode_dependent_address_p (const_rtx addr,
4982 addr_space_t as ATTRIBUTE_UNUSED)
4983 {
4984 if (flag_pic && GET_CODE (addr) == PLUS)
4985 {
4986 rtx op0 = XEXP (addr, 0);
4987 rtx op1 = XEXP (addr, 1);
4988 if (op0 == pic_offset_table_rtx
4989 && symbolic_operand (op1, VOIDmode))
4990 return true;
4991 }
4992
4993 return false;
4994 }
4995
4996 #ifdef HAVE_GAS_HIDDEN
4997 # define USE_HIDDEN_LINKONCE 1
4998 #else
4999 # define USE_HIDDEN_LINKONCE 0
5000 #endif
5001
5002 static void
5003 get_pc_thunk_name (char name[32], unsigned int regno)
5004 {
5005 const char *reg_name = reg_names[regno];
5006
5007 /* Skip the leading '%' as that cannot be used in a
5008 symbol name. */
5009 reg_name += 1;
5010
5011 if (USE_HIDDEN_LINKONCE)
5012 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
5013 else
5014 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
5015 }
5016
5017 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
5018
5019 static rtx
5020 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
5021 {
5022 int orig_flag_pic = flag_pic;
5023 rtx insn;
5024
5025 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
5026 flag_pic = 0;
5027 if (TARGET_ARCH64)
5028 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
5029 else
5030 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
5031 flag_pic = orig_flag_pic;
5032
5033 return insn;
5034 }
5035
5036 /* Emit code to load the GOT register. */
5037
5038 void
5039 load_got_register (void)
5040 {
5041 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
5042 if (!global_offset_table_rtx)
5043 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
5044
5045 if (TARGET_VXWORKS_RTP)
5046 emit_insn (gen_vxworks_load_got ());
5047 else
5048 {
5049 /* The GOT symbol is subject to a PC-relative relocation so we need a
5050 helper function to add the PC value and thus get the final value. */
5051 if (!got_helper_rtx)
5052 {
5053 char name[32];
5054 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
5055 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5056 }
5057
5058 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
5059 got_helper_rtx,
5060 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
5061 }
5062
5063 /* Need to emit this whether or not we obey regdecls,
5064 since setjmp/longjmp can cause life info to screw up.
5065 ??? In the case where we don't obey regdecls, this is not sufficient
5066 since we may not fall out the bottom. */
5067 emit_use (global_offset_table_rtx);
5068 }
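/* Roughly, for the common ELF case the code emitted above looks like

     sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call  __sparc_get_pc_thunk.l7
      add  %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the return address in %o7 into %l7, yielding the
   absolute address of the GOT; the -4/+4 offsets make both halves of the
   PC-relative displacement relative to the address of the call. */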
5069
5070 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5071 address of the call target. */
5072
5073 void
5074 sparc_emit_call_insn (rtx pat, rtx addr)
5075 {
5076 rtx_insn *insn;
5077
5078 insn = emit_call_insn (pat);
5079
5080 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5081 if (TARGET_VXWORKS_RTP
5082 && flag_pic
5083 && GET_CODE (addr) == SYMBOL_REF
5084 && (SYMBOL_REF_DECL (addr)
5085 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5086 : !SYMBOL_REF_LOCAL_P (addr)))
5087 {
5088 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5089 crtl->uses_pic_offset_table = 1;
5090 }
5091 }
5092 \f
5093 /* Return 1 if RTX is a MEM which is known to be aligned to at
5094 least a DESIRED byte boundary. */
5095
5096 int
5097 mem_min_alignment (rtx mem, int desired)
5098 {
5099 rtx addr, base, offset;
5100
5101 /* If it's not a MEM we can't accept it. */
5102 if (GET_CODE (mem) != MEM)
5103 return 0;
5104
5105 /* Obviously... */
5106 if (!TARGET_UNALIGNED_DOUBLES
5107 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5108 return 1;
5109
5110 /* ??? The rest of the function predates MEM_ALIGN so
5111 there is probably a bit of redundancy. */
5112 addr = XEXP (mem, 0);
5113 base = offset = NULL_RTX;
5114 if (GET_CODE (addr) == PLUS)
5115 {
5116 if (GET_CODE (XEXP (addr, 0)) == REG)
5117 {
5118 base = XEXP (addr, 0);
5119
5120 /* What we are saying here is that if the base
5121 REG is aligned properly, the compiler will make
5122 sure any REG-based index upon it will be aligned
5123 as well. */
5124 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5125 offset = XEXP (addr, 1);
5126 else
5127 offset = const0_rtx;
5128 }
5129 }
5130 else if (GET_CODE (addr) == REG)
5131 {
5132 base = addr;
5133 offset = const0_rtx;
5134 }
5135
5136 if (base != NULL_RTX)
5137 {
5138 int regno = REGNO (base);
5139
5140 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5141 {
5142 /* Check if the compiler has recorded some information
5143 about the alignment of the base REG. If reload has
5144 completed, we already matched with proper alignments.
5145 If not running global_alloc, reload might give us
5146 unaligned pointer to local stack though. */
5147 if (((cfun != 0
5148 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5149 || (optimize && reload_completed))
5150 && (INTVAL (offset) & (desired - 1)) == 0)
5151 return 1;
5152 }
5153 else
5154 {
5155 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5156 return 1;
5157 }
5158 }
5159 else if (! TARGET_UNALIGNED_DOUBLES
5160 || CONSTANT_P (addr)
5161 || GET_CODE (addr) == LO_SUM)
5162 {
5163 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5164 is true, in which case we can only assume that an access is aligned if
5165 it is to a constant address, or the address involves a LO_SUM. */
5166 return 1;
5167 }
5168
5169 /* An obviously unaligned address. */
5170 return 0;
5171 }
5172
5173 \f
5174 /* Vectors to keep interesting information about registers where it can easily
5175 be got. We used to use the actual mode value as the bit number, but there
5176 are more than 32 modes now. Instead we use two tables: one indexed by
5177 hard register number, and one indexed by mode. */
5178
5179 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5180 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5181 mapped into one sparc_mode_class mode. */
5182
5183 enum sparc_mode_class {
5184 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5185 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5186 CC_MODE, CCFP_MODE
5187 };
5188
5189 /* Modes for single-word and smaller quantities. */
5190 #define S_MODES \
5191 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5192
5193 /* Modes for double-word and smaller quantities. */
5194 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5195
5196 /* Modes for quad-word and smaller quantities. */
5197 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5198
5199 /* Modes for 8-word and smaller quantities. */
5200 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5201
5202 /* Modes for single-float quantities. */
5203 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5204
5205 /* Modes for double-float and smaller quantities. */
5206 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5207
5208 /* Modes for quad-float and smaller quantities. */
5209 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5210
5211 /* Modes for quad-float pairs and smaller quantities. */
5212 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5213
5214 /* Modes for double-float only quantities. */
5215 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5216
5217 /* Modes for quad-float and double-float only quantities. */
5218 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5219
5220 /* Modes for quad-float pairs and double-float only quantities. */
5221 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5222
5223 /* Modes for condition codes. */
5224 #define CC_MODES (1 << (int) CC_MODE)
5225 #define CCFP_MODES (1 << (int) CCFP_MODE)
5226
5227 /* Value is 1 if register/mode pair is acceptable on sparc.
5228
5229 The funny mixture of D and T modes is because integer operations
5230 do not specially operate on tetra quantities, so non-quad-aligned
5231 registers can hold quadword quantities (except %o4 and %i4 because
5232 they cross fixed registers).
5233
5234 ??? Note that, despite the settings, non-double-aligned parameter
5235 registers can hold double-word quantities in 32-bit mode. */
5236
5237 /* This points to either the 32-bit or the 64-bit version. */
5238 static const int *hard_regno_mode_classes;
5239
5240 static const int hard_32bit_mode_classes[] = {
5241 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5242 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5243 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5244 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5245
5246 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5247 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5248 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5249 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5250
5251 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5252 and none can hold SFmode/SImode values. */
5253 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5254 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5255 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5256 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5257
5258 /* %fcc[0123] */
5259 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5260
5261 /* %icc, %sfp, %gsr */
5262 CC_MODES, 0, D_MODES
5263 };
5264
5265 static const int hard_64bit_mode_classes[] = {
5266 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5267 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5268 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5269 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5270
5271 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5272 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5273 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5274 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5275
5276 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5277 and none can hold SFmode/SImode values. */
5278 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5279 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5280 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5281 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5282
5283 /* %fcc[0123] */
5284 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5285
5286 /* %icc, %sfp, %gsr */
5287 CC_MODES, 0, D_MODES
5288 };
5289
5290 static int sparc_mode_class [NUM_MACHINE_MODES];
5291
5292 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5293
5294 static void
5295 sparc_init_modes (void)
5296 {
5297 int i;
5298
5299 for (i = 0; i < NUM_MACHINE_MODES; i++)
5300 {
5301 machine_mode m = (machine_mode) i;
5302 unsigned int size = GET_MODE_SIZE (m);
5303
5304 switch (GET_MODE_CLASS (m))
5305 {
5306 case MODE_INT:
5307 case MODE_PARTIAL_INT:
5308 case MODE_COMPLEX_INT:
5309 if (size < 4)
5310 sparc_mode_class[i] = 1 << (int) H_MODE;
5311 else if (size == 4)
5312 sparc_mode_class[i] = 1 << (int) S_MODE;
5313 else if (size == 8)
5314 sparc_mode_class[i] = 1 << (int) D_MODE;
5315 else if (size == 16)
5316 sparc_mode_class[i] = 1 << (int) T_MODE;
5317 else if (size == 32)
5318 sparc_mode_class[i] = 1 << (int) O_MODE;
5319 else
5320 sparc_mode_class[i] = 0;
5321 break;
5322 case MODE_VECTOR_INT:
5323 if (size == 4)
5324 sparc_mode_class[i] = 1 << (int) SF_MODE;
5325 else if (size == 8)
5326 sparc_mode_class[i] = 1 << (int) DF_MODE;
5327 else
5328 sparc_mode_class[i] = 0;
5329 break;
5330 case MODE_FLOAT:
5331 case MODE_COMPLEX_FLOAT:
5332 if (size == 4)
5333 sparc_mode_class[i] = 1 << (int) SF_MODE;
5334 else if (size == 8)
5335 sparc_mode_class[i] = 1 << (int) DF_MODE;
5336 else if (size == 16)
5337 sparc_mode_class[i] = 1 << (int) TF_MODE;
5338 else if (size == 32)
5339 sparc_mode_class[i] = 1 << (int) OF_MODE;
5340 else
5341 sparc_mode_class[i] = 0;
5342 break;
5343 case MODE_CC:
5344 if (m == CCFPmode || m == CCFPEmode)
5345 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5346 else
5347 sparc_mode_class[i] = 1 << (int) CC_MODE;
5348 break;
5349 default:
5350 sparc_mode_class[i] = 0;
5351 break;
5352 }
5353 }
5354
5355 if (TARGET_ARCH64)
5356 hard_regno_mode_classes = hard_64bit_mode_classes;
5357 else
5358 hard_regno_mode_classes = hard_32bit_mode_classes;
5359
5360 /* Initialize the array used by REGNO_REG_CLASS. */
5361 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5362 {
5363 if (i < 16 && TARGET_V8PLUS)
5364 sparc_regno_reg_class[i] = I64_REGS;
5365 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5366 sparc_regno_reg_class[i] = GENERAL_REGS;
5367 else if (i < 64)
5368 sparc_regno_reg_class[i] = FP_REGS;
5369 else if (i < 96)
5370 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5371 else if (i < 100)
5372 sparc_regno_reg_class[i] = FPCC_REGS;
5373 else
5374 sparc_regno_reg_class[i] = NO_REGS;
5375 }
5376 }
5377 \f
5378 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5379
5380 static inline bool
5381 save_global_or_fp_reg_p (unsigned int regno,
5382 int leaf_function ATTRIBUTE_UNUSED)
5383 {
5384 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5385 }
5386
5387 /* Return whether the return address register (%i7) is needed. */
5388
5389 static inline bool
5390 return_addr_reg_needed_p (int leaf_function)
5391 {
5392 /* If it is live, for example because of __builtin_return_address (0). */
5393 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5394 return true;
5395
5396 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5397 if (!leaf_function
5398 /* Loading the GOT register clobbers %o7. */
5399 || crtl->uses_pic_offset_table
5400 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5401 return true;
5402
5403 return false;
5404 }
5405
5406 /* Return whether REGNO, a local or in register, must be saved/restored. */
5407
5408 static bool
5409 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5410 {
5411 /* General case: call-saved registers live at some point. */
5412 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5413 return true;
5414
5415 /* Frame pointer register (%fp) if needed. */
5416 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5417 return true;
5418
5419 /* Return address register (%i7) if needed. */
5420 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5421 return true;
5422
5423 /* GOT register (%l7) if needed. */
5424 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5425 return true;
5426
5427 /* If the function accesses prior frames, the frame pointer and the return
5428 address of the previous frame must be saved on the stack. */
5429 if (crtl->accesses_prior_frames
5430 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5431 return true;
5432
5433 return false;
5434 }
5435
5436 /* Compute the frame size required by the function. This function is called
5437 during the reload pass and also by sparc_expand_prologue. */
5438
5439 HOST_WIDE_INT
5440 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5441 {
5442 HOST_WIDE_INT frame_size, apparent_frame_size;
5443 int args_size, n_global_fp_regs = 0;
5444 bool save_local_in_regs_p = false;
5445 unsigned int i;
5446
5447 /* If the function allocates dynamic stack space, the dynamic offset is
5448 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5449 if (leaf_function && !cfun->calls_alloca)
5450 args_size = 0;
5451 else
5452 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5453
5454 /* Calculate space needed for global registers. */
5455 if (TARGET_ARCH64)
5456 {
5457 for (i = 0; i < 8; i++)
5458 if (save_global_or_fp_reg_p (i, 0))
5459 n_global_fp_regs += 2;
5460 }
5461 else
5462 {
5463 for (i = 0; i < 8; i += 2)
5464 if (save_global_or_fp_reg_p (i, 0)
5465 || save_global_or_fp_reg_p (i + 1, 0))
5466 n_global_fp_regs += 2;
5467 }
5468
5469 /* In the flat window model, find out which local and in registers need to
5470 be saved. We don't reserve space in the current frame for them as they
5471 will be spilled into the register window save area of the caller's frame.
5472 However, as soon as we use this register window save area, we must create
5473 that of the current frame to make it the live one. */
5474 if (TARGET_FLAT)
5475 for (i = 16; i < 32; i++)
5476 if (save_local_or_in_reg_p (i, leaf_function))
5477 {
5478 save_local_in_regs_p = true;
5479 break;
5480 }
5481
5482 /* Calculate space needed for FP registers. */
5483 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5484 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5485 n_global_fp_regs += 2;
5486
5487 if (size == 0
5488 && n_global_fp_regs == 0
5489 && args_size == 0
5490 && !save_local_in_regs_p)
5491 frame_size = apparent_frame_size = 0;
5492 else
5493 {
5494 /* Start from the apparent frame size. */
5495 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5496
5497 /* We need to add the size of the outgoing argument area. */
5498 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5499
5500 /* And that of the register window save area. */
5501 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5502
5503 /* Finally, bump to the appropriate alignment. */
5504 frame_size = SPARC_STACK_ALIGN (frame_size);
5505 }
5506
5507 /* Set up values for use in prologue and epilogue. */
5508 sparc_frame_size = frame_size;
5509 sparc_apparent_frame_size = apparent_frame_size;
5510 sparc_n_global_fp_regs = n_global_fp_regs;
5511 sparc_save_local_in_regs_p = save_local_in_regs_p;
5512
5513 return frame_size;
5514 }
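/* A worked example (figures assumed, not taken from a real compilation):
   with size == 40, args_size == 48 and two call-saved FP register pairs
   live (n_global_fp_regs == 4), the code above yields

     apparent_frame_size = ROUND_UP (40, 8) + 4 * 4 = 56
     frame_size = 56 + ROUND_UP (48, 8) + FIRST_PARM_OFFSET (cfun->decl)

   and frame_size is then rounded up to the stack alignment by
   SPARC_STACK_ALIGN.  */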
5515
5516 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5517
5518 int
5519 sparc_initial_elimination_offset (int to)
5520 {
5521 int offset;
5522
5523 if (to == STACK_POINTER_REGNUM)
5524 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5525 else
5526 offset = 0;
5527
5528 offset += SPARC_STACK_BIAS;
5529 return offset;
5530 }
5531
5532 /* Output any necessary .register pseudo-ops. */
5533
5534 void
5535 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5536 {
5537 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5538 int i;
5539
5540 if (TARGET_ARCH32)
5541 return;
5542
5543 /* Check if %g[2367] were used without
5544 .register being printed for them already. */
5545 for (i = 2; i < 8; i++)
5546 {
5547 if (df_regs_ever_live_p (i)
5548 && ! sparc_hard_reg_printed [i])
5549 {
5550 sparc_hard_reg_printed [i] = 1;
5551 /* %g7 is used as TLS base register, use #ignore
5552 for it instead of #scratch. */
5553 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5554 i == 7 ? "ignore" : "scratch");
5555 }
5556 if (i == 3) i = 5;
5557 }
5558 #endif
5559 }
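/* For instance, if %g2 and %g7 are both live in a 64-bit function, the loop
   above emits:

	.register	%g2, #scratch
	.register	%g7, #ignore  */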
5560
5561 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5562
5563 #if PROBE_INTERVAL > 4096
5564 #error Cannot use indexed addressing mode for stack probing
5565 #endif
5566
5567 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5568 inclusive. These are offsets from the current stack pointer.
5569
5570 Note that we don't use the REG+REG addressing mode for the probes because
5571 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5572 so the advantages of having a single code path win here. */
5573
5574 static void
5575 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5576 {
5577 rtx g1 = gen_rtx_REG (Pmode, 1);
5578
5579 /* See if we have a constant small number of probes to generate. If so,
5580 that's the easy case. */
5581 if (size <= PROBE_INTERVAL)
5582 {
5583 emit_move_insn (g1, GEN_INT (first));
5584 emit_insn (gen_rtx_SET (g1,
5585 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5586 emit_stack_probe (plus_constant (Pmode, g1, -size));
5587 }
5588
5589 /* The run-time loop is made up of 9 insns in the generic case while the
5590 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5591 else if (size <= 4 * PROBE_INTERVAL)
5592 {
5593 HOST_WIDE_INT i;
5594
5595 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5596 emit_insn (gen_rtx_SET (g1,
5597 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5598 emit_stack_probe (g1);
5599
5600 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5601 it exceeds SIZE. If only two probes are needed, this will not
5602 generate any code. Then probe at FIRST + SIZE. */
5603 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5604 {
5605 emit_insn (gen_rtx_SET (g1,
5606 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5607 emit_stack_probe (g1);
5608 }
5609
5610 emit_stack_probe (plus_constant (Pmode, g1,
5611 (i - PROBE_INTERVAL) - size));
5612 }
5613
5614 /* Otherwise, do the same as above, but in a loop. Note that we must be
5615 extra careful with variables wrapping around because we might be at
5616 the very top (or the very bottom) of the address space and we have
5617 to be able to handle this case properly; in particular, we use an
5618 equality test for the loop condition. */
5619 else
5620 {
5621 HOST_WIDE_INT rounded_size;
5622 rtx g4 = gen_rtx_REG (Pmode, 4);
5623
5624 emit_move_insn (g1, GEN_INT (first));
5625
5626
5627 /* Step 1: round SIZE to the previous multiple of the interval. */
5628
5629 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5630 emit_move_insn (g4, GEN_INT (rounded_size));
5631
5632
5633 /* Step 2: compute initial and final value of the loop counter. */
5634
5635 /* TEST_ADDR = SP + FIRST. */
5636 emit_insn (gen_rtx_SET (g1,
5637 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5638
5639 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5640 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5641
5642
5643 /* Step 3: the loop
5644
5645 while (TEST_ADDR != LAST_ADDR)
5646 {
5647 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5648 probe at TEST_ADDR
5649 }
5650
5651 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5652 until it is equal to ROUNDED_SIZE. */
5653
5654 if (TARGET_ARCH64)
5655 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5656 else
5657 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5658
5659
5660 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5661 that SIZE is equal to ROUNDED_SIZE. */
5662
5663 if (size != rounded_size)
5664 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5665 }
5666
5667 /* Make sure nothing is scheduled before we are done. */
5668 emit_insn (gen_blockage ());
5669 }
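/* As a concrete illustration (values assumed): with PROBE_INTERVAL == 4096,
   FIRST == 2048 and SIZE == 10000, ROUNDED_SIZE is 8192, so the loop above
   probes SP - 2048 - 4096 and SP - 2048 - 8192, and the final probe covers
   SP - 2048 - 10000 because SIZE != ROUNDED_SIZE.  */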
5670
5671 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5672 absolute addresses. */
5673
5674 const char *
5675 output_probe_stack_range (rtx reg1, rtx reg2)
5676 {
5677 static int labelno = 0;
5678 char loop_lab[32];
5679 rtx xops[2];
5680
5681 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5682
5683 /* Loop. */
5684 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5685
5686 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5687 xops[0] = reg1;
5688 xops[1] = GEN_INT (-PROBE_INTERVAL);
5689 output_asm_insn ("add\t%0, %1, %0", xops);
5690
5691 /* Test if TEST_ADDR == LAST_ADDR. */
5692 xops[1] = reg2;
5693 output_asm_insn ("cmp\t%0, %1", xops);
5694
5695 /* Probe at TEST_ADDR and branch. */
5696 if (TARGET_ARCH64)
5697 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5698 else
5699 fputs ("\tbne\t", asm_out_file);
5700 assemble_name_raw (asm_out_file, loop_lab);
5701 fputc ('\n', asm_out_file);
5702 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5703 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5704
5705 return "";
5706 }
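/* With the %g1/%g4 operands set up by sparc_emit_probe_stack_range and the
   default 4KB probe interval, the emitted loop looks roughly like this in
   64-bit mode (2047 being the stack bias):

	.LPSRL0:
		add	%g1, -4096, %g1
		cmp	%g1, %g4
		bne,pt	%xcc, .LPSRL0
		 st	%g0, [%g1+2047]  */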
5707
5708 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5709 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5710 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5711 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5712 the action to be performed if it returns false. Return the new offset. */
5713
5714 typedef bool (*sorr_pred_t) (unsigned int, int);
5715 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5716
5717 static int
5718 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5719 int offset, int leaf_function, sorr_pred_t save_p,
5720 sorr_act_t action_true, sorr_act_t action_false)
5721 {
5722 unsigned int i;
5723 rtx mem;
5724 rtx_insn *insn;
5725
5726 if (TARGET_ARCH64 && high <= 32)
5727 {
5728 int fp_offset = -1;
5729
5730 for (i = low; i < high; i++)
5731 {
5732 if (save_p (i, leaf_function))
5733 {
5734 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5735 base, offset));
5736 if (action_true == SORR_SAVE)
5737 {
5738 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5739 RTX_FRAME_RELATED_P (insn) = 1;
5740 }
5741 else /* action_true == SORR_RESTORE */
5742 {
5743 /* The frame pointer must be restored last since its old
5744 value may be used as base address for the frame. This
5745 is problematic in 64-bit mode only because of the lack
5746 of double-word load instruction. */
5747 if (i == HARD_FRAME_POINTER_REGNUM)
5748 fp_offset = offset;
5749 else
5750 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5751 }
5752 offset += 8;
5753 }
5754 else if (action_false == SORR_ADVANCE)
5755 offset += 8;
5756 }
5757
5758 if (fp_offset >= 0)
5759 {
5760 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5761 emit_move_insn (hard_frame_pointer_rtx, mem);
5762 }
5763 }
5764 else
5765 {
5766 for (i = low; i < high; i += 2)
5767 {
5768 bool reg0 = save_p (i, leaf_function);
5769 bool reg1 = save_p (i + 1, leaf_function);
5770 machine_mode mode;
5771 int regno;
5772
5773 if (reg0 && reg1)
5774 {
5775 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5776 regno = i;
5777 }
5778 else if (reg0)
5779 {
5780 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5781 regno = i;
5782 }
5783 else if (reg1)
5784 {
5785 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5786 regno = i + 1;
5787 offset += 4;
5788 }
5789 else
5790 {
5791 if (action_false == SORR_ADVANCE)
5792 offset += 8;
5793 continue;
5794 }
5795
5796 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5797 if (action_true == SORR_SAVE)
5798 {
5799 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5800 RTX_FRAME_RELATED_P (insn) = 1;
5801 if (mode == DImode)
5802 {
5803 rtx set1, set2;
5804 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5805 offset));
5806 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5807 RTX_FRAME_RELATED_P (set1) = 1;
5808 mem
5809 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5810 offset + 4));
5811 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5812 RTX_FRAME_RELATED_P (set2) = 1;
5813 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5814 gen_rtx_PARALLEL (VOIDmode,
5815 gen_rtvec (2, set1, set2)));
5816 }
5817 }
5818 else /* action_true == SORR_RESTORE */
5819 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5820
5821 /* Bump and round down to double word
5822 in case we already bumped by 4. */
5823 offset = ROUND_DOWN (offset + 8, 8);
5824 }
5825 }
5826
5827 return offset;
5828 }
5829
5830 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5831
5832 static rtx
5833 emit_adjust_base_to_offset (rtx base, int offset)
5834 {
5835 /* ??? This might be optimized a little as %g1 might already have a
5836 value close enough that a single add insn will do. */
5837 /* ??? Although, all of this is probably only a temporary fix because
5838 if %g1 can hold a function result, then sparc_expand_epilogue will
5839 lose (the result will be clobbered). */
5840 rtx new_base = gen_rtx_REG (Pmode, 1);
5841 emit_move_insn (new_base, GEN_INT (offset));
5842 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5843 return new_base;
5844 }
5845
5846 /* Emit code to save/restore call-saved global and FP registers. */
5847
5848 static void
5849 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5850 {
5851 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5852 {
5853 base = emit_adjust_base_to_offset (base, offset);
5854 offset = 0;
5855 }
5856
5857 offset
5858 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5859 save_global_or_fp_reg_p, action, SORR_NONE);
5860 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5861 save_global_or_fp_reg_p, action, SORR_NONE);
5862 }
5863
5864 /* Emit code to save/restore call-saved local and in registers. */
5865
5866 static void
5867 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5868 {
5869 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5870 {
5871 base = emit_adjust_base_to_offset (base, offset);
5872 offset = 0;
5873 }
5874
5875 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5876 save_local_or_in_reg_p, action, SORR_ADVANCE);
5877 }
5878
5879 /* Emit a window_save insn. */
5880
5881 static rtx_insn *
5882 emit_window_save (rtx increment)
5883 {
5884 rtx_insn *insn = emit_insn (gen_window_save (increment));
5885 RTX_FRAME_RELATED_P (insn) = 1;
5886
5887 /* The incoming return address (%o7) is saved in %i7. */
5888 add_reg_note (insn, REG_CFA_REGISTER,
5889 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5890 gen_rtx_REG (Pmode,
5891 INCOMING_RETURN_ADDR_REGNUM)));
5892
5893 /* The window save event. */
5894 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5895
5896 /* The CFA is %fp, the hard frame pointer. */
5897 add_reg_note (insn, REG_CFA_DEF_CFA,
5898 plus_constant (Pmode, hard_frame_pointer_rtx,
5899 INCOMING_FRAME_SP_OFFSET));
5900
5901 return insn;
5902 }
5903
5904 /* Generate an increment for the stack pointer. */
5905
5906 static rtx
5907 gen_stack_pointer_inc (rtx increment)
5908 {
5909 return gen_rtx_SET (stack_pointer_rtx,
5910 gen_rtx_PLUS (Pmode,
5911 stack_pointer_rtx,
5912 increment));
5913 }
5914
5915 /* Expand the function prologue. The prologue is responsible for reserving
5916 storage for the frame, saving the call-saved registers and loading the
5917 GOT register if needed. */
5918
5919 void
5920 sparc_expand_prologue (void)
5921 {
5922 HOST_WIDE_INT size;
5923 rtx_insn *insn;
5924
5925 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5926 on the final value of the flag means deferring the prologue/epilogue
5927 expansion until just before the second scheduling pass, which is too
5928 late to emit multiple epilogues or return insns.
5929
5930 Of course we are making the assumption that the value of the flag
5931 will not change between now and its final value. Of the three parts
5932 of the formula, only the last one can reasonably vary. Let's take a
5933 closer look, after assuming that the first two are true (otherwise the
5934 value of the last one is irrelevant).
5935
5936 If only_leaf_regs_used returns false, the global predicate will also
5937 be false so the actual frame size calculated below will be positive.
5938 As a consequence, the save_register_window insn will be emitted in
5939 the instruction stream; now this insn explicitly references %fp
5940 which is not a leaf register so only_leaf_regs_used will always
5941 return false subsequently.
5942
5943 If only_leaf_regs_used returns true, we hope that the subsequent
5944 optimization passes won't cause non-leaf registers to pop up. For
5945 example, the regrename pass has special provisions to not rename to
5946 non-leaf registers in a leaf function. */
5947 sparc_leaf_function_p
5948 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5949
5950 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5951
5952 if (flag_stack_usage_info)
5953 current_function_static_stack_size = size;
5954
5955 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5956 || flag_stack_clash_protection)
5957 {
5958 if (crtl->is_leaf && !cfun->calls_alloca)
5959 {
5960 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5961 sparc_emit_probe_stack_range (get_stack_check_protect (),
5962 size - get_stack_check_protect ());
5963 }
5964 else if (size > 0)
5965 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5966 }
5967
5968 if (size == 0)
5969 ; /* do nothing. */
5970 else if (sparc_leaf_function_p)
5971 {
5972 rtx size_int_rtx = GEN_INT (-size);
5973
5974 if (size <= 4096)
5975 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5976 else if (size <= 8192)
5977 {
5978 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5979 RTX_FRAME_RELATED_P (insn) = 1;
5980
5981 /* %sp is still the CFA register. */
5982 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5983 }
5984 else
5985 {
5986 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5987 emit_move_insn (size_rtx, size_int_rtx);
5988 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5989 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5990 gen_stack_pointer_inc (size_int_rtx));
5991 }
5992
5993 RTX_FRAME_RELATED_P (insn) = 1;
5994 }
5995 else
5996 {
5997 rtx size_int_rtx = GEN_INT (-size);
5998
5999 if (size <= 4096)
6000 emit_window_save (size_int_rtx);
6001 else if (size <= 8192)
6002 {
6003 emit_window_save (GEN_INT (-4096));
6004
6005 /* %sp is not the CFA register anymore. */
6006 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6007
6008 /* Make sure no %fp-based store is issued until after the frame is
6009 established. The offset between the frame pointer and the stack
6010 pointer is calculated relative to the value of the stack pointer
6011 at the end of the function prologue, and moving instructions that
6012 access the stack via the frame pointer between the instructions
6013 that decrement the stack pointer could result in accessing the
6014 register window save area, which is volatile. */
6015 emit_insn (gen_frame_blockage ());
6016 }
6017 else
6018 {
6019 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6020 emit_move_insn (size_rtx, size_int_rtx);
6021 emit_window_save (size_rtx);
6022 }
6023 }
6024
6025 if (sparc_leaf_function_p)
6026 {
6027 sparc_frame_base_reg = stack_pointer_rtx;
6028 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6029 }
6030 else
6031 {
6032 sparc_frame_base_reg = hard_frame_pointer_rtx;
6033 sparc_frame_base_offset = SPARC_STACK_BIAS;
6034 }
6035
6036 if (sparc_n_global_fp_regs > 0)
6037 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6038 sparc_frame_base_offset
6039 - sparc_apparent_frame_size,
6040 SORR_SAVE);
6041
6042 /* Load the GOT register if needed. */
6043 if (crtl->uses_pic_offset_table)
6044 load_got_register ();
6045
6046 /* Advertise that the data calculated just above are now valid. */
6047 sparc_prologue_data_valid_p = true;
6048 }
6049
6050 /* Expand the function prologue in the flat window model. The prologue is
6051 responsible for reserving storage for the frame, saving the call-saved
6052 registers and loading the GOT register if needed. */
6053
6054 void
6055 sparc_flat_expand_prologue (void)
6056 {
6057 HOST_WIDE_INT size;
6058 rtx_insn *insn;
6059
6060 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6061
6062 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6063
6064 if (flag_stack_usage_info)
6065 current_function_static_stack_size = size;
6066
6067 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6068 || flag_stack_clash_protection)
6069 {
6070 if (crtl->is_leaf && !cfun->calls_alloca)
6071 {
6072 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6073 sparc_emit_probe_stack_range (get_stack_check_protect (),
6074 size - get_stack_check_protect ());
6075 }
6076 else if (size > 0)
6077 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6078 }
6079
6080 if (sparc_save_local_in_regs_p)
6081 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6082 SORR_SAVE);
6083
6084 if (size == 0)
6085 ; /* do nothing. */
6086 else
6087 {
6088 rtx size_int_rtx, size_rtx;
6089
6090 size_rtx = size_int_rtx = GEN_INT (-size);
6091
6092 /* We establish the frame (i.e. decrement the stack pointer) first, even
6093 if we use a frame pointer, because we cannot clobber any call-saved
6094 registers, including the frame pointer, if we haven't created a new
6095 register save area, for the sake of compatibility with the ABI. */
6096 if (size <= 4096)
6097 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6098 else if (size <= 8192 && !frame_pointer_needed)
6099 {
6100 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6101 RTX_FRAME_RELATED_P (insn) = 1;
6102 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6103 }
6104 else
6105 {
6106 size_rtx = gen_rtx_REG (Pmode, 1);
6107 emit_move_insn (size_rtx, size_int_rtx);
6108 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6109 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6110 gen_stack_pointer_inc (size_int_rtx));
6111 }
6112 RTX_FRAME_RELATED_P (insn) = 1;
6113
6114 /* Ensure nothing is scheduled until after the frame is established. */
6115 emit_insn (gen_blockage ());
6116
6117 if (frame_pointer_needed)
6118 {
6119 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6120 gen_rtx_MINUS (Pmode,
6121 stack_pointer_rtx,
6122 size_rtx)));
6123 RTX_FRAME_RELATED_P (insn) = 1;
6124
6125 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6126 gen_rtx_SET (hard_frame_pointer_rtx,
6127 plus_constant (Pmode, stack_pointer_rtx,
6128 size)));
6129 }
6130
6131 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6132 {
6133 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6134 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6135
6136 insn = emit_move_insn (i7, o7);
6137 RTX_FRAME_RELATED_P (insn) = 1;
6138
6139 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6140
6141 /* Prevent this instruction from ever being considered dead,
6142 even if this function has no epilogue. */
6143 emit_use (i7);
6144 }
6145 }
6146
6147 if (frame_pointer_needed)
6148 {
6149 sparc_frame_base_reg = hard_frame_pointer_rtx;
6150 sparc_frame_base_offset = SPARC_STACK_BIAS;
6151 }
6152 else
6153 {
6154 sparc_frame_base_reg = stack_pointer_rtx;
6155 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6156 }
6157
6158 if (sparc_n_global_fp_regs > 0)
6159 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6160 sparc_frame_base_offset
6161 - sparc_apparent_frame_size,
6162 SORR_SAVE);
6163
6164 /* Load the GOT register if needed. */
6165 if (crtl->uses_pic_offset_table)
6166 load_got_register ();
6167
6168 /* Advertise that the data calculated just above are now valid. */
6169 sparc_prologue_data_valid_p = true;
6170 }
6171
6172 /* This function generates the assembly code for function entry, which boils
6173 down to emitting the necessary .register directives. */
6174
6175 static void
6176 sparc_asm_function_prologue (FILE *file)
6177 {
6178 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6179 if (!TARGET_FLAT)
6180 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6181
6182 sparc_output_scratch_registers (file);
6183 }
6184
6185 /* Expand the function epilogue, either normal or part of a sibcall.
6186 We emit all the instructions except the return or the call. */
6187
6188 void
6189 sparc_expand_epilogue (bool for_eh)
6190 {
6191 HOST_WIDE_INT size = sparc_frame_size;
6192
6193 if (cfun->calls_alloca)
6194 emit_insn (gen_frame_blockage ());
6195
6196 if (sparc_n_global_fp_regs > 0)
6197 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6198 sparc_frame_base_offset
6199 - sparc_apparent_frame_size,
6200 SORR_RESTORE);
6201
6202 if (size == 0 || for_eh)
6203 ; /* do nothing. */
6204 else if (sparc_leaf_function_p)
6205 {
6206 if (size <= 4096)
6207 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6208 else if (size <= 8192)
6209 {
6210 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6211 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6212 }
6213 else
6214 {
6215 rtx reg = gen_rtx_REG (Pmode, 1);
6216 emit_move_insn (reg, GEN_INT (size));
6217 emit_insn (gen_stack_pointer_inc (reg));
6218 }
6219 }
6220 }
6221
6222 /* Expand the function epilogue in the flat window model, either normal or
6223 part of a sibcall. We emit all the instructions except the return or the call. */
6224
6225 void
6226 sparc_flat_expand_epilogue (bool for_eh)
6227 {
6228 HOST_WIDE_INT size = sparc_frame_size;
6229
6230 if (sparc_n_global_fp_regs > 0)
6231 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6232 sparc_frame_base_offset
6233 - sparc_apparent_frame_size,
6234 SORR_RESTORE);
6235
6236 /* If we have a frame pointer, we'll need both to restore it before the
6237 frame is destroyed and to use its current value in destroying the frame.
6238 Since we don't have an atomic way to do that in the flat window model,
6239 we save the current value into a temporary register (%g1). */
6240 if (frame_pointer_needed && !for_eh)
6241 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6242
6243 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6244 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6245 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6246
6247 if (sparc_save_local_in_regs_p)
6248 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6249 sparc_frame_base_offset,
6250 SORR_RESTORE);
6251
6252 if (size == 0 || for_eh)
6253 ; /* do nothing. */
6254 else if (frame_pointer_needed)
6255 {
6256 /* Make sure the frame is destroyed after everything else is done. */
6257 emit_insn (gen_blockage ());
6258
6259 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6260 }
6261 else
6262 {
6263 /* Likewise. */
6264 emit_insn (gen_blockage ());
6265
6266 if (size <= 4096)
6267 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6268 else if (size <= 8192)
6269 {
6270 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6271 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6272 }
6273 else
6274 {
6275 rtx reg = gen_rtx_REG (Pmode, 1);
6276 emit_move_insn (reg, GEN_INT (size));
6277 emit_insn (gen_stack_pointer_inc (reg));
6278 }
6279 }
6280 }
6281
6282 /* Return true if it is appropriate to emit `return' instructions in the
6283 body of a function. */
6284
6285 bool
6286 sparc_can_use_return_insn_p (void)
6287 {
6288 return sparc_prologue_data_valid_p
6289 && sparc_n_global_fp_regs == 0
6290 && TARGET_FLAT
6291 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6292 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6293 }
6294
6295 /* This function generates the assembly code for function exit. */
6296
6297 static void
6298 sparc_asm_function_epilogue (FILE *file)
6299 {
6300 /* If the last two instructions of a function are "call foo; dslot;"
6301 the return address might point to the first instruction in the next
6302 function and we have to output a dummy nop for the sake of sane
6303 backtraces in such cases. This is pointless for sibling calls since
6304 the return address is explicitly adjusted. */
6305
6306 rtx_insn *insn = get_last_insn ();
6307
6308 rtx last_real_insn = prev_real_insn (insn);
6309 if (last_real_insn
6310 && NONJUMP_INSN_P (last_real_insn)
6311 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6312 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6313
6314 if (last_real_insn
6315 && CALL_P (last_real_insn)
6316 && !SIBLING_CALL_P (last_real_insn))
6317 fputs("\tnop\n", file);
6318
6319 sparc_output_deferred_case_vectors ();
6320 }
6321
6322 /* Output a 'restore' instruction. */
6323
6324 static void
6325 output_restore (rtx pat)
6326 {
6327 rtx operands[3];
6328
6329 if (! pat)
6330 {
6331 fputs ("\t restore\n", asm_out_file);
6332 return;
6333 }
6334
6335 gcc_assert (GET_CODE (pat) == SET);
6336
6337 operands[0] = SET_DEST (pat);
6338 pat = SET_SRC (pat);
6339
6340 switch (GET_CODE (pat))
6341 {
6342 case PLUS:
6343 operands[1] = XEXP (pat, 0);
6344 operands[2] = XEXP (pat, 1);
6345 output_asm_insn (" restore %r1, %2, %Y0", operands);
6346 break;
6347 case LO_SUM:
6348 operands[1] = XEXP (pat, 0);
6349 operands[2] = XEXP (pat, 1);
6350 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6351 break;
6352 case ASHIFT:
6353 operands[1] = XEXP (pat, 0);
6354 gcc_assert (XEXP (pat, 1) == const1_rtx);
6355 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6356 break;
6357 default:
6358 operands[1] = pat;
6359 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6360 break;
6361 }
6362 }
6363
6364 /* Output a return. */
6365
6366 const char *
6367 output_return (rtx_insn *insn)
6368 {
6369 if (crtl->calls_eh_return)
6370 {
6371 /* If the function uses __builtin_eh_return, the eh_return
6372 machinery occupies the delay slot. */
6373 gcc_assert (!final_sequence);
6374
6375 if (flag_delayed_branch)
6376 {
6377 if (!TARGET_FLAT && TARGET_V9)
6378 fputs ("\treturn\t%i7+8\n", asm_out_file);
6379 else
6380 {
6381 if (!TARGET_FLAT)
6382 fputs ("\trestore\n", asm_out_file);
6383
6384 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6385 }
6386
6387 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6388 }
6389 else
6390 {
6391 if (!TARGET_FLAT)
6392 fputs ("\trestore\n", asm_out_file);
6393
6394 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6395 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6396 }
6397 }
6398 else if (sparc_leaf_function_p || TARGET_FLAT)
6399 {
6400 /* This is a leaf or flat function so we don't have to bother restoring
6401 the register window, which frees us from dealing with the convoluted
6402 semantics of restore/return. We simply output the jump to the
6403 return address and the insn in the delay slot (if any). */
6404
6405 return "jmp\t%%o7+%)%#";
6406 }
6407 else
6408 {
6409 /* This is a regular function so we have to restore the register window.
6410 We may have a pending insn for the delay slot, which will be either
6411 combined with the 'restore' instruction or put in the delay slot of
6412 the 'return' instruction. */
6413
6414 if (final_sequence)
6415 {
6416 rtx_insn *delay;
6417 rtx pat;
6418 int seen;
6419
6420 delay = NEXT_INSN (insn);
6421 gcc_assert (delay);
6422
6423 pat = PATTERN (delay);
6424
6425 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6426 {
6427 epilogue_renumber (&pat, 0);
6428 return "return\t%%i7+%)%#";
6429 }
6430 else
6431 {
6432 output_asm_insn ("jmp\t%%i7+%)", NULL);
6433
6434 /* We're going to output the insn in the delay slot manually.
6435 Make sure to output its source location first. */
6436 PATTERN (delay) = gen_blockage ();
6437 INSN_CODE (delay) = -1;
6438 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6439 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6440
6441 output_restore (pat);
6442 }
6443 }
6444 else
6445 {
6446 /* The delay slot is empty. */
6447 if (TARGET_V9)
6448 return "return\t%%i7+%)\n\t nop";
6449 else if (flag_delayed_branch)
6450 return "jmp\t%%i7+%)\n\t restore";
6451 else
6452 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6453 }
6454 }
6455
6456 return "";
6457 }
6458
6459 /* Output a sibling call. */
6460
6461 const char *
6462 output_sibcall (rtx_insn *insn, rtx call_operand)
6463 {
6464 rtx operands[1];
6465
6466 gcc_assert (flag_delayed_branch);
6467
6468 operands[0] = call_operand;
6469
6470 if (sparc_leaf_function_p || TARGET_FLAT)
6471 {
6472 /* This is a leaf or flat function so we don't have to bother restoring
6473 the register window. We simply output the jump to the function and
6474 the insn in the delay slot (if any). */
6475
6476 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6477
6478 if (final_sequence)
6479 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6480 operands);
6481 else
6482 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6483 it into a branch if possible. */
6484 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6485 operands);
6486 }
6487 else
6488 {
6489 /* This is a regular function so we have to restore the register window.
6490 We may have a pending insn for the delay slot, which will be combined
6491 with the 'restore' instruction. */
6492
6493 output_asm_insn ("call\t%a0, 0", operands);
6494
6495 if (final_sequence)
6496 {
6497 rtx_insn *delay;
6498 rtx pat;
6499 int seen;
6500
6501 delay = NEXT_INSN (insn);
6502 gcc_assert (delay);
6503
6504 pat = PATTERN (delay);
6505
6506 /* We're going to output the insn in the delay slot manually.
6507 Make sure to output its source location first. */
6508 PATTERN (delay) = gen_blockage ();
6509 INSN_CODE (delay) = -1;
6510 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6511 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6512
6513 output_restore (pat);
6514 }
6515 else
6516 output_restore (NULL_RTX);
6517 }
6518
6519 return "";
6520 }
6521 \f
6522 /* Functions for handling argument passing.
6523
6524 For 32-bit, the first 6 args are normally in registers and the rest are
6525 pushed. Any arg that starts within the first 6 words is at least
6526 partially passed in a register unless its data type forbids it.
6527
6528 For 64-bit, the argument registers are laid out as an array of 16 elements
6529 and arguments are added sequentially. The first 6 int args and up to the
6530 first 16 fp args (depending on size) are passed in regs.
6531
6532    Slot    Stack     Integral   Float   Float in structure   Double   Long Double
6533    ----    -----     --------   -----   ------------------   ------   -----------
6534     15   [SP+248]               %f31       %f30,%f31          %d30
6535     14   [SP+240]               %f29       %f28,%f29          %d28        %q28
6536     13   [SP+232]               %f27       %f26,%f27          %d26
6537     12   [SP+224]               %f25       %f24,%f25          %d24        %q24
6538     11   [SP+216]               %f23       %f22,%f23          %d22
6539     10   [SP+208]               %f21       %f20,%f21          %d20        %q20
6540      9   [SP+200]               %f19       %f18,%f19          %d18
6541      8   [SP+192]               %f17       %f16,%f17          %d16        %q16
6542      7   [SP+184]               %f15       %f14,%f15          %d14
6543      6   [SP+176]               %f13       %f12,%f13          %d12        %q12
6544      5   [SP+168]      %o5      %f11       %f10,%f11          %d10
6545      4   [SP+160]      %o4       %f9        %f8,%f9            %d8         %q8
6546      3   [SP+152]      %o3       %f7        %f6,%f7            %d6
6547      2   [SP+144]      %o2       %f5        %f4,%f5            %d4         %q4
6548      1   [SP+136]      %o1       %f3        %f2,%f3            %d2
6549      0   [SP+128]      %o0       %f1        %f0,%f1            %d0         %q0
6550
6551 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6552
6553 Integral arguments are always passed as 64-bit quantities appropriately
6554 extended.
6555
6556 Passing of floating point values is handled as follows.
6557 If a prototype is in scope:
6558 If the value is in a named argument (i.e. not a stdarg function or a
6559 value not part of the `...') then the value is passed in the appropriate
6560 fp reg.
6561 If the value is part of the `...' and is passed in one of the first 6
6562 slots then the value is passed in the appropriate int reg.
6563 If the value is part of the `...' and is not passed in one of the first 6
6564 slots then the value is passed in memory.
6565 If a prototype is not in scope:
6566 If the value is one of the first 6 arguments the value is passed in the
6567 appropriate integer reg and the appropriate fp reg.
6568 If the value is not one of the first 6 arguments the value is passed in
6569 the appropriate fp reg and in memory.
6570
6571
6572 Summary of the calling conventions implemented by GCC on the SPARC:
6573
6574 32-bit ABI:
6575                              size      argument     return value
6576
6577       small integer           <4       int. reg.     int. reg.
6578       word                     4       int. reg.     int. reg.
6579       double word              8       int. reg.     int. reg.
6580
6581       _Complex small integer  <8       int. reg.     int. reg.
6582       _Complex word            8       int. reg.     int. reg.
6583       _Complex double word    16        memory       int. reg.
6584
6585       vector integer         <=8       int. reg.      FP reg.
6586       vector integer          >8        memory        memory
6587
6588       float                    4       int. reg.      FP reg.
6589       double                   8       int. reg.      FP reg.
6590       long double             16        memory        memory
6591
6592       _Complex float           8        memory        FP reg.
6593       _Complex double         16        memory        FP reg.
6594       _Complex long double    32        memory        FP reg.
6595
6596       vector float           any        memory        memory
6597
6598       aggregate              any        memory        memory
6599
6600
6601
6602 64-bit ABI:
6603                              size      argument     return value
6604
6605       small integer           <8       int. reg.     int. reg.
6606       word                     8       int. reg.     int. reg.
6607       double word             16       int. reg.     int. reg.
6608
6609       _Complex small integer <16       int. reg.     int. reg.
6610       _Complex word           16       int. reg.     int. reg.
6611       _Complex double word    32        memory       int. reg.
6612
6613       vector integer        <=16        FP reg.       FP reg.
6614       vector integer    16<s<=32        memory        FP reg.
6615       vector integer         >32        memory        memory
6616
6617       float                    4        FP reg.       FP reg.
6618       double                   8        FP reg.       FP reg.
6619       long double             16        FP reg.       FP reg.
6620
6621       _Complex float           8        FP reg.       FP reg.
6622       _Complex double         16        FP reg.       FP reg.
6623       _Complex long double    32        memory        FP reg.
6624
6625       vector float          <=16        FP reg.       FP reg.
6626       vector float      16<s<=32        memory        FP reg.
6627       vector float           >32        memory        memory
6628
6629       aggregate             <=16         reg.          reg.
6630       aggregate         16<s<=32        memory         reg.
6631       aggregate              >32        memory        memory
6632
6633
6634
6635 Note #1: complex floating-point types follow the extended SPARC ABIs as
6636 implemented by the Sun compiler.
6637
6638 Note #2: integral vector types follow the scalar floating-point types
6639 conventions to match what is implemented by the Sun VIS SDK.
6640
6641 Note #3: floating-point vector types follow the aggregate types
6642 conventions. */
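/* As an illustration of the 64-bit conventions above, a prototyped (named)
   argument of type

       struct { double d; int i; }

   (16 bytes) occupies two slots: the double field is passed in a
   floating-point register (%d0 if it is the first argument) and the int
   field in the matching integer register (%o1), whereas an aggregate
   larger than 32 bytes is passed in memory.  */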
6643
6644
6645 /* Maximum number of int regs for args. */
6646 #define SPARC_INT_ARG_MAX 6
6647 /* Maximum number of fp regs for args. */
6648 #define SPARC_FP_ARG_MAX 16
6649 /* Number of words (partially) occupied for a given size in units. */
6650 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6651
6652 /* Handle the INIT_CUMULATIVE_ARGS macro.
6653 Initialize a variable CUM of type CUMULATIVE_ARGS
6654 for a call to a function whose data type is FNTYPE.
6655 For a library call, FNTYPE is 0. */
6656
6657 void
6658 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6659 {
6660 cum->words = 0;
6661 cum->prototype_p = fntype && prototype_p (fntype);
6662 cum->libcall_p = !fntype;
6663 }
6664
6665 /* Handle promotion of pointer and integer arguments. */
6666
6667 static machine_mode
6668 sparc_promote_function_mode (const_tree type, machine_mode mode,
6669 int *punsignedp, const_tree, int)
6670 {
6671 if (type && POINTER_TYPE_P (type))
6672 {
6673 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6674 return Pmode;
6675 }
6676
6677 /* Integral arguments are passed as full words, as per the ABI. */
6678 if (GET_MODE_CLASS (mode) == MODE_INT
6679 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6680 return word_mode;
6681
6682 return mode;
6683 }
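/* Hence, for example, a 'short' argument or return value is widened to
   word_mode (SImode in 32-bit mode, DImode in 64-bit mode) and a pointer
   to Pmode, while floating-point modes are left untouched.  */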
6684
6685 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6686
6687 static bool
6688 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6689 {
6690 return TARGET_ARCH64 ? true : false;
6691 }
6692
6693 /* Traverse the record TYPE recursively and call FUNC on its fields.
6694 NAMED is true if this is for a named parameter. DATA is passed
6695 to FUNC for each field. OFFSET is the starting position and
6696 PACKED is true if we are inside a packed record. */
6697
6698 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6699 static void
6700 traverse_record_type (const_tree type, bool named, T *data,
6701 HOST_WIDE_INT offset = 0, bool packed = false)
6702 {
6703 /* The ABI obviously doesn't specify how packed structures are passed.
6704 These are passed in integer regs if possible, otherwise memory. */
6705 if (!packed)
6706 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6707 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6708 {
6709 packed = true;
6710 break;
6711 }
6712
6713 /* Walk the real fields, but skip those with no size or a zero size.
6714 ??? Fields with variable offset are handled as having zero offset. */
6715 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6716 if (TREE_CODE (field) == FIELD_DECL)
6717 {
6718 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6719 continue;
6720
6721 HOST_WIDE_INT bitpos = offset;
6722 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6723 bitpos += int_bit_position (field);
6724
6725 tree field_type = TREE_TYPE (field);
6726 if (TREE_CODE (field_type) == RECORD_TYPE)
6727 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6728 packed);
6729 else
6730 {
6731 const bool fp_type
6732 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6733 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6734 data);
6735 }
6736 }
6737 }
6738
6739 /* Handle recursive register classifying for structure layout. */
6740
6741 typedef struct
6742 {
6743 bool fp_regs; /* true if field eligible to FP registers. */
6744 bool fp_regs_in_first_word; /* true if such field in first word. */
6745 } classify_data_t;
6746
6747 /* A subroutine of function_arg_slotno. Classify the field. */
6748
6749 inline void
6750 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6751 classify_data_t *data)
6752 {
6753 if (fp)
6754 {
6755 data->fp_regs = true;
6756 if (bitpos < BITS_PER_WORD)
6757 data->fp_regs_in_first_word = true;
6758 }
6759 }
6760
6761 /* Compute the slot number to pass an argument in.
6762 Return the slot number or -1 if passing on the stack.
6763
6764 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6765 the preceding args and about the function being called.
6766 MODE is the argument's machine mode.
6767 TYPE is the data type of the argument (as a tree).
6768 This is null for libcalls where that information may
6769 not be available.
6770 NAMED is nonzero if this argument is a named parameter
6771 (otherwise it is an extra parameter matching an ellipsis).
6772 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6773 *PREGNO records the register number to use if scalar type.
6774 *PPADDING records the amount of padding needed in words. */
6775
6776 static int
6777 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6778 const_tree type, bool named, bool incoming,
6779 int *pregno, int *ppadding)
6780 {
6781 int regbase = (incoming
6782 ? SPARC_INCOMING_INT_ARG_FIRST
6783 : SPARC_OUTGOING_INT_ARG_FIRST);
6784 int slotno = cum->words;
6785 enum mode_class mclass;
6786 int regno;
6787
6788 *ppadding = 0;
6789
6790 if (type && TREE_ADDRESSABLE (type))
6791 return -1;
6792
6793 if (TARGET_ARCH32
6794 && mode == BLKmode
6795 && type
6796 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6797 return -1;
6798
6799 /* For SPARC64, objects requiring 16-byte alignment get it. */
6800 if (TARGET_ARCH64
6801 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6802 && (slotno & 1) != 0)
6803 slotno++, *ppadding = 1;
6804
6805 mclass = GET_MODE_CLASS (mode);
6806 if (type && TREE_CODE (type) == VECTOR_TYPE)
6807 {
6808 /* Vector types deserve special treatment because they are
6809 polymorphic wrt their mode, depending upon whether VIS
6810 instructions are enabled. */
6811 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6812 {
6813 /* The SPARC port defines no floating-point vector modes. */
6814 gcc_assert (mode == BLKmode);
6815 }
6816 else
6817 {
6818 /* Integral vector types should either have a vector
6819 mode or an integral mode, because we are guaranteed
6820 by pass_by_reference that their size is not greater
6821 than 16 bytes and TImode is 16-byte wide. */
6822 gcc_assert (mode != BLKmode);
6823
6824 /* Vector integers are handled like floats according to
6825 the Sun VIS SDK. */
6826 mclass = MODE_FLOAT;
6827 }
6828 }
6829
6830 switch (mclass)
6831 {
6832 case MODE_FLOAT:
6833 case MODE_COMPLEX_FLOAT:
6834 case MODE_VECTOR_INT:
6835 if (TARGET_ARCH64 && TARGET_FPU && named)
6836 {
6837 /* If all arg slots are filled, then must pass on stack. */
6838 if (slotno >= SPARC_FP_ARG_MAX)
6839 return -1;
6840
6841 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6842 /* Arguments filling only a single FP register are
6843 right-justified in the outer double FP register. */
6844 if (GET_MODE_SIZE (mode) <= 4)
6845 regno++;
6846 break;
6847 }
6848 /* fallthrough */
6849
6850 case MODE_INT:
6851 case MODE_COMPLEX_INT:
6852 /* If all arg slots are filled, then must pass on stack. */
6853 if (slotno >= SPARC_INT_ARG_MAX)
6854 return -1;
6855
6856 regno = regbase + slotno;
6857 break;
6858
6859 case MODE_RANDOM:
6860 if (mode == VOIDmode)
6861 /* MODE is VOIDmode when generating the actual call. */
6862 return -1;
6863
6864 gcc_assert (mode == BLKmode);
6865
6866 if (TARGET_ARCH32
6867 || !type
6868 || (TREE_CODE (type) != RECORD_TYPE
6869 && TREE_CODE (type) != VECTOR_TYPE))
6870 {
6871 /* If all arg slots are filled, then must pass on stack. */
6872 if (slotno >= SPARC_INT_ARG_MAX)
6873 return -1;
6874
6875 regno = regbase + slotno;
6876 }
6877 else /* TARGET_ARCH64 && type */
6878 {
6879 /* If all arg slots are filled, then must pass on stack. */
6880 if (slotno >= SPARC_FP_ARG_MAX)
6881 return -1;
6882
6883 if (TREE_CODE (type) == RECORD_TYPE)
6884 {
6885 classify_data_t data = { false, false };
6886 traverse_record_type<classify_data_t, classify_registers>
6887 (type, named, &data);
6888
6889 if (data.fp_regs)
6890 {
6891 /* If all FP slots are filled except for the last one and
6892 there is no FP field in the first word, then must pass
6893 on stack. */
6894 if (slotno >= SPARC_FP_ARG_MAX - 1
6895 && !data.fp_regs_in_first_word)
6896 return -1;
6897 }
6898 else
6899 {
6900 /* If all int slots are filled, then must pass on stack. */
6901 if (slotno >= SPARC_INT_ARG_MAX)
6902 return -1;
6903 }
6904 }
6905
6906 /* PREGNO isn't set since both int and FP regs can be used. */
6907 return slotno;
6908 }
6909 break;
6910
6911 default :
6912 gcc_unreachable ();
6913 }
6914
6915 *pregno = regno;
6916 return slotno;
6917 }
6918
6919 /* Handle recursive register counting/assigning for structure layout. */
6920
6921 typedef struct
6922 {
6923 int slotno; /* slot number of the argument. */
6924 int regbase; /* regno of the base register. */
6925 int intoffset; /* offset of the first pending integer field. */
6926 int nregs; /* number of words passed in registers. */
6927 bool stack; /* true if part of the argument is on the stack. */
6928 rtx ret; /* return expression being built. */
6929 } assign_data_t;
6930
6931 /* A subroutine of function_arg_record_value. Compute the number of integer
6932 registers to be assigned between DATA->intoffset and BITPOS. Return
6933 true if at least one integer register is assigned or false otherwise. */
6934
6935 static bool
6936 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6937 {
6938 if (data->intoffset < 0)
6939 return false;
6940
6941 const int intoffset = data->intoffset;
6942 data->intoffset = -1;
6943
6944 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6945 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6946 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6947 int nregs = (endbit - startbit) / BITS_PER_WORD;
6948
6949 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6950 {
6951 nregs = SPARC_INT_ARG_MAX - this_slotno;
6952
6953 /* We need to pass this field (partly) on the stack. */
6954 data->stack = 1;
6955 }
6956
6957 if (nregs <= 0)
6958 return false;
6959
6960 *pnregs = nregs;
6961 return true;
6962 }
6963
6964 /* A subroutine of function_arg_record_value. Compute the number and the mode
6965 of the FP registers to be assigned for FIELD. Return true if at least one
6966 FP register is assigned or false otherwise. */
6967
6968 static bool
6969 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6970 assign_data_t *data,
6971 int *pnregs, machine_mode *pmode)
6972 {
6973 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6974 machine_mode mode = DECL_MODE (field);
6975 int nregs, nslots;
6976
6977 /* Slots are counted as words while regs are counted as having the size of
6978 the (inner) mode. */
6979 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6980 {
6981 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6982 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6983 }
6984 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6985 {
6986 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6987 nregs = 2;
6988 }
6989 else
6990 nregs = 1;
6991
6992 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6993
6994 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6995 {
6996 nslots = SPARC_FP_ARG_MAX - this_slotno;
6997 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6998
6999 /* We need to pass this field (partly) on the stack. */
7000 data->stack = 1;
7001
7002 if (nregs <= 0)
7003 return false;
7004 }
7005
7006 *pnregs = nregs;
7007 *pmode = mode;
7008 return true;
7009 }
7010
7011 /* A subroutine of function_arg_record_value. Count the number of registers
7012 to be assigned for FIELD and between DATA->intoffset and BITPOS. */
7013
7014 inline void
7015 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7016 assign_data_t *data)
7017 {
7018 if (fp)
7019 {
7020 int nregs;
7021 machine_mode mode;
7022
7023 if (compute_int_layout (bitpos, data, &nregs))
7024 data->nregs += nregs;
7025
7026 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7027 data->nregs += nregs;
7028 }
7029 else
7030 {
7031 if (data->intoffset < 0)
7032 data->intoffset = bitpos;
7033 }
7034 }
7035
7036 /* A subroutine of function_arg_record_value. Assign the bits of the
7037 structure between DATA->intoffset and BITPOS to integer registers. */
7038
7039 static void
7040 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
7041 {
7042 int intoffset = data->intoffset;
7043 machine_mode mode;
7044 int nregs;
7045
7046 if (!compute_int_layout (bitpos, data, &nregs))
7047 return;
7048
7049 /* If this is the trailing part of a word, only load that much into
7050 the register. Otherwise load the whole register. Note that in
7051 the latter case we may pick up unwanted bits. It's not a problem
7052 at the moment but we may wish to revisit this. */
7053 if (intoffset % BITS_PER_WORD != 0)
7054 mode = smallest_int_mode_for_size (BITS_PER_WORD
7055 - intoffset % BITS_PER_WORD);
7056 else
7057 mode = word_mode;
7058
7059 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7060 unsigned int regno = data->regbase + this_slotno;
7061 intoffset /= BITS_PER_UNIT;
7062
7063 do
7064 {
7065 rtx reg = gen_rtx_REG (mode, regno);
7066 XVECEXP (data->ret, 0, data->stack + data->nregs)
7067 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7068 data->nregs += 1;
7069 mode = word_mode;
7070 regno += 1;
7071 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7072 }
7073 while (--nregs > 0);
7074 }
7075
7076 /* A subroutine of function_arg_record_value. Assign FIELD at position
7077 BITPOS to FP registers. */
7078
7079 static void
7080 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
7081 assign_data_t *data)
7082 {
7083 int nregs;
7084 machine_mode mode;
7085
7086 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7087 return;
7088
7089 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7090 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7091 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7092 regno++;
7093 int pos = bitpos / BITS_PER_UNIT;
7094
7095 do
7096 {
7097 rtx reg = gen_rtx_REG (mode, regno);
7098 XVECEXP (data->ret, 0, data->stack + data->nregs)
7099 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7100 data->nregs += 1;
7101 regno += GET_MODE_SIZE (mode) / 4;
7102 pos += GET_MODE_SIZE (mode);
7103 }
7104 while (--nregs > 0);
7105 }
7106
7107 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7108 the structure between DATA->intoffset and BITPOS to registers. */
7109
7110 inline void
7111 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7112 assign_data_t *data)
7113 {
7114 if (fp)
7115 {
7116 assign_int_registers (bitpos, data);
7117
7118 assign_fp_registers (field, bitpos, data);
7119 }
7120 else
7121 {
7122 if (data->intoffset < 0)
7123 data->intoffset = bitpos;
7124 }
7125 }
7126
7127 /* Used by function_arg and sparc_function_value_1 to implement the complex
7128 conventions of the 64-bit ABI for passing and returning structures.
7129 Return an expression valid as a return value for the FUNCTION_ARG
7130 and TARGET_FUNCTION_VALUE.
7131
7132 TYPE is the data type of the argument (as a tree).
7133 This is null for libcalls where that information may
7134 not be available.
7135 MODE is the argument's machine mode.
7136 SLOTNO is the index number of the argument's slot in the parameter array.
7137 NAMED is true if this argument is a named parameter
7138 (otherwise it is an extra parameter matching an ellipsis).
7139 REGBASE is the regno of the base register for the parameter array. */
7140
7141 static rtx
7142 function_arg_record_value (const_tree type, machine_mode mode,
7143 int slotno, bool named, int regbase)
7144 {
7145 HOST_WIDE_INT typesize = int_size_in_bytes (type);
7146 assign_data_t data;
7147 int nregs;
7148
7149 data.slotno = slotno;
7150 data.regbase = regbase;
7151
7152 /* Count how many registers we need. */
7153 data.nregs = 0;
7154 data.intoffset = 0;
7155 data.stack = false;
7156 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7157
7158 /* Take into account pending integer fields. */
7159 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
7160 data.nregs += nregs;
7161
7162 /* Allocate the vector and handle some annoying special cases. */
7163 nregs = data.nregs;
7164
7165 if (nregs == 0)
7166 {
7167 /* ??? Empty structure has no value? Duh? */
7168 if (typesize <= 0)
7169 {
7170 /* Though there's nothing really to store, return a word register
7171 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7172 leads to breakage due to the fact that there are zero bytes to
7173 load. */
7174 return gen_rtx_REG (mode, regbase);
7175 }
7176
7177 /* ??? C++ has structures with no fields, and yet a size. Give up
7178 for now and pass everything back in integer registers. */
7179 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7180 if (nregs + slotno > SPARC_INT_ARG_MAX)
7181 nregs = SPARC_INT_ARG_MAX - slotno;
7182 }
7183
7184 gcc_assert (nregs > 0);
7185
7186 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7187
7188 /* If at least one field must be passed on the stack, generate
7189 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7190 also be passed on the stack. We can't do much better because the
7191 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7192 of structures for which the fields passed exclusively in registers
7193 are not at the beginning of the structure. */
7194 if (data.stack)
7195 XVECEXP (data.ret, 0, 0)
7196 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7197
7198 /* Assign the registers. */
7199 data.nregs = 0;
7200 data.intoffset = 0;
7201 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7202
7203 /* Assign pending integer fields. */
7204 assign_int_registers (typesize * BITS_PER_UNIT, &data);
7205
7206 gcc_assert (data.nregs == nregs);
7207
7208 return data.ret;
7209 }
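/* For the struct { double d; int i; } example given with the ABI tables
   above, passed as the first named outgoing argument, this function builds
   roughly

     (parallel [(expr_list (reg:DF %f0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])

   i.e. the FP field in a float register and the integer field, widened to
   a full word, in the second integer argument register.  */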
7210
7211 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7212 of the 64-bit ABI for passing and returning unions.
7213 Return an expression valid as a return value for the FUNCTION_ARG
7214 and TARGET_FUNCTION_VALUE.
7215
7216 SIZE is the size in bytes of the union.
7217 MODE is the argument's machine mode.
7218 REGNO is the hard register the union will be passed in. */
7219
7220 static rtx
7221 function_arg_union_value (int size, machine_mode mode, int slotno,
7222 int regno)
7223 {
7224 int nwords = CEIL_NWORDS (size), i;
7225 rtx regs;
7226
7227 /* See comment in previous function for empty structures. */
7228 if (nwords == 0)
7229 return gen_rtx_REG (mode, regno);
7230
7231 if (slotno == SPARC_INT_ARG_MAX - 1)
7232 nwords = 1;
7233
7234 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7235
7236 for (i = 0; i < nwords; i++)
7237 {
7238 /* Unions are passed left-justified. */
7239 XVECEXP (regs, 0, i)
7240 = gen_rtx_EXPR_LIST (VOIDmode,
7241 gen_rtx_REG (word_mode, regno),
7242 GEN_INT (UNITS_PER_WORD * i));
7243 regno++;
7244 }
7245
7246 return regs;
7247 }
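/* E.g. a 12-byte union passed in the first outgoing argument slot under the
   64-bit ABI comes back as roughly

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
                (expr_list (reg:DI %o1) (const_int 8))])

   i.e. two left-justified word registers.  */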
7248
7249 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7250 for passing and returning BLKmode vectors.
7251 Return an expression valid as a return value for the FUNCTION_ARG
7252 and TARGET_FUNCTION_VALUE.
7253
7254 SIZE is the size in bytes of the vector.
7255 REGNO is the FP hard register the vector will be passed in. */
7256
7257 static rtx
7258 function_arg_vector_value (int size, int regno)
7259 {
7260 const int nregs = MAX (1, size / 8);
7261 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7262
7263 if (size < 8)
7264 XVECEXP (regs, 0, 0)
7265 = gen_rtx_EXPR_LIST (VOIDmode,
7266 gen_rtx_REG (SImode, regno),
7267 const0_rtx);
7268 else
7269 for (int i = 0; i < nregs; i++)
7270 XVECEXP (regs, 0, i)
7271 = gen_rtx_EXPR_LIST (VOIDmode,
7272 gen_rtx_REG (DImode, regno + 2*i),
7273 GEN_INT (i*8));
7274
7275 return regs;
7276 }
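/* E.g. a 16-byte vector starting in %f0 yields roughly

     (parallel:BLK [(expr_list (reg:DI %f0) (const_int 0))
                    (expr_list (reg:DI %f2) (const_int 8))])

   while a 4-byte vector is described by a single SImode register.  */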
7277
7278 /* Determine where to put an argument to a function.
7279 Value is zero to push the argument on the stack,
7280 or a hard register in which to store the argument.
7281
7282 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7283 the preceding args and about the function being called.
7284 MODE is the argument's machine mode.
7285 TYPE is the data type of the argument (as a tree).
7286 This is null for libcalls where that information may
7287 not be available.
7288 NAMED is true if this argument is a named parameter
7289 (otherwise it is an extra parameter matching an ellipsis).
7290 INCOMING is false for TARGET_FUNCTION_ARG, true for
7291 TARGET_FUNCTION_INCOMING_ARG. */
7292
7293 static rtx
7294 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7295 const_tree type, bool named, bool incoming)
7296 {
7297 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7298
7299 int regbase = (incoming
7300 ? SPARC_INCOMING_INT_ARG_FIRST
7301 : SPARC_OUTGOING_INT_ARG_FIRST);
7302 int slotno, regno, padding;
7303 enum mode_class mclass = GET_MODE_CLASS (mode);
7304
7305 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7306 &regno, &padding);
7307 if (slotno == -1)
7308 return 0;
7309
7310 /* Vector types deserve special treatment because they are polymorphic wrt
7311 their mode, depending upon whether VIS instructions are enabled. */
7312 if (type && TREE_CODE (type) == VECTOR_TYPE)
7313 {
7314 HOST_WIDE_INT size = int_size_in_bytes (type);
7315 gcc_assert ((TARGET_ARCH32 && size <= 8)
7316 || (TARGET_ARCH64 && size <= 16));
7317
7318 if (mode == BLKmode)
7319 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7320
7321 mclass = MODE_FLOAT;
7322 }
7323
7324 if (TARGET_ARCH32)
7325 return gen_rtx_REG (mode, regno);
7326
7327 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7328 and are promoted to registers if possible. */
7329 if (type && TREE_CODE (type) == RECORD_TYPE)
7330 {
7331 HOST_WIDE_INT size = int_size_in_bytes (type);
7332 gcc_assert (size <= 16);
7333
7334 return function_arg_record_value (type, mode, slotno, named, regbase);
7335 }
7336
7337 /* Unions up to 16 bytes in size are passed in integer registers. */
7338 else if (type && TREE_CODE (type) == UNION_TYPE)
7339 {
7340 HOST_WIDE_INT size = int_size_in_bytes (type);
7341 gcc_assert (size <= 16);
7342
7343 return function_arg_union_value (size, mode, slotno, regno);
7344 }
7345
7346 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7347 but also have the slot allocated for them.
7348 If no prototype is in scope, fp values in register slots get passed
7349 in two places: either in fp regs and int regs, or in fp regs and memory. */
7350 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7351 && SPARC_FP_REG_P (regno))
7352 {
7353 rtx reg = gen_rtx_REG (mode, regno);
7354 if (cum->prototype_p || cum->libcall_p)
7355 return reg;
7356 else
7357 {
7358 rtx v0, v1;
7359
7360 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7361 {
7362 int intreg;
7363
7364 /* On incoming, we don't need to know that the value
7365 is passed in %f0 and %i0, and it confuses other parts
7366 causing needless spillage even in the simplest cases. */
7367 if (incoming)
7368 return reg;
7369
7370 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7371 + (regno - SPARC_FP_ARG_FIRST) / 2);
7372
7373 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7374 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7375 const0_rtx);
7376 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7377 }
7378 else
7379 {
7380 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7381 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7382 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7383 }
7384 }
7385 }
7386
7387 /* All other aggregate types are passed in an integer register in a mode
7388 corresponding to the size of the type. */
7389 else if (type && AGGREGATE_TYPE_P (type))
7390 {
7391 HOST_WIDE_INT size = int_size_in_bytes (type);
7392 gcc_assert (size <= 16);
7393
7394 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7395 }
7396
7397 return gen_rtx_REG (mode, regno);
7398 }
7399
7400 /* Handle the TARGET_FUNCTION_ARG target hook. */
7401
7402 static rtx
7403 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7404 const_tree type, bool named)
7405 {
7406 return sparc_function_arg_1 (cum, mode, type, named, false);
7407 }
7408
7409 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7410
7411 static rtx
7412 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7413 const_tree type, bool named)
7414 {
7415 return sparc_function_arg_1 (cum, mode, type, named, true);
7416 }
7417
7418 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7419
7420 static unsigned int
7421 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7422 {
7423 return ((TARGET_ARCH64
7424 && (GET_MODE_ALIGNMENT (mode) == 128
7425 || (type && TYPE_ALIGN (type) == 128)))
7426 ? 128
7427 : PARM_BOUNDARY);
7428 }
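/* Hedged example (not from the original file): on TARGET_ARCH64 a type such
   as __int128 or one declared with __attribute__((aligned(16))) reports a
   128-bit boundary and therefore starts on an even argument slot; everything
   else falls back to PARM_BOUNDARY.  */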
7429
7430 /* For an arg passed partly in registers and partly in memory,
7431 this is the number of bytes of registers used.
7432 For args passed entirely in registers or entirely in memory, zero.
7433
7434 Any arg that starts in the first 6 regs but won't entirely fit in them
7435 needs partial registers on v8. On v9, structures with integer
7436 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7437 values that begin in the last fp reg [where "last fp reg" varies with the
7438 mode] will be split between that reg and memory. */
7439
7440 static int
7441 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7442 tree type, bool named)
7443 {
7444 int slotno, regno, padding;
7445
7446 /* We pass false for incoming here; it doesn't matter. */
7447 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7448 false, &regno, &padding);
7449
7450 if (slotno == -1)
7451 return 0;
7452
7453 if (TARGET_ARCH32)
7454 {
7455 if ((slotno + (mode == BLKmode
7456 ? CEIL_NWORDS (int_size_in_bytes (type))
7457 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7458 > SPARC_INT_ARG_MAX)
7459 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7460 }
7461 else
7462 {
7463 /* We are guaranteed by pass_by_reference that the size of the
7464 argument is not greater than 16 bytes, so we only need to return
7465 one word if the argument is partially passed in registers. */
7466
7467 if (type && AGGREGATE_TYPE_P (type))
7468 {
7469 int size = int_size_in_bytes (type);
7470
7471 if (size > UNITS_PER_WORD
7472 && (slotno == SPARC_INT_ARG_MAX - 1
7473 || slotno == SPARC_FP_ARG_MAX - 1))
7474 return UNITS_PER_WORD;
7475 }
7476 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7477 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7478 && ! (TARGET_FPU && named)))
7479 {
7480 /* The complex types are passed as packed types. */
7481 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7482 && slotno == SPARC_INT_ARG_MAX - 1)
7483 return UNITS_PER_WORD;
7484 }
7485 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7486 {
7487 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7488 > SPARC_FP_ARG_MAX)
7489 return UNITS_PER_WORD;
7490 }
7491 }
7492
7493 return 0;
7494 }
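/* Worked example (illustrative only, not original text): on TARGET_ARCH64 a
   16-byte struct whose first slot is the last integer argument slot
   (slotno == SPARC_INT_ARG_MAX - 1) has its first 8 bytes in a register and
   the rest in memory, so the hook returns UNITS_PER_WORD; the same struct
   starting one slot earlier fits entirely in registers and the hook
   returns 0.  */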
7495
7496 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7497 Specify whether to pass the argument by reference. */
7498
7499 static bool
7500 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7501 machine_mode mode, const_tree type,
7502 bool named ATTRIBUTE_UNUSED)
7503 {
7504 if (TARGET_ARCH32)
7505 /* Original SPARC 32-bit ABI says that structures and unions,
7506 and quad-precision floats are passed by reference. For Pascal,
7507 also pass arrays by reference. All other base types are passed
7508 in registers.
7509
7510 Extended ABI (as implemented by the Sun compiler) says that all
7511 complex floats are passed by reference. Pass complex integers
7512 in registers up to 8 bytes. More generally, enforce the 2-word
7513 cap for passing arguments in registers.
7514
7515 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7516 integers are passed like floats of the same size, that is in
7517 registers up to 8 bytes. Pass all vector floats by reference
7518 like structure and unions. */
7519 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7520 || mode == SCmode
7521 /* Catch CDImode, TFmode, DCmode and TCmode. */
7522 || GET_MODE_SIZE (mode) > 8
7523 || (type
7524 && TREE_CODE (type) == VECTOR_TYPE
7525 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7526 else
7527 /* Original SPARC 64-bit ABI says that structures and unions
7528 smaller than 16 bytes are passed in registers, as well as
7529 all other base types.
7530
7531 Extended ABI (as implemented by the Sun compiler) says that
7532 complex floats are passed in registers up to 16 bytes. Pass
7533 all complex integers in registers up to 16 bytes. More generally,
7534 enforce the 2-word cap for passing arguments in registers.
7535
7536 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7537 integers are passed like floats of the same size, that is in
7538 registers (up to 16 bytes). Pass all vector floats like structure
7539 and unions. */
7540 return ((type
7541 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7542 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7543 /* Catch CTImode and TCmode. */
7544 || GET_MODE_SIZE (mode) > 16);
7545 }
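/* Hedged examples of the rules above (added for illustration only):
   32-bit ABI:  struct { int i; }          -> by reference (aggregate)
                long double (TFmode)       -> by reference (16 bytes > 8)
                _Complex float (SCmode)    -> by reference
                _Complex int (8 bytes)     -> in registers
   64-bit ABI:  a 16-byte struct           -> in registers
                a 24-byte struct           -> by reference (> 16 bytes)
                long double (TFmode)       -> in registers (16 bytes, not > 16).  */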
7546
7547 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7548 Update the data in CUM to advance over an argument
7549 of mode MODE and data type TYPE.
7550 TYPE is null for libcalls where that information may not be available. */
7551
7552 static void
7553 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7554 const_tree type, bool named)
7555 {
7556 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7557 int regno, padding;
7558
7559 /* We pass false for incoming here; it doesn't matter. */
7560 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7561
7562 /* If argument requires leading padding, add it. */
7563 cum->words += padding;
7564
7565 if (TARGET_ARCH32)
7566 cum->words += (mode == BLKmode
7567 ? CEIL_NWORDS (int_size_in_bytes (type))
7568 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7569 else
7570 {
7571 if (type && AGGREGATE_TYPE_P (type))
7572 {
7573 int size = int_size_in_bytes (type);
7574
7575 if (size <= 8)
7576 ++cum->words;
7577 else if (size <= 16)
7578 cum->words += 2;
7579 else /* passed by reference */
7580 ++cum->words;
7581 }
7582 else
7583 cum->words += (mode == BLKmode
7584 ? CEIL_NWORDS (int_size_in_bytes (type))
7585 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7586 }
7587 }
7588
7589 /* Implement TARGET_FUNCTION_ARG_PADDING.  For the 64-bit ABI, aggregates
7590 are always stored left-justified in their argument slot. */
7591
7592 static pad_direction
7593 sparc_function_arg_padding (machine_mode mode, const_tree type)
7594 {
7595 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7596 return PAD_UPWARD;
7597
7598 /* Fall back to the default. */
7599 return default_function_arg_padding (mode, type);
7600 }
7601
7602 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7603 Specify whether to return the return value in memory. */
7604
7605 static bool
7606 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7607 {
7608 if (TARGET_ARCH32)
7609 /* Original SPARC 32-bit ABI says that structures and unions,
7610 and quad-precision floats are returned in memory. All other
7611 base types are returned in registers.
7612
7613 Extended ABI (as implemented by the Sun compiler) says that
7614 all complex floats are returned in registers (8 FP registers
7615 at most for '_Complex long double'). Return all complex integers
7616 in registers (4 at most for '_Complex long long').
7617
7618 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7619 integers are returned like floats of the same size, that is in
7620 registers up to 8 bytes and in memory otherwise. Return all
7621 vector floats in memory like structure and unions; note that
7622 they always have BLKmode like the latter. */
7623 return (TYPE_MODE (type) == BLKmode
7624 || TYPE_MODE (type) == TFmode
7625 || (TREE_CODE (type) == VECTOR_TYPE
7626 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7627 else
7628 /* Original SPARC 64-bit ABI says that structures and unions
7629 smaller than 32 bytes are returned in registers, as well as
7630 all other base types.
7631
7632 Extended ABI (as implemented by the Sun compiler) says that all
7633 complex floats are returned in registers (8 FP registers at most
7634 for '_Complex long double'). Return all complex integers in
7635 registers (4 at most for '_Complex TItype').
7636
7637 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7638 integers are returned like floats of the same size, that is in
7639 registers. Return all vector floats like structure and unions;
7640 note that they always have BLKmode like the latter. */
7641 return (TYPE_MODE (type) == BLKmode
7642 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7643 }
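/* Hedged examples (illustrative, not from the original sources): in 32-bit
   mode any struct or union (BLKmode) and long double (TFmode) are returned
   in memory, whereas in 64-bit mode a 24-byte struct (BLKmode, size <= 32)
   is returned in registers and only aggregates larger than 32 bytes go to
   memory.  */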
7644
7645 /* Handle the TARGET_STRUCT_VALUE target hook.
7646 Return where to find the structure return value address. */
7647
7648 static rtx
7649 sparc_struct_value_rtx (tree fndecl, int incoming)
7650 {
7651 if (TARGET_ARCH64)
7652 return 0;
7653 else
7654 {
7655 rtx mem;
7656
7657 if (incoming)
7658 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7659 STRUCT_VALUE_OFFSET));
7660 else
7661 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7662 STRUCT_VALUE_OFFSET));
7663
7664 /* Only follow the SPARC ABI for fixed-size structure returns.
7665 Variable-size structure returns are handled per the normal
7666 procedures in GCC.  This is enabled by -mstd-struct-return. */
7667 if (incoming == 2
7668 && sparc_std_struct_return
7669 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7670 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7671 {
7672 /* We must check and adjust the return address, as it is optional
7673 whether the return object is really provided. */
7674 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7675 rtx scratch = gen_reg_rtx (SImode);
7676 rtx_code_label *endlab = gen_label_rtx ();
7677
7678 /* Calculate the return object size. */
7679 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7680 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7681 /* Construct a temporary return value. */
7682 rtx temp_val
7683 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7684
7685 /* Implement SPARC 32-bit psABI callee return struct checking:
7686
7687 Fetch the instruction where we will return to and see if
7688 it's an unimp instruction (the most significant 10 bits
7689 will be zero). */
7690 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7691 plus_constant (Pmode,
7692 ret_reg, 8)));
7693 /* Assume the size is valid and pre-adjust. */
7694 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7695 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7696 0, endlab);
7697 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7698 /* Write the address of the memory pointed to by temp_val into
7699 the memory pointed to by mem. */
7700 emit_move_insn (mem, XEXP (temp_val, 0));
7701 emit_label (endlab);
7702 }
7703
7704 return mem;
7705 }
7706 }
7707
7708 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7709 For v9, function return values are subject to the same rules as arguments,
7710 except that up to 32 bytes may be returned in registers. */
7711
7712 static rtx
7713 sparc_function_value_1 (const_tree type, machine_mode mode,
7714 bool outgoing)
7715 {
7716 /* Beware that the two values are swapped here wrt function_arg. */
7717 int regbase = (outgoing
7718 ? SPARC_INCOMING_INT_ARG_FIRST
7719 : SPARC_OUTGOING_INT_ARG_FIRST);
7720 enum mode_class mclass = GET_MODE_CLASS (mode);
7721 int regno;
7722
7723 /* Vector types deserve special treatment because they are polymorphic wrt
7724 their mode, depending upon whether VIS instructions are enabled. */
7725 if (type && TREE_CODE (type) == VECTOR_TYPE)
7726 {
7727 HOST_WIDE_INT size = int_size_in_bytes (type);
7728 gcc_assert ((TARGET_ARCH32 && size <= 8)
7729 || (TARGET_ARCH64 && size <= 32));
7730
7731 if (mode == BLKmode)
7732 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7733
7734 mclass = MODE_FLOAT;
7735 }
7736
7737 if (TARGET_ARCH64 && type)
7738 {
7739 /* Structures up to 32 bytes in size are returned in registers. */
7740 if (TREE_CODE (type) == RECORD_TYPE)
7741 {
7742 HOST_WIDE_INT size = int_size_in_bytes (type);
7743 gcc_assert (size <= 32);
7744
7745 return function_arg_record_value (type, mode, 0, 1, regbase);
7746 }
7747
7748 /* Unions up to 32 bytes in size are returned in integer registers. */
7749 else if (TREE_CODE (type) == UNION_TYPE)
7750 {
7751 HOST_WIDE_INT size = int_size_in_bytes (type);
7752 gcc_assert (size <= 32);
7753
7754 return function_arg_union_value (size, mode, 0, regbase);
7755 }
7756
7757 /* Objects that require it are returned in FP registers. */
7758 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7759 ;
7760
7761 /* All other aggregate types are returned in an integer register in a
7762 mode corresponding to the size of the type. */
7763 else if (AGGREGATE_TYPE_P (type))
7764 {
7765 /* All other aggregate types are passed in an integer register
7766 in a mode corresponding to the size of the type. */
7767 HOST_WIDE_INT size = int_size_in_bytes (type);
7768 gcc_assert (size <= 32);
7769
7770 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7771
7772 /* ??? We probably should have made the same ABI change in
7773 3.4.0 as the one we made for unions. The latter was
7774 required by the SCD though, while the former is not
7775 specified, so we favored compatibility and efficiency.
7776
7777 Now we're stuck for aggregates larger than 16 bytes,
7778 because OImode vanished in the meantime. Let's not
7779 try to be unduly clever, and simply follow the ABI
7780 for unions in that case. */
7781 if (mode == BLKmode)
7782 return function_arg_union_value (size, mode, 0, regbase);
7783 else
7784 mclass = MODE_INT;
7785 }
7786
7787 /* We should only have pointer and integer types at this point. This
7788 must match sparc_promote_function_mode. */
7789 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7790 mode = word_mode;
7791 }
7792
7793 /* We should only have pointer and integer types at this point, except with
7794 -freg-struct-return. This must match sparc_promote_function_mode. */
7795 else if (TARGET_ARCH32
7796 && !(type && AGGREGATE_TYPE_P (type))
7797 && mclass == MODE_INT
7798 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7799 mode = word_mode;
7800
7801 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7802 regno = SPARC_FP_ARG_FIRST;
7803 else
7804 regno = regbase;
7805
7806 return gen_rtx_REG (mode, regno);
7807 }
7808
7809 /* Handle TARGET_FUNCTION_VALUE.
7810 On the SPARC, the value is found in the first "output" register, but the
7811 called function leaves it in the first "input" register. */
7812
7813 static rtx
7814 sparc_function_value (const_tree valtype,
7815 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7816 bool outgoing)
7817 {
7818 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7819 }
7820
7821 /* Handle TARGET_LIBCALL_VALUE. */
7822
7823 static rtx
7824 sparc_libcall_value (machine_mode mode,
7825 const_rtx fun ATTRIBUTE_UNUSED)
7826 {
7827 return sparc_function_value_1 (NULL_TREE, mode, false);
7828 }
7829
7830 /* Handle FUNCTION_VALUE_REGNO_P.
7831 On the SPARC, the first "output" reg is used for integer values, and the
7832 first floating point register is used for floating point values. */
7833
7834 static bool
7835 sparc_function_value_regno_p (const unsigned int regno)
7836 {
7837 return (regno == 8 || (TARGET_FPU && regno == 32));
7838 }
7839
7840 /* Do what is necessary for `va_start'. We look at the current function
7841 to determine if stdarg or varargs is used and return the address of
7842 the first unnamed parameter. */
7843
7844 static rtx
7845 sparc_builtin_saveregs (void)
7846 {
7847 int first_reg = crtl->args.info.words;
7848 rtx address;
7849 int regno;
7850
7851 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7852 emit_move_insn (gen_rtx_MEM (word_mode,
7853 gen_rtx_PLUS (Pmode,
7854 frame_pointer_rtx,
7855 GEN_INT (FIRST_PARM_OFFSET (0)
7856 + (UNITS_PER_WORD
7857 * regno)))),
7858 gen_rtx_REG (word_mode,
7859 SPARC_INCOMING_INT_ARG_FIRST + regno));
7860
7861 address = gen_rtx_PLUS (Pmode,
7862 frame_pointer_rtx,
7863 GEN_INT (FIRST_PARM_OFFSET (0)
7864 + UNITS_PER_WORD * first_reg));
7865
7866 return address;
7867 }
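/* Illustrative sketch of the effect (an assumption stated for exposition):
   for int f (int a, ...), crtl->args.info.words is 1, so the loop stores the
   incoming registers %i1 ... %i5 into their reserved stack slots at
   FIRST_PARM_OFFSET + UNITS_PER_WORD * regno, and the returned address
   points at the slot of the first unnamed argument.  */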
7868
7869 /* Implement `va_start' for stdarg. */
7870
7871 static void
7872 sparc_va_start (tree valist, rtx nextarg)
7873 {
7874 nextarg = expand_builtin_saveregs ();
7875 std_expand_builtin_va_start (valist, nextarg);
7876 }
7877
7878 /* Implement `va_arg' for stdarg. */
7879
7880 static tree
7881 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7882 gimple_seq *post_p)
7883 {
7884 HOST_WIDE_INT size, rsize, align;
7885 tree addr, incr;
7886 bool indirect;
7887 tree ptrtype = build_pointer_type (type);
7888
7889 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7890 {
7891 indirect = true;
7892 size = rsize = UNITS_PER_WORD;
7893 align = 0;
7894 }
7895 else
7896 {
7897 indirect = false;
7898 size = int_size_in_bytes (type);
7899 rsize = ROUND_UP (size, UNITS_PER_WORD);
7900 align = 0;
7901
7902 if (TARGET_ARCH64)
7903 {
7904 /* For SPARC64, objects requiring 16-byte alignment get it. */
7905 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7906 align = 2 * UNITS_PER_WORD;
7907
7908 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7909 are left-justified in their slots. */
7910 if (AGGREGATE_TYPE_P (type))
7911 {
7912 if (size == 0)
7913 size = rsize = UNITS_PER_WORD;
7914 else
7915 size = rsize;
7916 }
7917 }
7918 }
7919
7920 incr = valist;
7921 if (align)
7922 {
7923 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7924 incr = fold_convert (sizetype, incr);
7925 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7926 size_int (-align));
7927 incr = fold_convert (ptr_type_node, incr);
7928 }
7929
7930 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7931 addr = incr;
7932
7933 if (BYTES_BIG_ENDIAN && size < rsize)
7934 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7935
7936 if (indirect)
7937 {
7938 addr = fold_convert (build_pointer_type (ptrtype), addr);
7939 addr = build_va_arg_indirect_ref (addr);
7940 }
7941
7942 /* If the address isn't aligned properly for the type, we need a temporary.
7943 FIXME: This is inefficient; usually we can do this in registers. */
7944 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7945 {
7946 tree tmp = create_tmp_var (type, "va_arg_tmp");
7947 tree dest_addr = build_fold_addr_expr (tmp);
7948 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7949 3, dest_addr, addr, size_int (rsize));
7950 TREE_ADDRESSABLE (tmp) = 1;
7951 gimplify_and_add (copy, pre_p);
7952 addr = dest_addr;
7953 }
7954
7955 else
7956 addr = fold_convert (ptrtype, addr);
7957
7958 incr = fold_build_pointer_plus_hwi (incr, rsize);
7959 gimplify_assign (valist, incr, post_p);
7960
7961 return build_va_arg_indirect_ref (addr);
7962 }
7963 \f
7964 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7965 Specify whether the vector mode is supported by the hardware. */
7966
7967 static bool
7968 sparc_vector_mode_supported_p (machine_mode mode)
7969 {
7970 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7971 }
7972 \f
7973 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7974
7975 static machine_mode
7976 sparc_preferred_simd_mode (scalar_mode mode)
7977 {
7978 if (TARGET_VIS)
7979 switch (mode)
7980 {
7981 case E_SImode:
7982 return V2SImode;
7983 case E_HImode:
7984 return V4HImode;
7985 case E_QImode:
7986 return V8QImode;
7987
7988 default:;
7989 }
7990
7991 return word_mode;
7992 }
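/* For example (hedged, not part of the original file): with VIS enabled the
   vectorizer is offered V4HImode for HImode elements and V8QImode for QImode
   elements; without VIS the hook returns word_mode, which effectively
   disables vectorization for these element types.  */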
7993 \f
7994 /* Return the string to output an unconditional branch to LABEL, which is
7995 the operand number of the label.
7996
7997 DEST is the destination insn (i.e. the label), INSN is the source. */
7998
7999 const char *
8000 output_ubranch (rtx dest, rtx_insn *insn)
8001 {
8002 static char string[64];
8003 bool v9_form = false;
8004 int delta;
8005 char *p;
8006
8007 /* Even if we are trying to use cbcond for this, evaluate
8008 whether we can use V9 branches as our backup plan. */
8009
8010 delta = 5000000;
8011 if (INSN_ADDRESSES_SET_P ())
8012 delta = (INSN_ADDRESSES (INSN_UID (dest))
8013 - INSN_ADDRESSES (INSN_UID (insn)));
8014
8015 /* Leave some instructions for "slop". */
8016 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8017 v9_form = true;
8018
8019 if (TARGET_CBCOND)
8020 {
8021 bool emit_nop = emit_cbcond_nop (insn);
8022 bool far = false;
8023 const char *rval;
8024
8025 if (delta < -500 || delta > 500)
8026 far = true;
8027
8028 if (far)
8029 {
8030 if (v9_form)
8031 rval = "ba,a,pt\t%%xcc, %l0";
8032 else
8033 rval = "b,a\t%l0";
8034 }
8035 else
8036 {
8037 if (emit_nop)
8038 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8039 else
8040 rval = "cwbe\t%%g0, %%g0, %l0";
8041 }
8042 return rval;
8043 }
8044
8045 if (v9_form)
8046 strcpy (string, "ba%*,pt\t%%xcc, ");
8047 else
8048 strcpy (string, "b%*\t");
8049
8050 p = strchr (string, '\0');
8051 *p++ = '%';
8052 *p++ = 'l';
8053 *p++ = '0';
8054 *p++ = '%';
8055 *p++ = '(';
8056 *p = '\0';
8057
8058 return string;
8059 }
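/* Example outputs (an illustration, not generated text): a nearby branch on
   a V9 target produces "ba%*,pt %%xcc, %l0%(", the pre-V9 form is
   "b%* %l0%(", and with cbcond available a short branch becomes
   "cwbe %%g0, %%g0, %l0", followed by a nop when emit_cbcond_nop says one
   is needed.  */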
8060
8061 /* Return the string to output a conditional branch to LABEL, which is
8062 the operand number of the label. OP is the conditional expression.
8063 XEXP (OP, 0) is assumed to be a condition code register (integer or
8064 floating point) and its mode specifies what kind of comparison we made.
8065
8066 DEST is the destination insn (i.e. the label), INSN is the source.
8067
8068 REVERSED is nonzero if we should reverse the sense of the comparison.
8069
8070 ANNUL is nonzero if we should generate an annulling branch. */
8071
8072 const char *
8073 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8074 rtx_insn *insn)
8075 {
8076 static char string[64];
8077 enum rtx_code code = GET_CODE (op);
8078 rtx cc_reg = XEXP (op, 0);
8079 machine_mode mode = GET_MODE (cc_reg);
8080 const char *labelno, *branch;
8081 int spaces = 8, far;
8082 char *p;
8083
8084 /* v9 branches are limited to +-1MB. If it is too far away,
8085 change
8086
8087 bne,pt %xcc, .LC30
8088
8089 to
8090
8091 be,pn %xcc, .+12
8092 nop
8093 ba .LC30
8094
8095 and
8096
8097 fbne,a,pn %fcc2, .LC29
8098
8099 to
8100
8101 fbe,pt %fcc2, .+16
8102 nop
8103 ba .LC29 */
8104
8105 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8106 if (reversed ^ far)
8107 {
8108 /* Reversing an FP compare requires care: an ordered compare
8109 becomes an unordered compare and vice versa. */
8110 if (mode == CCFPmode || mode == CCFPEmode)
8111 code = reverse_condition_maybe_unordered (code);
8112 else
8113 code = reverse_condition (code);
8114 }
8115
8116 /* Start by writing the branch condition. */
8117 if (mode == CCFPmode || mode == CCFPEmode)
8118 {
8119 switch (code)
8120 {
8121 case NE:
8122 branch = "fbne";
8123 break;
8124 case EQ:
8125 branch = "fbe";
8126 break;
8127 case GE:
8128 branch = "fbge";
8129 break;
8130 case GT:
8131 branch = "fbg";
8132 break;
8133 case LE:
8134 branch = "fble";
8135 break;
8136 case LT:
8137 branch = "fbl";
8138 break;
8139 case UNORDERED:
8140 branch = "fbu";
8141 break;
8142 case ORDERED:
8143 branch = "fbo";
8144 break;
8145 case UNGT:
8146 branch = "fbug";
8147 break;
8148 case UNLT:
8149 branch = "fbul";
8150 break;
8151 case UNEQ:
8152 branch = "fbue";
8153 break;
8154 case UNGE:
8155 branch = "fbuge";
8156 break;
8157 case UNLE:
8158 branch = "fbule";
8159 break;
8160 case LTGT:
8161 branch = "fblg";
8162 break;
8163 default:
8164 gcc_unreachable ();
8165 }
8166
8167 /* ??? !v9: FP branches cannot be preceded by another floating point
8168 insn. Because there is currently no concept of pre-delay slots,
8169 we can fix this only by always emitting a nop before a floating
8170 point branch. */
8171
8172 string[0] = '\0';
8173 if (! TARGET_V9)
8174 strcpy (string, "nop\n\t");
8175 strcat (string, branch);
8176 }
8177 else
8178 {
8179 switch (code)
8180 {
8181 case NE:
8182 if (mode == CCVmode || mode == CCXVmode)
8183 branch = "bvs";
8184 else
8185 branch = "bne";
8186 break;
8187 case EQ:
8188 if (mode == CCVmode || mode == CCXVmode)
8189 branch = "bvc";
8190 else
8191 branch = "be";
8192 break;
8193 case GE:
8194 if (mode == CCNZmode || mode == CCXNZmode)
8195 branch = "bpos";
8196 else
8197 branch = "bge";
8198 break;
8199 case GT:
8200 branch = "bg";
8201 break;
8202 case LE:
8203 branch = "ble";
8204 break;
8205 case LT:
8206 if (mode == CCNZmode || mode == CCXNZmode)
8207 branch = "bneg";
8208 else
8209 branch = "bl";
8210 break;
8211 case GEU:
8212 branch = "bgeu";
8213 break;
8214 case GTU:
8215 branch = "bgu";
8216 break;
8217 case LEU:
8218 branch = "bleu";
8219 break;
8220 case LTU:
8221 branch = "blu";
8222 break;
8223 default:
8224 gcc_unreachable ();
8225 }
8226 strcpy (string, branch);
8227 }
8228 spaces -= strlen (branch);
8229 p = strchr (string, '\0');
8230
8231 /* Now add the annulling, the label, and a possible noop. */
8232 if (annul && ! far)
8233 {
8234 strcpy (p, ",a");
8235 p += 2;
8236 spaces -= 2;
8237 }
8238
8239 if (TARGET_V9)
8240 {
8241 rtx note;
8242 int v8 = 0;
8243
8244 if (! far && insn && INSN_ADDRESSES_SET_P ())
8245 {
8246 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8247 - INSN_ADDRESSES (INSN_UID (insn)));
8248 /* Leave some instructions for "slop". */
8249 if (delta < -260000 || delta >= 260000)
8250 v8 = 1;
8251 }
8252
8253 switch (mode)
8254 {
8255 case E_CCmode:
8256 case E_CCNZmode:
8257 case E_CCCmode:
8258 case E_CCVmode:
8259 labelno = "%%icc, ";
8260 if (v8)
8261 labelno = "";
8262 break;
8263 case E_CCXmode:
8264 case E_CCXNZmode:
8265 case E_CCXCmode:
8266 case E_CCXVmode:
8267 labelno = "%%xcc, ";
8268 gcc_assert (!v8);
8269 break;
8270 case E_CCFPmode:
8271 case E_CCFPEmode:
8272 {
8273 static char v9_fcc_labelno[] = "%%fccX, ";
8274 /* Set the char indicating the number of the fcc reg to use. */
8275 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8276 labelno = v9_fcc_labelno;
8277 if (v8)
8278 {
8279 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8280 labelno = "";
8281 }
8282 }
8283 break;
8284 default:
8285 gcc_unreachable ();
8286 }
8287
8288 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8289 {
8290 strcpy (p,
8291 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8292 >= profile_probability::even ()) ^ far)
8293 ? ",pt" : ",pn");
8294 p += 3;
8295 spaces -= 3;
8296 }
8297 }
8298 else
8299 labelno = "";
8300
8301 if (spaces > 0)
8302 *p++ = '\t';
8303 else
8304 *p++ = ' ';
8305 strcpy (p, labelno);
8306 p = strchr (p, '\0');
8307 if (far)
8308 {
8309 strcpy (p, ".+12\n\t nop\n\tb\t");
8310 /* Skip the next insn if requested or
8311 if we know that it will be a nop. */
8312 if (annul || ! final_sequence)
8313 p[3] = '6';
8314 p += 14;
8315 }
8316 *p++ = '%';
8317 *p++ = 'l';
8318 *p++ = label + '0';
8319 *p++ = '%';
8320 *p++ = '#';
8321 *p = '\0';
8322
8323 return string;
8324 }
8325
8326 /* Emit a library call comparison between floating point X and Y.
8327 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8328 Return the new operator to be used in the comparison sequence.
8329
8330 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8331 values as arguments instead of the TFmode registers themselves;
8332 that is why we cannot call emit_float_lib_cmp. */
8333
8334 rtx
8335 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8336 {
8337 const char *qpfunc;
8338 rtx slot0, slot1, result, tem, tem2, libfunc;
8339 machine_mode mode;
8340 enum rtx_code new_comparison;
8341
8342 switch (comparison)
8343 {
8344 case EQ:
8345 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8346 break;
8347
8348 case NE:
8349 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8350 break;
8351
8352 case GT:
8353 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8354 break;
8355
8356 case GE:
8357 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8358 break;
8359
8360 case LT:
8361 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8362 break;
8363
8364 case LE:
8365 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8366 break;
8367
8368 case ORDERED:
8369 case UNORDERED:
8370 case UNGT:
8371 case UNLT:
8372 case UNEQ:
8373 case UNGE:
8374 case UNLE:
8375 case LTGT:
8376 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8377 break;
8378
8379 default:
8380 gcc_unreachable ();
8381 }
8382
8383 if (TARGET_ARCH64)
8384 {
8385 if (MEM_P (x))
8386 {
8387 tree expr = MEM_EXPR (x);
8388 if (expr)
8389 mark_addressable (expr);
8390 slot0 = x;
8391 }
8392 else
8393 {
8394 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8395 emit_move_insn (slot0, x);
8396 }
8397
8398 if (MEM_P (y))
8399 {
8400 tree expr = MEM_EXPR (y);
8401 if (expr)
8402 mark_addressable (expr);
8403 slot1 = y;
8404 }
8405 else
8406 {
8407 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8408 emit_move_insn (slot1, y);
8409 }
8410
8411 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8412 emit_library_call (libfunc, LCT_NORMAL,
8413 DImode,
8414 XEXP (slot0, 0), Pmode,
8415 XEXP (slot1, 0), Pmode);
8416 mode = DImode;
8417 }
8418 else
8419 {
8420 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8421 emit_library_call (libfunc, LCT_NORMAL,
8422 SImode,
8423 x, TFmode, y, TFmode);
8424 mode = SImode;
8425 }
8426
8427
8428 /* Immediately move the result of the libcall into a pseudo
8429 register so reload doesn't clobber the value if it needs
8430 the return register for a spill reg. */
8431 result = gen_reg_rtx (mode);
8432 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8433
8434 switch (comparison)
8435 {
8436 default:
8437 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8438 case ORDERED:
8439 case UNORDERED:
8440 new_comparison = (comparison == UNORDERED ? EQ : NE);
8441 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8442 case UNGT:
8443 case UNGE:
8444 new_comparison = (comparison == UNGT ? GT : NE);
8445 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8446 case UNLE:
8447 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8448 case UNLT:
8449 tem = gen_reg_rtx (mode);
8450 if (TARGET_ARCH32)
8451 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8452 else
8453 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8454 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8455 case UNEQ:
8456 case LTGT:
8457 tem = gen_reg_rtx (mode);
8458 if (TARGET_ARCH32)
8459 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8460 else
8461 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8462 tem2 = gen_reg_rtx (mode);
8463 if (TARGET_ARCH32)
8464 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8465 else
8466 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8467 new_comparison = (comparison == UNEQ ? EQ : NE);
8468 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8469 }
8470
8471 gcc_unreachable ();
8472 }
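/* The code above appears to rely on the _Q_cmp / _Qp_cmp result encoding
   0 = equal, 1 = less, 2 = greater, 3 = unordered (an assumption stated here
   for illustration).  For instance, UNGT is turned into "result > 1"
   (greater or unordered) and UNEQ into "((result + 1) & 2) == 0"
   (equal or unordered).  */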
8473
8474 /* Generate an unsigned DImode to FP conversion. This is the same code
8475 optabs would emit if we didn't have TFmode patterns. */
8476
8477 void
8478 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8479 {
8480 rtx i0, i1, f0, in, out;
8481
8482 out = operands[0];
8483 in = force_reg (DImode, operands[1]);
8484 rtx_code_label *neglab = gen_label_rtx ();
8485 rtx_code_label *donelab = gen_label_rtx ();
8486 i0 = gen_reg_rtx (DImode);
8487 i1 = gen_reg_rtx (DImode);
8488 f0 = gen_reg_rtx (mode);
8489
8490 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8491
8492 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8493 emit_jump_insn (gen_jump (donelab));
8494 emit_barrier ();
8495
8496 emit_label (neglab);
8497
8498 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8499 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8500 emit_insn (gen_iordi3 (i0, i0, i1));
8501 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8502 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8503
8504 emit_label (donelab);
8505 }
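/* A rough C equivalent of the sequence emitted above (a hedged sketch, not
   part of GCC):

     double floatunsdi (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       double d = (double) (long long) half;
       return d + d;
     }

   ORing the lost low bit back in before halving preserves correct rounding
   when the result is doubled.  */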
8506
8507 /* Generate an FP to unsigned DImode conversion. This is the same code
8508 optabs would emit if we didn't have TFmode patterns. */
8509
8510 void
8511 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8512 {
8513 rtx i0, i1, f0, in, out, limit;
8514
8515 out = operands[0];
8516 in = force_reg (mode, operands[1]);
8517 rtx_code_label *neglab = gen_label_rtx ();
8518 rtx_code_label *donelab = gen_label_rtx ();
8519 i0 = gen_reg_rtx (DImode);
8520 i1 = gen_reg_rtx (DImode);
8521 limit = gen_reg_rtx (mode);
8522 f0 = gen_reg_rtx (mode);
8523
8524 emit_move_insn (limit,
8525 const_double_from_real_value (
8526 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8527 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8528
8529 emit_insn (gen_rtx_SET (out,
8530 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8531 emit_jump_insn (gen_jump (donelab));
8532 emit_barrier ();
8533
8534 emit_label (neglab);
8535
8536 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8537 emit_insn (gen_rtx_SET (i0,
8538 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8539 emit_insn (gen_movdi (i1, const1_rtx));
8540 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8541 emit_insn (gen_xordi3 (out, i0, i1));
8542
8543 emit_label (donelab);
8544 }
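/* A rough C equivalent of the emitted sequence (a hedged sketch, not part
   of GCC):

     unsigned long long fixunsdi (long double x)
     {
       const long double limit = 9223372036854775808.0L;
       if (x < limit)
         return (long long) x;
       return (unsigned long long) (long long) (x - limit)
              ^ 0x8000000000000000ULL;
     }

   Values at or above 2^63 are biased down, converted, and the sign bit is
   restored with an XOR.  */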
8545
8546 /* Return the string to output a compare and branch instruction to DEST.
8547 DEST is the destination insn (i.e. the label), INSN is the source,
8548 and OP is the conditional expression. */
8549
8550 const char *
8551 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8552 {
8553 machine_mode mode = GET_MODE (XEXP (op, 0));
8554 enum rtx_code code = GET_CODE (op);
8555 const char *cond_str, *tmpl;
8556 int far, emit_nop, len;
8557 static char string[64];
8558 char size_char;
8559
8560 /* Compare and Branch is limited to +-2KB. If it is too far away,
8561 change
8562
8563 cxbne X, Y, .LC30
8564
8565 to
8566
8567 cxbe X, Y, .+16
8568 nop
8569 ba,pt xcc, .LC30
8570 nop */
8571
8572 len = get_attr_length (insn);
8573
8574 far = len == 4;
8575 emit_nop = len == 2;
8576
8577 if (far)
8578 code = reverse_condition (code);
8579
8580 size_char = ((mode == SImode) ? 'w' : 'x');
8581
8582 switch (code)
8583 {
8584 case NE:
8585 cond_str = "ne";
8586 break;
8587
8588 case EQ:
8589 cond_str = "e";
8590 break;
8591
8592 case GE:
8593 cond_str = "ge";
8594 break;
8595
8596 case GT:
8597 cond_str = "g";
8598 break;
8599
8600 case LE:
8601 cond_str = "le";
8602 break;
8603
8604 case LT:
8605 cond_str = "l";
8606 break;
8607
8608 case GEU:
8609 cond_str = "cc";
8610 break;
8611
8612 case GTU:
8613 cond_str = "gu";
8614 break;
8615
8616 case LEU:
8617 cond_str = "leu";
8618 break;
8619
8620 case LTU:
8621 cond_str = "cs";
8622 break;
8623
8624 default:
8625 gcc_unreachable ();
8626 }
8627
8628 if (far)
8629 {
8630 int veryfar = 1, delta;
8631
8632 if (INSN_ADDRESSES_SET_P ())
8633 {
8634 delta = (INSN_ADDRESSES (INSN_UID (dest))
8635 - INSN_ADDRESSES (INSN_UID (insn)));
8636 /* Leave some instructions for "slop". */
8637 if (delta >= -260000 && delta < 260000)
8638 veryfar = 0;
8639 }
8640
8641 if (veryfar)
8642 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8643 else
8644 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8645 }
8646 else
8647 {
8648 if (emit_nop)
8649 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8650 else
8651 tmpl = "c%cb%s\t%%1, %%2, %%3";
8652 }
8653
8654 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8655
8656 return string;
8657 }
8658
8659 /* Return the string to output a conditional branch to LABEL, testing
8660 register REG. LABEL is the operand number of the label; REG is the
8661 operand number of the reg. OP is the conditional expression. The mode
8662 of REG says what kind of comparison we made.
8663
8664 DEST is the destination insn (i.e. the label), INSN is the source.
8665
8666 REVERSED is nonzero if we should reverse the sense of the comparison.
8667
8668 ANNUL is nonzero if we should generate an annulling branch. */
8669
8670 const char *
8671 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8672 int annul, rtx_insn *insn)
8673 {
8674 static char string[64];
8675 enum rtx_code code = GET_CODE (op);
8676 machine_mode mode = GET_MODE (XEXP (op, 0));
8677 rtx note;
8678 int far;
8679 char *p;
8680
8681 /* Branches on a register are limited to +-128KB. If the target is too far away,
8682 change
8683
8684 brnz,pt %g1, .LC30
8685
8686 to
8687
8688 brz,pn %g1, .+12
8689 nop
8690 ba,pt %xcc, .LC30
8691
8692 and
8693
8694 brgez,a,pn %o1, .LC29
8695
8696 to
8697
8698 brlz,pt %o1, .+16
8699 nop
8700 ba,pt %xcc, .LC29 */
8701
8702 far = get_attr_length (insn) >= 3;
8703
8704 /* These branches test an integer register directly, so we can just reverse the code. */
8705 if (reversed ^ far)
8706 code = reverse_condition (code);
8707
8708 /* Only 64-bit versions of these instructions exist. */
8709 gcc_assert (mode == DImode);
8710
8711 /* Start by writing the branch condition. */
8712
8713 switch (code)
8714 {
8715 case NE:
8716 strcpy (string, "brnz");
8717 break;
8718
8719 case EQ:
8720 strcpy (string, "brz");
8721 break;
8722
8723 case GE:
8724 strcpy (string, "brgez");
8725 break;
8726
8727 case LT:
8728 strcpy (string, "brlz");
8729 break;
8730
8731 case LE:
8732 strcpy (string, "brlez");
8733 break;
8734
8735 case GT:
8736 strcpy (string, "brgz");
8737 break;
8738
8739 default:
8740 gcc_unreachable ();
8741 }
8742
8743 p = strchr (string, '\0');
8744
8745 /* Now add the annulling, reg, label, and nop. */
8746 if (annul && ! far)
8747 {
8748 strcpy (p, ",a");
8749 p += 2;
8750 }
8751
8752 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8753 {
8754 strcpy (p,
8755 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8756 >= profile_probability::even ()) ^ far)
8757 ? ",pt" : ",pn");
8758 p += 3;
8759 }
8760
8761 *p = p < string + 8 ? '\t' : ' ';
8762 p++;
8763 *p++ = '%';
8764 *p++ = '0' + reg;
8765 *p++ = ',';
8766 *p++ = ' ';
8767 if (far)
8768 {
8769 int veryfar = 1, delta;
8770
8771 if (INSN_ADDRESSES_SET_P ())
8772 {
8773 delta = (INSN_ADDRESSES (INSN_UID (dest))
8774 - INSN_ADDRESSES (INSN_UID (insn)));
8775 /* Leave some instructions for "slop". */
8776 if (delta >= -260000 && delta < 260000)
8777 veryfar = 0;
8778 }
8779
8780 strcpy (p, ".+12\n\t nop\n\t");
8781 /* Skip the next insn if requested or
8782 if we know that it will be a nop. */
8783 if (annul || ! final_sequence)
8784 p[3] = '6';
8785 p += 12;
8786 if (veryfar)
8787 {
8788 strcpy (p, "b\t");
8789 p += 2;
8790 }
8791 else
8792 {
8793 strcpy (p, "ba,pt\t%%xcc, ");
8794 p += 13;
8795 }
8796 }
8797 *p++ = '%';
8798 *p++ = 'l';
8799 *p++ = '0' + label;
8800 *p++ = '%';
8801 *p++ = '#';
8802 *p = '\0';
8803
8804 return string;
8805 }
8806
8807 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8808 Such instructions cannot be used in the delay slot of a return insn on v9.
8809 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8810 */
8811
8812 static int
8813 epilogue_renumber (register rtx *where, int test)
8814 {
8815 register const char *fmt;
8816 register int i;
8817 register enum rtx_code code;
8818
8819 if (*where == 0)
8820 return 0;
8821
8822 code = GET_CODE (*where);
8823
8824 switch (code)
8825 {
8826 case REG:
8827 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8828 return 1;
8829 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8830 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8831 /* fallthrough */
8832 case SCRATCH:
8833 case CC0:
8834 case PC:
8835 case CONST_INT:
8836 case CONST_WIDE_INT:
8837 case CONST_DOUBLE:
8838 return 0;
8839
8840 /* Do not replace the frame pointer with the stack pointer because
8841 it can cause the delayed instruction to load below the stack.
8842 This occurs when instructions like:
8843
8844 (set (reg/i:SI 24 %i0)
8845 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8846 (const_int -20 [0xffffffec])) 0))
8847
8848 are in the return delayed slot. */
8849 case PLUS:
8850 if (GET_CODE (XEXP (*where, 0)) == REG
8851 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8852 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8853 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8854 return 1;
8855 break;
8856
8857 case MEM:
8858 if (SPARC_STACK_BIAS
8859 && GET_CODE (XEXP (*where, 0)) == REG
8860 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8861 return 1;
8862 break;
8863
8864 default:
8865 break;
8866 }
8867
8868 fmt = GET_RTX_FORMAT (code);
8869
8870 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8871 {
8872 if (fmt[i] == 'E')
8873 {
8874 register int j;
8875 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8876 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8877 return 1;
8878 }
8879 else if (fmt[i] == 'e'
8880 && epilogue_renumber (&(XEXP (*where, i)), test))
8881 return 1;
8882 }
8883 return 0;
8884 }
8885 \f
8886 /* Leaf functions and non-leaf functions have different needs. */
8887
8888 static const int
8889 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8890
8891 static const int
8892 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8893
8894 static const int *const reg_alloc_orders[] = {
8895 reg_leaf_alloc_order,
8896 reg_nonleaf_alloc_order};
8897
8898 void
8899 order_regs_for_local_alloc (void)
8900 {
8901 static int last_order_nonleaf = 1;
8902
8903 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8904 {
8905 last_order_nonleaf = !last_order_nonleaf;
8906 memcpy ((char *) reg_alloc_order,
8907 (const char *) reg_alloc_orders[last_order_nonleaf],
8908 FIRST_PSEUDO_REGISTER * sizeof (int));
8909 }
8910 }
8911 \f
8912 /* Return 1 if REG and MEM are legitimate enough to allow the various
8913 MEM<-->REG splits to be run. */
8914
8915 int
8916 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8917 {
8918 /* Punt if we are here by mistake. */
8919 gcc_assert (reload_completed);
8920
8921 /* We must have an offsettable memory reference. */
8922 if (!offsettable_memref_p (mem))
8923 return 0;
8924
8925 /* If we have legitimate args for ldd/std, we do not want
8926 the split to happen. */
8927 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8928 return 0;
8929
8930 /* Success. */
8931 return 1;
8932 }
8933
8934 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8935
8936 void
8937 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8938 {
8939 rtx high_part = gen_highpart (mode, dest);
8940 rtx low_part = gen_lowpart (mode, dest);
8941 rtx word0 = adjust_address (src, mode, 0);
8942 rtx word1 = adjust_address (src, mode, 4);
8943
8944 if (reg_overlap_mentioned_p (high_part, word1))
8945 {
8946 emit_move_insn_1 (low_part, word1);
8947 emit_move_insn_1 (high_part, word0);
8948 }
8949 else
8950 {
8951 emit_move_insn_1 (high_part, word0);
8952 emit_move_insn_1 (low_part, word1);
8953 }
8954 }
8955
8956 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8957
8958 void
8959 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8960 {
8961 rtx word0 = adjust_address (dest, mode, 0);
8962 rtx word1 = adjust_address (dest, mode, 4);
8963 rtx high_part = gen_highpart (mode, src);
8964 rtx low_part = gen_lowpart (mode, src);
8965
8966 emit_move_insn_1 (word0, high_part);
8967 emit_move_insn_1 (word1, low_part);
8968 }
8969
8970 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8971
8972 int
8973 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8974 {
8975 /* Punt if we are here by mistake. */
8976 gcc_assert (reload_completed);
8977
8978 if (GET_CODE (reg1) == SUBREG)
8979 reg1 = SUBREG_REG (reg1);
8980 if (GET_CODE (reg1) != REG)
8981 return 0;
8982 const int regno1 = REGNO (reg1);
8983
8984 if (GET_CODE (reg2) == SUBREG)
8985 reg2 = SUBREG_REG (reg2);
8986 if (GET_CODE (reg2) != REG)
8987 return 0;
8988 const int regno2 = REGNO (reg2);
8989
8990 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8991 return 1;
8992
8993 if (TARGET_VIS3)
8994 {
8995 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8996 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8997 return 1;
8998 }
8999
9000 return 0;
9001 }
9002
9003 /* Split a REG <--> REG move into a pair of moves in MODE. */
9004
9005 void
9006 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9007 {
9008 rtx dest1 = gen_highpart (mode, dest);
9009 rtx dest2 = gen_lowpart (mode, dest);
9010 rtx src1 = gen_highpart (mode, src);
9011 rtx src2 = gen_lowpart (mode, src);
9012
9013 /* Now emit using the real source and destination we found, swapping
9014 the order if we detect overlap. */
9015 if (reg_overlap_mentioned_p (dest1, src2))
9016 {
9017 emit_move_insn_1 (dest2, src2);
9018 emit_move_insn_1 (dest1, src1);
9019 }
9020 else
9021 {
9022 emit_move_insn_1 (dest1, src1);
9023 emit_move_insn_1 (dest2, src2);
9024 }
9025 }
9026
9027 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9028 This makes them candidates for using ldd and std insns.
9029
9030 Note reg1 and reg2 *must* be hard registers. */
9031
9032 int
9033 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9034 {
9035 /* We might have been passed a SUBREG. */
9036 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9037 return 0;
9038
9039 if (REGNO (reg1) % 2 != 0)
9040 return 0;
9041
9042 /* Integer ldd is deprecated in SPARC V9. */
9043 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9044 return 0;
9045
9046 return (REGNO (reg1) == REGNO (reg2) - 1);
9047 }
9048
9049 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9050 an ldd or std insn.
9051
9052 This can only happen when addr1 and addr2, the addresses in mem1
9053 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9054 addr1 must also be aligned on a 64-bit boundary.
9055
9056 Also, if dependent_reg_rtx is not null, it should not be used to
9057 compute the address for mem1, i.e. we cannot optimize a sequence
9058 like:
9059 ld [%o0], %o0
9060 ld [%o0 + 4], %o1
9061 to
9062 ldd [%o0], %o0
9063 nor:
9064 ld [%g3 + 4], %g3
9065 ld [%g3], %g2
9066 to
9067 ldd [%g3], %g2
9068
9069 But, note that the transformation from:
9070 ld [%g2 + 4], %g3
9071 ld [%g2], %g2
9072 to
9073 ldd [%g2], %g2
9074 is perfectly fine. Thus, the peephole2 patterns always pass us
9075 the destination register of the first load, never the second one.
9076
9077 For stores we don't have a similar problem, so dependent_reg_rtx is
9078 NULL_RTX. */
9079
9080 int
9081 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9082 {
9083 rtx addr1, addr2;
9084 unsigned int reg1;
9085 HOST_WIDE_INT offset1;
9086
9087 /* The mems cannot be volatile. */
9088 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9089 return 0;
9090
9091 /* MEM1 should be aligned on a 64-bit boundary. */
9092 if (MEM_ALIGN (mem1) < 64)
9093 return 0;
9094
9095 addr1 = XEXP (mem1, 0);
9096 addr2 = XEXP (mem2, 0);
9097
9098 /* Extract a register number and offset (if used) from the first addr. */
9099 if (GET_CODE (addr1) == PLUS)
9100 {
9101 /* If not a REG, return zero. */
9102 if (GET_CODE (XEXP (addr1, 0)) != REG)
9103 return 0;
9104 else
9105 {
9106 reg1 = REGNO (XEXP (addr1, 0));
9107 /* The offset must be constant! */
9108 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9109 return 0;
9110 offset1 = INTVAL (XEXP (addr1, 1));
9111 }
9112 }
9113 else if (GET_CODE (addr1) != REG)
9114 return 0;
9115 else
9116 {
9117 reg1 = REGNO (addr1);
9118 /* This was a simple (mem (reg)) expression. Offset is 0. */
9119 offset1 = 0;
9120 }
9121
9122 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9123 if (GET_CODE (addr2) != PLUS)
9124 return 0;
9125
9126 if (GET_CODE (XEXP (addr2, 0)) != REG
9127 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9128 return 0;
9129
9130 if (reg1 != REGNO (XEXP (addr2, 0)))
9131 return 0;
9132
9133 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9134 return 0;
9135
9136 /* The first offset must be evenly divisible by 8 to ensure the
9137 address is 64-bit aligned. */
9138 if (offset1 % 8 != 0)
9139 return 0;
9140
9141 /* The offset for the second addr must be 4 more than the first addr. */
9142 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9143 return 0;
9144
9145 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9146 instructions. */
9147 return 1;
9148 }
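/* Worked example (illustrative only): the pair

     ld  [%o0 + 8], %o2
     ld  [%o0 + 12], %o3

   satisfies the checks above (same base register, first offset a multiple
   of 8, second offset 4 larger, suitably aligned memory) and may be combined
   into

     ldd [%o0 + 8], %o2

   provided registers_ok_for_ldd_peep also accepts the %o2/%o3 pair.  */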
9149
9150 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9151
9152 rtx
9153 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9154 {
9155 rtx x = widen_memory_access (mem1, mode, 0);
9156 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9157 return x;
9158 }
9159
9160 /* Return 1 if reg is a pseudo, or is the first register in
9161 a hard register pair. This makes it suitable for use in
9162 ldd and std insns. */
9163
9164 int
9165 register_ok_for_ldd (rtx reg)
9166 {
9167 /* We might have been passed a SUBREG. */
9168 if (!REG_P (reg))
9169 return 0;
9170
9171 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9172 return (REGNO (reg) % 2 == 0);
9173
9174 return 1;
9175 }
9176
9177 /* Return 1 if OP, a MEM, has an address which is known to be
9178 aligned to an 8-byte boundary. */
9179
9180 int
9181 memory_ok_for_ldd (rtx op)
9182 {
9183 /* In 64-bit mode, we assume that the address is word-aligned. */
9184 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9185 return 0;
9186
9187 if (! can_create_pseudo_p ()
9188 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9189 return 0;
9190
9191 return 1;
9192 }
9193 \f
9194 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9195
9196 static bool
9197 sparc_print_operand_punct_valid_p (unsigned char code)
9198 {
9199 if (code == '#'
9200 || code == '*'
9201 || code == '('
9202 || code == ')'
9203 || code == '_'
9204 || code == '&')
9205 return true;
9206
9207 return false;
9208 }
9209
9210 /* Implement TARGET_PRINT_OPERAND.
9211 Print operand X (an rtx) in assembler syntax to file FILE.
9212 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9213 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9214
9215 static void
9216 sparc_print_operand (FILE *file, rtx x, int code)
9217 {
9218 const char *s;
9219
9220 switch (code)
9221 {
9222 case '#':
9223 /* Output an insn in a delay slot. */
9224 if (final_sequence)
9225 sparc_indent_opcode = 1;
9226 else
9227 fputs ("\n\t nop", file);
9228 return;
9229 case '*':
9230 /* Output an annul flag if there's nothing for the delay slot and we
9231 are optimizing. This is always used with '(' below.
9232 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9233 this is a dbx bug. So, we only do this when optimizing.
9234 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9235 Always emit a nop in case the next instruction is a branch. */
9236 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9237 fputs (",a", file);
9238 return;
9239 case '(':
9240 /* Output a 'nop' if there's nothing for the delay slot and we are
9241 not optimizing. This is always used with '*' above. */
9242 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9243 fputs ("\n\t nop", file);
9244 else if (final_sequence)
9245 sparc_indent_opcode = 1;
9246 return;
9247 case ')':
9248 /* Output the right displacement from the saved PC on function return.
9249 The caller may have placed an "unimp" insn immediately after the call,
9250 so we have to account for it. This insn is used in the 32-bit ABI
9251 when calling a function that returns a non-zero-sized structure. The
9252 64-bit ABI doesn't have it. Be careful to have this test be the same
9253 as that for the call. The exception is when sparc_std_struct_return
9254 is enabled, the psABI is followed exactly and the adjustment is made
9255 by the code in sparc_struct_value_rtx. The call emitted is the same
9256 when sparc_std_struct_return is enabled. */
9257 if (!TARGET_ARCH64
9258 && cfun->returns_struct
9259 && !sparc_std_struct_return
9260 && DECL_SIZE (DECL_RESULT (current_function_decl))
9261 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9262 == INTEGER_CST
9263 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9264 fputs ("12", file);
9265 else
9266 fputc ('8', file);
9267 return;
9268 case '_':
9269 /* Output the Embedded Medium/Anywhere code model base register. */
9270 fputs (EMBMEDANY_BASE_REG, file);
9271 return;
9272 case '&':
9273 /* Print some local dynamic TLS name. */
9274 if (const char *name = get_some_local_dynamic_name ())
9275 assemble_name (file, name);
9276 else
9277 output_operand_lossage ("'%%&' used without any "
9278 "local dynamic TLS references");
9279 return;
9280
9281 case 'Y':
9282 /* Adjust the operand to take into account a RESTORE operation. */
9283 if (GET_CODE (x) == CONST_INT)
9284 break;
9285 else if (GET_CODE (x) != REG)
9286 output_operand_lossage ("invalid %%Y operand");
9287 else if (REGNO (x) < 8)
9288 fputs (reg_names[REGNO (x)], file);
9289 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9290 fputs (reg_names[REGNO (x)-16], file);
9291 else
9292 output_operand_lossage ("invalid %%Y operand");
9293 return;
9294 case 'L':
9295 /* Print out the low order register name of a register pair. */
9296 if (WORDS_BIG_ENDIAN)
9297 fputs (reg_names[REGNO (x)+1], file);
9298 else
9299 fputs (reg_names[REGNO (x)], file);
9300 return;
9301 case 'H':
9302 /* Print out the high order register name of a register pair. */
9303 if (WORDS_BIG_ENDIAN)
9304 fputs (reg_names[REGNO (x)], file);
9305 else
9306 fputs (reg_names[REGNO (x)+1], file);
9307 return;
9308 case 'R':
9309 /* Print out the second register name of a register pair or quad.
9310 I.e., R (%o0) => %o1. */
9311 fputs (reg_names[REGNO (x)+1], file);
9312 return;
9313 case 'S':
9314 /* Print out the third register name of a register quad.
9315 I.e., S (%o0) => %o2. */
9316 fputs (reg_names[REGNO (x)+2], file);
9317 return;
9318 case 'T':
9319 /* Print out the fourth register name of a register quad.
9320 I.e., T (%o0) => %o3. */
9321 fputs (reg_names[REGNO (x)+3], file);
9322 return;
9323 case 'x':
9324 /* Print a condition code register. */
9325 if (REGNO (x) == SPARC_ICC_REG)
9326 {
9327 switch (GET_MODE (x))
9328 {
9329 case E_CCmode:
9330 case E_CCNZmode:
9331 case E_CCCmode:
9332 case E_CCVmode:
9333 s = "%icc";
9334 break;
9335 case E_CCXmode:
9336 case E_CCXNZmode:
9337 case E_CCXCmode:
9338 case E_CCXVmode:
9339 s = "%xcc";
9340 break;
9341 default:
9342 gcc_unreachable ();
9343 }
9344 fputs (s, file);
9345 }
9346 else
9347 /* %fccN register */
9348 fputs (reg_names[REGNO (x)], file);
9349 return;
9350 case 'm':
9351 /* Print the operand's address only. */
9352 output_address (GET_MODE (x), XEXP (x, 0));
9353 return;
9354 case 'r':
9355 /* In this case we need a register. Use %g0 if the
9356 operand is const0_rtx. */
9357 if (x == const0_rtx
9358 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9359 {
9360 fputs ("%g0", file);
9361 return;
9362 }
9363 else
9364 break;
9365
9366 case 'A':
9367 switch (GET_CODE (x))
9368 {
9369 case IOR:
9370 s = "or";
9371 break;
9372 case AND:
9373 s = "and";
9374 break;
9375 case XOR:
9376 s = "xor";
9377 break;
9378 default:
9379 output_operand_lossage ("invalid %%A operand");
9380 s = "";
9381 break;
9382 }
9383 fputs (s, file);
9384 return;
9385
9386 case 'B':
9387 switch (GET_CODE (x))
9388 {
9389 case IOR:
9390 s = "orn";
9391 break;
9392 case AND:
9393 s = "andn";
9394 break;
9395 case XOR:
9396 s = "xnor";
9397 break;
9398 default:
9399 output_operand_lossage ("invalid %%B operand");
9400 s = "";
9401 break;
9402 }
9403 fputs (s, file);
9404 return;
9405
9406 /* This is used by the conditional move instructions. */
9407 case 'C':
9408 {
9409 machine_mode mode = GET_MODE (XEXP (x, 0));
9410 switch (GET_CODE (x))
9411 {
9412 case NE:
9413 if (mode == CCVmode || mode == CCXVmode)
9414 s = "vs";
9415 else
9416 s = "ne";
9417 break;
9418 case EQ:
9419 if (mode == CCVmode || mode == CCXVmode)
9420 s = "vc";
9421 else
9422 s = "e";
9423 break;
9424 case GE:
9425 if (mode == CCNZmode || mode == CCXNZmode)
9426 s = "pos";
9427 else
9428 s = "ge";
9429 break;
9430 case GT:
9431 s = "g";
9432 break;
9433 case LE:
9434 s = "le";
9435 break;
9436 case LT:
9437 if (mode == CCNZmode || mode == CCXNZmode)
9438 s = "neg";
9439 else
9440 s = "l";
9441 break;
9442 case GEU:
9443 s = "geu";
9444 break;
9445 case GTU:
9446 s = "gu";
9447 break;
9448 case LEU:
9449 s = "leu";
9450 break;
9451 case LTU:
9452 s = "lu";
9453 break;
9454 case LTGT:
9455 s = "lg";
9456 break;
9457 case UNORDERED:
9458 s = "u";
9459 break;
9460 case ORDERED:
9461 s = "o";
9462 break;
9463 case UNLT:
9464 s = "ul";
9465 break;
9466 case UNLE:
9467 s = "ule";
9468 break;
9469 case UNGT:
9470 s = "ug";
9471 break;
9472 case UNGE:
9473 s = "uge";
9474 break;
9475 case UNEQ:
9476 s = "ue";
9477 break;
9478 default:
9479 output_operand_lossage ("invalid %%C operand");
9480 s = "";
9481 break;
9482 }
9483 fputs (s, file);
9484 return;
9485 }
9486
9487 /* These are used by the movr instruction pattern. */
9488 case 'D':
9489 {
9490 switch (GET_CODE (x))
9491 {
9492 case NE:
9493 s = "ne";
9494 break;
9495 case EQ:
9496 s = "e";
9497 break;
9498 case GE:
9499 s = "gez";
9500 break;
9501 case LT:
9502 s = "lz";
9503 break;
9504 case LE:
9505 s = "lez";
9506 break;
9507 case GT:
9508 s = "gz";
9509 break;
9510 default:
9511 output_operand_lossage ("invalid %%D operand");
9512 s = "";
9513 break;
9514 }
9515 fputs (s, file);
9516 return;
9517 }
9518
9519 case 'b':
9520 {
9521 /* Print a sign-extended character. */
9522 int i = trunc_int_for_mode (INTVAL (x), QImode);
9523 fprintf (file, "%d", i);
9524 return;
9525 }
9526
9527 case 'f':
9528 /* Operand must be a MEM; write its address. */
9529 if (GET_CODE (x) != MEM)
9530 output_operand_lossage ("invalid %%f operand");
9531 output_address (GET_MODE (x), XEXP (x, 0));
9532 return;
9533
9534 case 's':
9535 {
9536 /* Print a sign-extended 32-bit value. */
9537 HOST_WIDE_INT i;
9538 if (GET_CODE(x) == CONST_INT)
9539 i = INTVAL (x);
9540 else
9541 {
9542 output_operand_lossage ("invalid %%s operand");
9543 return;
9544 }
9545 i = trunc_int_for_mode (i, SImode);
9546 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9547 return;
9548 }
9549
9550 case 0:
9551 /* Do nothing special. */
9552 break;
9553
9554 default:
9555 /* Undocumented flag. */
9556 output_operand_lossage ("invalid operand output code");
9557 }
9558
9559 if (GET_CODE (x) == REG)
9560 fputs (reg_names[REGNO (x)], file);
9561 else if (GET_CODE (x) == MEM)
9562 {
9563 fputc ('[', file);
9564 /* Poor Sun assembler doesn't understand absolute addressing. */
9565 if (CONSTANT_P (XEXP (x, 0)))
9566 fputs ("%g0+", file);
9567 output_address (GET_MODE (x), XEXP (x, 0));
9568 fputc (']', file);
9569 }
9570 else if (GET_CODE (x) == HIGH)
9571 {
9572 fputs ("%hi(", file);
9573 output_addr_const (file, XEXP (x, 0));
9574 fputc (')', file);
9575 }
9576 else if (GET_CODE (x) == LO_SUM)
9577 {
9578 sparc_print_operand (file, XEXP (x, 0), 0);
9579 if (TARGET_CM_MEDMID)
9580 fputs ("+%l44(", file);
9581 else
9582 fputs ("+%lo(", file);
9583 output_addr_const (file, XEXP (x, 1));
9584 fputc (')', file);
9585 }
9586 else if (GET_CODE (x) == CONST_DOUBLE)
9587 output_operand_lossage ("floating-point constant not a valid immediate operand");
9588 else
9589 output_addr_const (file, x);
9590 }
9591
9592 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
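/* Illustrative examples of the address syntax emitted below (the operands
   shown are hypothetical, not from the original source): a plain register
   prints as "%i0"; register plus constant as "%fp-8" or "%sp+64"; register
   plus register as "%o0+%o1"; a LO_SUM as "%l7+%lo(sym)"; and a label
   difference as "sym-(.LL1-.)". */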
9593
9594 static void
9595 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9596 {
9597 register rtx base, index = 0;
9598 int offset = 0;
9599 register rtx addr = x;
9600
9601 if (REG_P (addr))
9602 fputs (reg_names[REGNO (addr)], file);
9603 else if (GET_CODE (addr) == PLUS)
9604 {
9605 if (CONST_INT_P (XEXP (addr, 0)))
9606 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9607 else if (CONST_INT_P (XEXP (addr, 1)))
9608 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9609 else
9610 base = XEXP (addr, 0), index = XEXP (addr, 1);
9611 if (GET_CODE (base) == LO_SUM)
9612 {
9613 gcc_assert (USE_AS_OFFSETABLE_LO10
9614 && TARGET_ARCH64
9615 && ! TARGET_CM_MEDMID);
9616 output_operand (XEXP (base, 0), 0);
9617 fputs ("+%lo(", file);
9618 output_address (VOIDmode, XEXP (base, 1));
9619 fprintf (file, ")+%d", offset);
9620 }
9621 else
9622 {
9623 fputs (reg_names[REGNO (base)], file);
9624 if (index == 0)
9625 fprintf (file, "%+d", offset);
9626 else if (REG_P (index))
9627 fprintf (file, "+%s", reg_names[REGNO (index)]);
9628 else if (GET_CODE (index) == SYMBOL_REF
9629 || GET_CODE (index) == LABEL_REF
9630 || GET_CODE (index) == CONST)
9631 fputc ('+', file), output_addr_const (file, index);
9632 else gcc_unreachable ();
9633 }
9634 }
9635 else if (GET_CODE (addr) == MINUS
9636 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9637 {
9638 output_addr_const (file, XEXP (addr, 0));
9639 fputs ("-(", file);
9640 output_addr_const (file, XEXP (addr, 1));
9641 fputs ("-.)", file);
9642 }
9643 else if (GET_CODE (addr) == LO_SUM)
9644 {
9645 output_operand (XEXP (addr, 0), 0);
9646 if (TARGET_CM_MEDMID)
9647 fputs ("+%l44(", file);
9648 else
9649 fputs ("+%lo(", file);
9650 output_address (VOIDmode, XEXP (addr, 1));
9651 fputc (')', file);
9652 }
9653 else if (flag_pic
9654 && GET_CODE (addr) == CONST
9655 && GET_CODE (XEXP (addr, 0)) == MINUS
9656 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9657 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9658 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9659 {
9660 addr = XEXP (addr, 0);
9661 output_addr_const (file, XEXP (addr, 0));
9662 /* Group the args of the second CONST in parenthesis. */
9663 fputs ("-(", file);
9664 /* Skip past the second CONST--it does nothing for us. */
9665 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9666 /* Close the parenthesis. */
9667 fputc (')', file);
9668 }
9669 else
9670 {
9671 output_addr_const (file, addr);
9672 }
9673 }
9674 \f
9675 /* Target hook for assembling integer objects. The sparc version has
9676 special handling for aligned DI-mode objects. */
9677
9678 static bool
9679 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9680 {
9681 /* ??? We only output .xword's for symbols and only then in environments
9682 where the assembler can handle them. */
9683 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9684 {
9685 if (TARGET_V9)
9686 {
9687 assemble_integer_with_op ("\t.xword\t", x);
9688 return true;
9689 }
9690 else
9691 {
9692 assemble_aligned_integer (4, const0_rtx);
9693 assemble_aligned_integer (4, x);
9694 return true;
9695 }
9696 }
9697 return default_assemble_integer (x, size, aligned_p);
9698 }
9699 \f
9700 /* Return the value of a code used in the .proc pseudo-op that says
9701 what kind of result this function returns. For non-C types, we pick
9702 the closest C type. */
9703
9704 #ifndef SHORT_TYPE_SIZE
9705 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9706 #endif
9707
9708 #ifndef INT_TYPE_SIZE
9709 #define INT_TYPE_SIZE BITS_PER_WORD
9710 #endif
9711
9712 #ifndef LONG_TYPE_SIZE
9713 #define LONG_TYPE_SIZE BITS_PER_WORD
9714 #endif
9715
9716 #ifndef LONG_LONG_TYPE_SIZE
9717 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9718 #endif
9719
9720 #ifndef FLOAT_TYPE_SIZE
9721 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9722 #endif
9723
9724 #ifndef DOUBLE_TYPE_SIZE
9725 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9726 #endif
9727
9728 #ifndef LONG_DOUBLE_TYPE_SIZE
9729 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9730 #endif
9731
9732 unsigned long
9733 sparc_type_code (register tree type)
9734 {
9735 register unsigned long qualifiers = 0;
9736 register unsigned shift;
9737
9738 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9739 setting more, since some assemblers will give an error for this. Also,
9740 we must be careful to avoid shifts of 32 bits or more to avoid getting
9741 unpredictable results. */
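/* Worked example, not in the original source: for a C type like "int *",
   the first loop iteration stores the pointer code 1 in the 2-bit field at
   shift 6, and the next iteration reaches the INTEGER_TYPE case, which for
   a signed int of INT_TYPE_SIZE returns qualifiers | 4; the final type code
   is therefore (1 << 6) | 4 == 0x44. */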
9742
9743 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9744 {
9745 switch (TREE_CODE (type))
9746 {
9747 case ERROR_MARK:
9748 return qualifiers;
9749
9750 case ARRAY_TYPE:
9751 qualifiers |= (3 << shift);
9752 break;
9753
9754 case FUNCTION_TYPE:
9755 case METHOD_TYPE:
9756 qualifiers |= (2 << shift);
9757 break;
9758
9759 case POINTER_TYPE:
9760 case REFERENCE_TYPE:
9761 case OFFSET_TYPE:
9762 qualifiers |= (1 << shift);
9763 break;
9764
9765 case RECORD_TYPE:
9766 return (qualifiers | 8);
9767
9768 case UNION_TYPE:
9769 case QUAL_UNION_TYPE:
9770 return (qualifiers | 9);
9771
9772 case ENUMERAL_TYPE:
9773 return (qualifiers | 10);
9774
9775 case VOID_TYPE:
9776 return (qualifiers | 16);
9777
9778 case INTEGER_TYPE:
9779 /* If this is a range type, consider it to be the underlying
9780 type. */
9781 if (TREE_TYPE (type) != 0)
9782 break;
9783
9784 /* Carefully distinguish all the standard types of C,
9785 without messing up if the language is not C. We do this by
9786 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9787 look at both the names and the above fields, but that's redundant.
9788 Any type whose size is between two C types will be considered
9789 to be the wider of the two types. Also, we do not have a
9790 special code to use for "long long", so anything wider than
9791 long is treated the same. Note that we can't distinguish
9792 between "int" and "long" in this code if they are the same
9793 size, but that's fine, since neither can the assembler. */
9794
9795 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9796 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9797
9798 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9799 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9800
9801 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9802 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9803
9804 else
9805 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9806
9807 case REAL_TYPE:
9808 /* If this is a range type, consider it to be the underlying
9809 type. */
9810 if (TREE_TYPE (type) != 0)
9811 break;
9812
9813 /* Carefully distinguish all the standard types of C,
9814 without messing up if the language is not C. */
9815
9816 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9817 return (qualifiers | 6);
9818
9819 else
9820 return (qualifiers | 7);
9821
9822 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9823 /* ??? We need to distinguish between double and float complex types,
9824 but I don't know how yet because I can't reach this code from
9825 existing front-ends. */
9826 return (qualifiers | 7); /* Who knows? */
9827
9828 case VECTOR_TYPE:
9829 case BOOLEAN_TYPE: /* Boolean truth value type. */
9830 case LANG_TYPE:
9831 case NULLPTR_TYPE:
9832 return qualifiers;
9833
9834 default:
9835 gcc_unreachable (); /* Not a type! */
9836 }
9837 }
9838
9839 return qualifiers;
9840 }
9841 \f
9842 /* Nested function support. */
9843
9844 /* Emit RTL insns to initialize the variable parts of a trampoline.
9845 FNADDR is an RTX for the address of the function's pure code.
9846 CXT is an RTX for the static chain value for the function.
9847
9848 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9849 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9850 (to store insns). This is a bit excessive. Perhaps a different
9851 mechanism would be better here.
9852
9853 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9854
9855 static void
9856 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9857 {
9858 /* SPARC 32-bit trampoline:
9859
9860 sethi %hi(fn), %g1
9861 sethi %hi(static), %g2
9862 jmp %g1+%lo(fn)
9863 or %g2, %lo(static), %g2
9864
9865 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9866 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9867 */
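/* Worked example, not in the original source: the first word stored below
   is 0x03000000 | (fnaddr >> 10). 0x03000000 encodes "sethi %hi(0), %g1",
   and the upper 22 bits of FNADDR fill its immediate field, yielding
   "sethi %hi(fn), %g1"; the remaining low 10 bits of FNADDR are merged into
   the "jmp %g1+%lo(fn)" word (0x81c06000) further down. */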
9868
9869 emit_move_insn
9870 (adjust_address (m_tramp, SImode, 0),
9871 expand_binop (SImode, ior_optab,
9872 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9873 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9874 NULL_RTX, 1, OPTAB_DIRECT));
9875
9876 emit_move_insn
9877 (adjust_address (m_tramp, SImode, 4),
9878 expand_binop (SImode, ior_optab,
9879 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9880 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9881 NULL_RTX, 1, OPTAB_DIRECT));
9882
9883 emit_move_insn
9884 (adjust_address (m_tramp, SImode, 8),
9885 expand_binop (SImode, ior_optab,
9886 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9887 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9888 NULL_RTX, 1, OPTAB_DIRECT));
9889
9890 emit_move_insn
9891 (adjust_address (m_tramp, SImode, 12),
9892 expand_binop (SImode, ior_optab,
9893 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9894 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9895 NULL_RTX, 1, OPTAB_DIRECT));
9896
9897 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9898 aligned on a 16 byte boundary so one flush clears it all. */
9899 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9900 if (sparc_cpu != PROCESSOR_ULTRASPARC
9901 && sparc_cpu != PROCESSOR_ULTRASPARC3
9902 && sparc_cpu != PROCESSOR_NIAGARA
9903 && sparc_cpu != PROCESSOR_NIAGARA2
9904 && sparc_cpu != PROCESSOR_NIAGARA3
9905 && sparc_cpu != PROCESSOR_NIAGARA4
9906 && sparc_cpu != PROCESSOR_NIAGARA7
9907 && sparc_cpu != PROCESSOR_M8)
9908 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9909
9910 /* Call __enable_execute_stack after writing onto the stack to make sure
9911 the stack address is accessible. */
9912 #ifdef HAVE_ENABLE_EXECUTE_STACK
9913 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9914 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9915 #endif
9916
9917 }
9918
9919 /* The 64-bit version is simpler because it makes more sense to load the
9920 values as "immediate" data out of the trampoline. It's also easier since
9921 we can read the PC without clobbering a register. */
9922
9923 static void
9924 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9925 {
9926 /* SPARC 64-bit trampoline:
9927
9928 rd %pc, %g1
9929 ldx [%g1+24], %g5
9930 jmp %g5
9931 ldx [%g1+16], %g5
9932 +16 bytes data
9933 */
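/* Layout note, not in the original source: the four SImode stores below
   write the fixed instruction words shown above, while the two DImode
   stores at offsets 16 and 24 hold CXT and FNADDR; at run time the
   "ldx [%g1+16]" and "ldx [%g1+24]" instructions reload them relative to
   the %pc value read into %g1. */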
9934
9935 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9936 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9937 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9938 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9939 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9940 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9941 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9942 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9943 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9944 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9945 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9946
9947 if (sparc_cpu != PROCESSOR_ULTRASPARC
9948 && sparc_cpu != PROCESSOR_ULTRASPARC3
9949 && sparc_cpu != PROCESSOR_NIAGARA
9950 && sparc_cpu != PROCESSOR_NIAGARA2
9951 && sparc_cpu != PROCESSOR_NIAGARA3
9952 && sparc_cpu != PROCESSOR_NIAGARA4
9953 && sparc_cpu != PROCESSOR_NIAGARA7
9954 && sparc_cpu != PROCESSOR_M8)
9955 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9956
9957 /* Call __enable_execute_stack after writing onto the stack to make sure
9958 the stack address is accessible. */
9959 #ifdef HAVE_ENABLE_EXECUTE_STACK
9960 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9961 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9962 #endif
9963 }
9964
9965 /* Worker for TARGET_TRAMPOLINE_INIT. */
9966
9967 static void
9968 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9969 {
9970 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9971 cxt = force_reg (Pmode, cxt);
9972 if (TARGET_ARCH64)
9973 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9974 else
9975 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9976 }
9977 \f
9978 /* Adjust the cost of a scheduling dependency. Return the new cost of
9979 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9980
9981 static int
9982 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9983 int cost)
9984 {
9985 enum attr_type insn_type;
9986
9987 if (recog_memoized (insn) < 0)
9988 return cost;
9989
9990 insn_type = get_attr_type (insn);
9991
9992 if (dep_type == 0)
9993 {
9994 /* Data dependency; DEP_INSN writes a register that INSN reads some
9995 cycles later. */
9996
9997 /* if a load, then the dependence must be on the memory address;
9998 add an extra "cycle". Note that the cost could be two cycles
9999 if the reg was written late in an instruction group; we can't tell
10000 here. */
10001 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10002 return cost + 3;
10003
10004 /* Get the delay only if the address of the store is the dependence. */
10005 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10006 {
10007 rtx pat = PATTERN(insn);
10008 rtx dep_pat = PATTERN (dep_insn);
10009
10010 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10011 return cost; /* This should not happen! */
10012
10013 /* The dependency between the two instructions was on the data that
10014 is being stored. Assume that this implies that the address of the
10015 store is not dependent. */
10016 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10017 return cost;
10018
10019 return cost + 3; /* An approximation. */
10020 }
10021
10022 /* A shift instruction cannot receive its data from an instruction
10023 in the same cycle; add a one cycle penalty. */
10024 if (insn_type == TYPE_SHIFT)
10025 return cost + 3; /* Split before cascade into shift. */
10026 }
10027 else
10028 {
10029 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10030 INSN writes some cycles later. */
10031
10032 /* These are only significant for the fpu unit; writing a fp reg before
10033 the fpu has finished with it stalls the processor. */
10034
10035 /* Reusing an integer register causes no problems. */
10036 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10037 return 0;
10038 }
10039
10040 return cost;
10041 }
10042
10043 static int
10044 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10045 int cost)
10046 {
10047 enum attr_type insn_type, dep_type;
10048 rtx pat = PATTERN(insn);
10049 rtx dep_pat = PATTERN (dep_insn);
10050
10051 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10052 return cost;
10053
10054 insn_type = get_attr_type (insn);
10055 dep_type = get_attr_type (dep_insn);
10056
10057 switch (dtype)
10058 {
10059 case 0:
10060 /* Data dependency; DEP_INSN writes a register that INSN reads some
10061 cycles later. */
10062
10063 switch (insn_type)
10064 {
10065 case TYPE_STORE:
10066 case TYPE_FPSTORE:
10067 /* Get the delay iff the address of the store is the dependence. */
10068 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10069 return cost;
10070
10071 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10072 return cost;
10073 return cost + 3;
10074
10075 case TYPE_LOAD:
10076 case TYPE_SLOAD:
10077 case TYPE_FPLOAD:
10078 /* If a load, then the dependence must be on the memory address. If
10079 the addresses aren't equal, then it might be a false dependency */
10080 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10081 {
10082 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10083 || GET_CODE (SET_DEST (dep_pat)) != MEM
10084 || GET_CODE (SET_SRC (pat)) != MEM
10085 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10086 XEXP (SET_SRC (pat), 0)))
10087 return cost + 2;
10088
10089 return cost + 8;
10090 }
10091 break;
10092
10093 case TYPE_BRANCH:
10094 /* Compare to branch latency is 0. There is no benefit from
10095 separating compare and branch. */
10096 if (dep_type == TYPE_COMPARE)
10097 return 0;
10098 /* Floating point compare to branch latency is less than
10099 compare to conditional move. */
10100 if (dep_type == TYPE_FPCMP)
10101 return cost - 1;
10102 break;
10103 default:
10104 break;
10105 }
10106 break;
10107
10108 case REG_DEP_ANTI:
10109 /* Anti-dependencies only penalize the fpu unit. */
10110 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10111 return 0;
10112 break;
10113
10114 default:
10115 break;
10116 }
10117
10118 return cost;
10119 }
10120
10121 static int
10122 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10123 unsigned int)
10124 {
10125 switch (sparc_cpu)
10126 {
10127 case PROCESSOR_SUPERSPARC:
10128 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10129 break;
10130 case PROCESSOR_HYPERSPARC:
10131 case PROCESSOR_SPARCLITE86X:
10132 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10133 break;
10134 default:
10135 break;
10136 }
10137 return cost;
10138 }
10139
10140 static void
10141 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10142 int sched_verbose ATTRIBUTE_UNUSED,
10143 int max_ready ATTRIBUTE_UNUSED)
10144 {}
10145
10146 static int
10147 sparc_use_sched_lookahead (void)
10148 {
10149 if (sparc_cpu == PROCESSOR_NIAGARA
10150 || sparc_cpu == PROCESSOR_NIAGARA2
10151 || sparc_cpu == PROCESSOR_NIAGARA3)
10152 return 0;
10153 if (sparc_cpu == PROCESSOR_NIAGARA4
10154 || sparc_cpu == PROCESSOR_NIAGARA7
10155 || sparc_cpu == PROCESSOR_M8)
10156 return 2;
10157 if (sparc_cpu == PROCESSOR_ULTRASPARC
10158 || sparc_cpu == PROCESSOR_ULTRASPARC3)
10159 return 4;
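/* Note, not in the original source: the bit-mask test below is a compact
   membership check, equivalent to comparing sparc_cpu in turn against
   PROCESSOR_SUPERSPARC, PROCESSOR_HYPERSPARC and PROCESSOR_SPARCLITE86X. */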
10160 if ((1 << sparc_cpu) &
10161 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
10162 (1 << PROCESSOR_SPARCLITE86X)))
10163 return 3;
10164 return 0;
10165 }
10166
10167 static int
10168 sparc_issue_rate (void)
10169 {
10170 switch (sparc_cpu)
10171 {
10172 case PROCESSOR_NIAGARA:
10173 case PROCESSOR_NIAGARA2:
10174 case PROCESSOR_NIAGARA3:
10175 default:
10176 return 1;
10177 case PROCESSOR_NIAGARA4:
10178 case PROCESSOR_NIAGARA7:
10179 case PROCESSOR_V9:
10180 /* Assume V9 processors are capable of at least dual-issue. */
10181 return 2;
10182 case PROCESSOR_SUPERSPARC:
10183 return 3;
10184 case PROCESSOR_HYPERSPARC:
10185 case PROCESSOR_SPARCLITE86X:
10186 return 2;
10187 case PROCESSOR_ULTRASPARC:
10188 case PROCESSOR_ULTRASPARC3:
10189 case PROCESSOR_M8:
10190 return 4;
10191 }
10192 }
10193
10194 static int
10195 set_extends (rtx_insn *insn)
10196 {
10197 register rtx pat = PATTERN (insn);
10198
10199 switch (GET_CODE (SET_SRC (pat)))
10200 {
10201 /* Load and some shift instructions zero extend. */
10202 case MEM:
10203 case ZERO_EXTEND:
10204 /* sethi clears the high bits */
10205 case HIGH:
10206 /* LO_SUM is used with sethi. sethi cleared the high
10207 bits and the values used with lo_sum are positive */
10208 case LO_SUM:
10209 /* Store flag stores 0 or 1 */
10210 case LT: case LTU:
10211 case GT: case GTU:
10212 case LE: case LEU:
10213 case GE: case GEU:
10214 case EQ:
10215 case NE:
10216 return 1;
10217 case AND:
10218 {
10219 rtx op0 = XEXP (SET_SRC (pat), 0);
10220 rtx op1 = XEXP (SET_SRC (pat), 1);
10221 if (GET_CODE (op1) == CONST_INT)
10222 return INTVAL (op1) >= 0;
10223 if (GET_CODE (op0) != REG)
10224 return 0;
10225 if (sparc_check_64 (op0, insn) == 1)
10226 return 1;
10227 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10228 }
10229 case IOR:
10230 case XOR:
10231 {
10232 rtx op0 = XEXP (SET_SRC (pat), 0);
10233 rtx op1 = XEXP (SET_SRC (pat), 1);
10234 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10235 return 0;
10236 if (GET_CODE (op1) == CONST_INT)
10237 return INTVAL (op1) >= 0;
10238 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10239 }
10240 case LSHIFTRT:
10241 return GET_MODE (SET_SRC (pat)) == SImode;
10242 /* Positive integers leave the high bits zero. */
10243 case CONST_INT:
10244 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10245 case ASHIFTRT:
10246 case SIGN_EXTEND:
10247 return - (GET_MODE (SET_SRC (pat)) == SImode);
10248 case REG:
10249 return sparc_check_64 (SET_SRC (pat), insn);
10250 default:
10251 return 0;
10252 }
10253 }
10254
10255 /* We _ought_ to have only one kind per function, but... */
10256 static GTY(()) rtx sparc_addr_diff_list;
10257 static GTY(()) rtx sparc_addr_list;
10258
10259 void
10260 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10261 {
10262 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10263 if (diff)
10264 sparc_addr_diff_list
10265 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10266 else
10267 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10268 }
10269
10270 static void
10271 sparc_output_addr_vec (rtx vec)
10272 {
10273 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10274 int idx, vlen = XVECLEN (body, 0);
10275
10276 #ifdef ASM_OUTPUT_ADDR_VEC_START
10277 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10278 #endif
10279
10280 #ifdef ASM_OUTPUT_CASE_LABEL
10281 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10282 NEXT_INSN (lab));
10283 #else
10284 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10285 #endif
10286
10287 for (idx = 0; idx < vlen; idx++)
10288 {
10289 ASM_OUTPUT_ADDR_VEC_ELT
10290 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10291 }
10292
10293 #ifdef ASM_OUTPUT_ADDR_VEC_END
10294 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10295 #endif
10296 }
10297
10298 static void
10299 sparc_output_addr_diff_vec (rtx vec)
10300 {
10301 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10302 rtx base = XEXP (XEXP (body, 0), 0);
10303 int idx, vlen = XVECLEN (body, 1);
10304
10305 #ifdef ASM_OUTPUT_ADDR_VEC_START
10306 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10307 #endif
10308
10309 #ifdef ASM_OUTPUT_CASE_LABEL
10310 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10311 NEXT_INSN (lab));
10312 #else
10313 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10314 #endif
10315
10316 for (idx = 0; idx < vlen; idx++)
10317 {
10318 ASM_OUTPUT_ADDR_DIFF_ELT
10319 (asm_out_file,
10320 body,
10321 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10322 CODE_LABEL_NUMBER (base));
10323 }
10324
10325 #ifdef ASM_OUTPUT_ADDR_VEC_END
10326 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10327 #endif
10328 }
10329
10330 static void
10331 sparc_output_deferred_case_vectors (void)
10332 {
10333 rtx t;
10334 int align;
10335
10336 if (sparc_addr_list == NULL_RTX
10337 && sparc_addr_diff_list == NULL_RTX)
10338 return;
10339
10340 /* Align to cache line in the function's code section. */
10341 switch_to_section (current_function_section ());
10342
10343 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10344 if (align > 0)
10345 ASM_OUTPUT_ALIGN (asm_out_file, align);
10346
10347 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10348 sparc_output_addr_vec (XEXP (t, 0));
10349 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10350 sparc_output_addr_diff_vec (XEXP (t, 0));
10351
10352 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10353 }
10354
10355 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10356 unknown. Return 1 if the high bits are zero, -1 if the register is
10357 sign extended. */
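/* For instance (not in the original source), set_extends above reports 1
   for a load or an SImode logical right shift, whose results have a zero
   high word, and -1 for an SImode arithmetic right shift or SIGN_EXTEND,
   so such single sets yield "high bits zero" and "sign extended" here. */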
10358 int
10359 sparc_check_64 (rtx x, rtx_insn *insn)
10360 {
10361 /* If a register is set only once it is safe to ignore insns this
10362 code does not know how to handle. The loop will either recognize
10363 the single set and return the correct value or fail to recognize
10364 it and return 0. */
10365 int set_once = 0;
10366 rtx y = x;
10367
10368 gcc_assert (GET_CODE (x) == REG);
10369
10370 if (GET_MODE (x) == DImode)
10371 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10372
10373 if (flag_expensive_optimizations
10374 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10375 set_once = 1;
10376
10377 if (insn == 0)
10378 {
10379 if (set_once)
10380 insn = get_last_insn_anywhere ();
10381 else
10382 return 0;
10383 }
10384
10385 while ((insn = PREV_INSN (insn)))
10386 {
10387 switch (GET_CODE (insn))
10388 {
10389 case JUMP_INSN:
10390 case NOTE:
10391 break;
10392 case CODE_LABEL:
10393 case CALL_INSN:
10394 default:
10395 if (! set_once)
10396 return 0;
10397 break;
10398 case INSN:
10399 {
10400 rtx pat = PATTERN (insn);
10401 if (GET_CODE (pat) != SET)
10402 return 0;
10403 if (rtx_equal_p (x, SET_DEST (pat)))
10404 return set_extends (insn);
10405 if (y && rtx_equal_p (y, SET_DEST (pat)))
10406 return set_extends (insn);
10407 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10408 return 0;
10409 }
10410 }
10411 }
10412 return 0;
10413 }
10414
10415 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10416 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
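/* Illustrative expansion, not in the original source: for the register
   alternatives the routine below emits a sequence along the lines of

	sllx	%H1, 32, %0
	srl	%L1, 0, %L1	! only if the high word of %1 is not known zero
	or	%L1, %0, %0
	<opcode>	%0, %2, %L0
	srlx	%L0, 32, %H0

   i.e. the 64-bit value is assembled from the operand's two 32-bit halves,
   shifted, and split back into a register pair. */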
10417
10418 const char *
10419 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10420 {
10421 static char asm_code[60];
10422
10423 /* The scratch register is only required when the destination
10424 register is not a 64-bit global or out register. */
10425 if (which_alternative != 2)
10426 operands[3] = operands[0];
10427
10428 /* We can only shift by constants <= 63. */
10429 if (GET_CODE (operands[2]) == CONST_INT)
10430 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10431
10432 if (GET_CODE (operands[1]) == CONST_INT)
10433 {
10434 output_asm_insn ("mov\t%1, %3", operands);
10435 }
10436 else
10437 {
10438 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10439 if (sparc_check_64 (operands[1], insn) <= 0)
10440 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10441 output_asm_insn ("or\t%L1, %3, %3", operands);
10442 }
10443
10444 strcpy (asm_code, opcode);
10445
10446 if (which_alternative != 2)
10447 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10448 else
10449 return
10450 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10451 }
10452 \f
10453 /* Output rtl to increment the profiler label LABELNO
10454 for profiling a function entry. */
10455
10456 void
10457 sparc_profile_hook (int labelno)
10458 {
10459 char buf[32];
10460 rtx lab, fun;
10461
10462 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10463 if (NO_PROFILE_COUNTERS)
10464 {
10465 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10466 }
10467 else
10468 {
10469 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10470 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10471 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10472 }
10473 }
10474 \f
10475 #ifdef TARGET_SOLARIS
10476 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10477
10478 static void
10479 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10480 tree decl ATTRIBUTE_UNUSED)
10481 {
10482 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10483 {
10484 solaris_elf_asm_comdat_section (name, flags, decl);
10485 return;
10486 }
10487
10488 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10489
10490 if (!(flags & SECTION_DEBUG))
10491 fputs (",#alloc", asm_out_file);
10492 if (flags & SECTION_WRITE)
10493 fputs (",#write", asm_out_file);
10494 if (flags & SECTION_TLS)
10495 fputs (",#tls", asm_out_file);
10496 if (flags & SECTION_CODE)
10497 fputs (",#execinstr", asm_out_file);
10498
10499 if (flags & SECTION_NOTYPE)
10500 ;
10501 else if (flags & SECTION_BSS)
10502 fputs (",#nobits", asm_out_file);
10503 else
10504 fputs (",#progbits", asm_out_file);
10505
10506 fputc ('\n', asm_out_file);
10507 }
10508 #endif /* TARGET_SOLARIS */
10509
10510 /* We do not allow indirect calls to be optimized into sibling calls.
10511
10512 We cannot use sibling calls when delayed branches are disabled
10513 because they will likely require the call delay slot to be filled.
10514
10515 Also, on SPARC 32-bit we cannot emit a sibling call when the
10516 current function returns a structure. This is because the "unimp
10517 after call" convention would cause the callee to return to the
10518 wrong place. The generic code already disallows cases where the
10519 function being called returns a structure.
10520
10521 It may seem strange how this last case could occur. Usually there
10522 is code after the call which jumps to epilogue code which dumps the
10523 return value into the struct return area. That ought to invalidate
10524 the sibling call right? Well, in the C++ case we can end up passing
10525 the pointer to the struct return area to a constructor (which returns
10526 void) and then nothing else happens. Such a sibling call would look
10527 valid without the added check here.
10528
10529 VxWorks PIC PLT entries require the global pointer to be initialized
10530 on entry. We therefore can't emit sibling calls to them. */
10531 static bool
10532 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10533 {
10534 return (decl
10535 && flag_delayed_branch
10536 && (TARGET_ARCH64 || ! cfun->returns_struct)
10537 && !(TARGET_VXWORKS_RTP
10538 && flag_pic
10539 && !targetm.binds_local_p (decl)));
10540 }
10541 \f
10542 /* libfunc renaming. */
10543
10544 static void
10545 sparc_init_libfuncs (void)
10546 {
10547 if (TARGET_ARCH32)
10548 {
10549 /* Use the subroutines that Sun's library provides for integer
10550 multiply and divide. The `*' prevents an underscore from
10551 being prepended by the compiler. .umul is a little faster
10552 than .mul. */
10553 set_optab_libfunc (smul_optab, SImode, "*.umul");
10554 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10555 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10556 set_optab_libfunc (smod_optab, SImode, "*.rem");
10557 set_optab_libfunc (umod_optab, SImode, "*.urem");
10558
10559 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10560 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10561 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10562 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10563 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10564 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10565
10566 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10567 is because with soft-float, the SFmode and DFmode sqrt
10568 instructions will be absent, and the compiler will notice and
10569 try to use the TFmode sqrt instruction for calls to the
10570 builtin function sqrt, but this fails. */
10571 if (TARGET_FPU)
10572 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10573
10574 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10575 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10576 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10577 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10578 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10579 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10580
10581 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10582 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10583 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10584 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10585
10586 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10587 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10588 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10589 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10590
10591 if (DITF_CONVERSION_LIBFUNCS)
10592 {
10593 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10594 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10595 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10596 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10597 }
10598
10599 if (SUN_CONVERSION_LIBFUNCS)
10600 {
10601 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10602 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10603 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10604 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10605 }
10606 }
10607 if (TARGET_ARCH64)
10608 {
10609 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10610 do not exist in the library. Make sure the compiler does not
10611 emit calls to them by accident. (It should always use the
10612 hardware instructions.) */
10613 set_optab_libfunc (smul_optab, SImode, 0);
10614 set_optab_libfunc (sdiv_optab, SImode, 0);
10615 set_optab_libfunc (udiv_optab, SImode, 0);
10616 set_optab_libfunc (smod_optab, SImode, 0);
10617 set_optab_libfunc (umod_optab, SImode, 0);
10618
10619 if (SUN_INTEGER_MULTIPLY_64)
10620 {
10621 set_optab_libfunc (smul_optab, DImode, "__mul64");
10622 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10623 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10624 set_optab_libfunc (smod_optab, DImode, "__rem64");
10625 set_optab_libfunc (umod_optab, DImode, "__urem64");
10626 }
10627
10628 if (SUN_CONVERSION_LIBFUNCS)
10629 {
10630 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10631 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10632 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10633 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10634 }
10635 }
10636 }
10637 \f
10638 /* SPARC builtins. */
10639 enum sparc_builtins
10640 {
10641 /* FPU builtins. */
10642 SPARC_BUILTIN_LDFSR,
10643 SPARC_BUILTIN_STFSR,
10644
10645 /* VIS 1.0 builtins. */
10646 SPARC_BUILTIN_FPACK16,
10647 SPARC_BUILTIN_FPACK32,
10648 SPARC_BUILTIN_FPACKFIX,
10649 SPARC_BUILTIN_FEXPAND,
10650 SPARC_BUILTIN_FPMERGE,
10651 SPARC_BUILTIN_FMUL8X16,
10652 SPARC_BUILTIN_FMUL8X16AU,
10653 SPARC_BUILTIN_FMUL8X16AL,
10654 SPARC_BUILTIN_FMUL8SUX16,
10655 SPARC_BUILTIN_FMUL8ULX16,
10656 SPARC_BUILTIN_FMULD8SUX16,
10657 SPARC_BUILTIN_FMULD8ULX16,
10658 SPARC_BUILTIN_FALIGNDATAV4HI,
10659 SPARC_BUILTIN_FALIGNDATAV8QI,
10660 SPARC_BUILTIN_FALIGNDATAV2SI,
10661 SPARC_BUILTIN_FALIGNDATADI,
10662 SPARC_BUILTIN_WRGSR,
10663 SPARC_BUILTIN_RDGSR,
10664 SPARC_BUILTIN_ALIGNADDR,
10665 SPARC_BUILTIN_ALIGNADDRL,
10666 SPARC_BUILTIN_PDIST,
10667 SPARC_BUILTIN_EDGE8,
10668 SPARC_BUILTIN_EDGE8L,
10669 SPARC_BUILTIN_EDGE16,
10670 SPARC_BUILTIN_EDGE16L,
10671 SPARC_BUILTIN_EDGE32,
10672 SPARC_BUILTIN_EDGE32L,
10673 SPARC_BUILTIN_FCMPLE16,
10674 SPARC_BUILTIN_FCMPLE32,
10675 SPARC_BUILTIN_FCMPNE16,
10676 SPARC_BUILTIN_FCMPNE32,
10677 SPARC_BUILTIN_FCMPGT16,
10678 SPARC_BUILTIN_FCMPGT32,
10679 SPARC_BUILTIN_FCMPEQ16,
10680 SPARC_BUILTIN_FCMPEQ32,
10681 SPARC_BUILTIN_FPADD16,
10682 SPARC_BUILTIN_FPADD16S,
10683 SPARC_BUILTIN_FPADD32,
10684 SPARC_BUILTIN_FPADD32S,
10685 SPARC_BUILTIN_FPSUB16,
10686 SPARC_BUILTIN_FPSUB16S,
10687 SPARC_BUILTIN_FPSUB32,
10688 SPARC_BUILTIN_FPSUB32S,
10689 SPARC_BUILTIN_ARRAY8,
10690 SPARC_BUILTIN_ARRAY16,
10691 SPARC_BUILTIN_ARRAY32,
10692
10693 /* VIS 2.0 builtins. */
10694 SPARC_BUILTIN_EDGE8N,
10695 SPARC_BUILTIN_EDGE8LN,
10696 SPARC_BUILTIN_EDGE16N,
10697 SPARC_BUILTIN_EDGE16LN,
10698 SPARC_BUILTIN_EDGE32N,
10699 SPARC_BUILTIN_EDGE32LN,
10700 SPARC_BUILTIN_BMASK,
10701 SPARC_BUILTIN_BSHUFFLEV4HI,
10702 SPARC_BUILTIN_BSHUFFLEV8QI,
10703 SPARC_BUILTIN_BSHUFFLEV2SI,
10704 SPARC_BUILTIN_BSHUFFLEDI,
10705
10706 /* VIS 3.0 builtins. */
10707 SPARC_BUILTIN_CMASK8,
10708 SPARC_BUILTIN_CMASK16,
10709 SPARC_BUILTIN_CMASK32,
10710 SPARC_BUILTIN_FCHKSM16,
10711 SPARC_BUILTIN_FSLL16,
10712 SPARC_BUILTIN_FSLAS16,
10713 SPARC_BUILTIN_FSRL16,
10714 SPARC_BUILTIN_FSRA16,
10715 SPARC_BUILTIN_FSLL32,
10716 SPARC_BUILTIN_FSLAS32,
10717 SPARC_BUILTIN_FSRL32,
10718 SPARC_BUILTIN_FSRA32,
10719 SPARC_BUILTIN_PDISTN,
10720 SPARC_BUILTIN_FMEAN16,
10721 SPARC_BUILTIN_FPADD64,
10722 SPARC_BUILTIN_FPSUB64,
10723 SPARC_BUILTIN_FPADDS16,
10724 SPARC_BUILTIN_FPADDS16S,
10725 SPARC_BUILTIN_FPSUBS16,
10726 SPARC_BUILTIN_FPSUBS16S,
10727 SPARC_BUILTIN_FPADDS32,
10728 SPARC_BUILTIN_FPADDS32S,
10729 SPARC_BUILTIN_FPSUBS32,
10730 SPARC_BUILTIN_FPSUBS32S,
10731 SPARC_BUILTIN_FUCMPLE8,
10732 SPARC_BUILTIN_FUCMPNE8,
10733 SPARC_BUILTIN_FUCMPGT8,
10734 SPARC_BUILTIN_FUCMPEQ8,
10735 SPARC_BUILTIN_FHADDS,
10736 SPARC_BUILTIN_FHADDD,
10737 SPARC_BUILTIN_FHSUBS,
10738 SPARC_BUILTIN_FHSUBD,
10739 SPARC_BUILTIN_FNHADDS,
10740 SPARC_BUILTIN_FNHADDD,
10741 SPARC_BUILTIN_UMULXHI,
10742 SPARC_BUILTIN_XMULX,
10743 SPARC_BUILTIN_XMULXHI,
10744
10745 /* VIS 4.0 builtins. */
10746 SPARC_BUILTIN_FPADD8,
10747 SPARC_BUILTIN_FPADDS8,
10748 SPARC_BUILTIN_FPADDUS8,
10749 SPARC_BUILTIN_FPADDUS16,
10750 SPARC_BUILTIN_FPCMPLE8,
10751 SPARC_BUILTIN_FPCMPGT8,
10752 SPARC_BUILTIN_FPCMPULE16,
10753 SPARC_BUILTIN_FPCMPUGT16,
10754 SPARC_BUILTIN_FPCMPULE32,
10755 SPARC_BUILTIN_FPCMPUGT32,
10756 SPARC_BUILTIN_FPMAX8,
10757 SPARC_BUILTIN_FPMAX16,
10758 SPARC_BUILTIN_FPMAX32,
10759 SPARC_BUILTIN_FPMAXU8,
10760 SPARC_BUILTIN_FPMAXU16,
10761 SPARC_BUILTIN_FPMAXU32,
10762 SPARC_BUILTIN_FPMIN8,
10763 SPARC_BUILTIN_FPMIN16,
10764 SPARC_BUILTIN_FPMIN32,
10765 SPARC_BUILTIN_FPMINU8,
10766 SPARC_BUILTIN_FPMINU16,
10767 SPARC_BUILTIN_FPMINU32,
10768 SPARC_BUILTIN_FPSUB8,
10769 SPARC_BUILTIN_FPSUBS8,
10770 SPARC_BUILTIN_FPSUBUS8,
10771 SPARC_BUILTIN_FPSUBUS16,
10772
10773 /* VIS 4.0B builtins. */
10774
10775 /* Note that all the DICTUNPACK* entries should be kept
10776 contiguous. */
10777 SPARC_BUILTIN_FIRST_DICTUNPACK,
10778 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10779 SPARC_BUILTIN_DICTUNPACK16,
10780 SPARC_BUILTIN_DICTUNPACK32,
10781 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10782
10783 /* Note that all the FPCMP*SHL entries should be kept
10784 contiguous. */
10785 SPARC_BUILTIN_FIRST_FPCMPSHL,
10786 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10787 SPARC_BUILTIN_FPCMPGT8SHL,
10788 SPARC_BUILTIN_FPCMPEQ8SHL,
10789 SPARC_BUILTIN_FPCMPNE8SHL,
10790 SPARC_BUILTIN_FPCMPLE16SHL,
10791 SPARC_BUILTIN_FPCMPGT16SHL,
10792 SPARC_BUILTIN_FPCMPEQ16SHL,
10793 SPARC_BUILTIN_FPCMPNE16SHL,
10794 SPARC_BUILTIN_FPCMPLE32SHL,
10795 SPARC_BUILTIN_FPCMPGT32SHL,
10796 SPARC_BUILTIN_FPCMPEQ32SHL,
10797 SPARC_BUILTIN_FPCMPNE32SHL,
10798 SPARC_BUILTIN_FPCMPULE8SHL,
10799 SPARC_BUILTIN_FPCMPUGT8SHL,
10800 SPARC_BUILTIN_FPCMPULE16SHL,
10801 SPARC_BUILTIN_FPCMPUGT16SHL,
10802 SPARC_BUILTIN_FPCMPULE32SHL,
10803 SPARC_BUILTIN_FPCMPUGT32SHL,
10804 SPARC_BUILTIN_FPCMPDE8SHL,
10805 SPARC_BUILTIN_FPCMPDE16SHL,
10806 SPARC_BUILTIN_FPCMPDE32SHL,
10807 SPARC_BUILTIN_FPCMPUR8SHL,
10808 SPARC_BUILTIN_FPCMPUR16SHL,
10809 SPARC_BUILTIN_FPCMPUR32SHL,
10810 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10811
10812 SPARC_BUILTIN_MAX
10813 };
10814
10815 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10816 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10817
10818 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10819 The instruction should require a constant operand of some sort. The
10820 function prints an error if OPVAL is not valid. */
10821
10822 static bool
10823 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10824 {
10825 if (GET_CODE (opval) != CONST_INT)
10826 {
10827 error ("%qs expects a constant argument", insn_data[icode].name);
10828 return false;
10829 }
10830
10831 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10832 {
10833 error ("constant argument out of range for %qs", insn_data[icode].name);
10834 return false;
10835 }
10836 return true;
10837 }
10838
10839 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10840 function decl or NULL_TREE if the builtin was not added. */
10841
10842 static tree
10843 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10844 tree type)
10845 {
10846 tree t
10847 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10848
10849 if (t)
10850 {
10851 sparc_builtins[code] = t;
10852 sparc_builtins_icode[code] = icode;
10853 }
10854
10855 return t;
10856 }
10857
10858 /* Likewise, but also marks the function as "const". */
10859
10860 static tree
10861 def_builtin_const (const char *name, enum insn_code icode,
10862 enum sparc_builtins code, tree type)
10863 {
10864 tree t = def_builtin (name, icode, code, type);
10865
10866 if (t)
10867 TREE_READONLY (t) = 1;
10868
10869 return t;
10870 }
10871
10872 /* Implement the TARGET_INIT_BUILTINS target hook.
10873 Create builtin functions for special SPARC instructions. */
10874
10875 static void
10876 sparc_init_builtins (void)
10877 {
10878 if (TARGET_FPU)
10879 sparc_fpu_init_builtins ();
10880
10881 if (TARGET_VIS)
10882 sparc_vis_init_builtins ();
10883 }
10884
10885 /* Create builtin functions for FPU instructions. */
10886
10887 static void
10888 sparc_fpu_init_builtins (void)
10889 {
10890 tree ftype
10891 = build_function_type_list (void_type_node,
10892 build_pointer_type (unsigned_type_node), 0);
10893 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10894 SPARC_BUILTIN_LDFSR, ftype);
10895 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10896 SPARC_BUILTIN_STFSR, ftype);
10897 }
10898
10899 /* Create builtin functions for VIS instructions. */
10900
10901 static void
10902 sparc_vis_init_builtins (void)
10903 {
10904 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10905 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10906 tree v4hi = build_vector_type (intHI_type_node, 4);
10907 tree v2hi = build_vector_type (intHI_type_node, 2);
10908 tree v2si = build_vector_type (intSI_type_node, 2);
10909 tree v1si = build_vector_type (intSI_type_node, 1);
10910
10911 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10912 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10913 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10914 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10915 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10916 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10917 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10918 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10919 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10920 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10921 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10922 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10923 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10924 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10925 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10926 v8qi, v8qi,
10927 intDI_type_node, 0);
10928 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10929 v8qi, v8qi, 0);
10930 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10931 v8qi, v8qi, 0);
10932 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10933 intSI_type_node, 0);
10934 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10935 intSI_type_node, 0);
10936 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10937 intSI_type_node, 0);
10938 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10939 intDI_type_node,
10940 intDI_type_node, 0);
10941 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10942 intSI_type_node,
10943 intSI_type_node, 0);
10944 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10945 ptr_type_node,
10946 intSI_type_node, 0);
10947 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10948 ptr_type_node,
10949 intDI_type_node, 0);
10950 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10951 ptr_type_node,
10952 ptr_type_node, 0);
10953 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10954 ptr_type_node,
10955 ptr_type_node, 0);
10956 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10957 v4hi, v4hi, 0);
10958 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10959 v2si, v2si, 0);
10960 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10961 v4hi, v4hi, 0);
10962 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10963 v2si, v2si, 0);
10964 tree void_ftype_di = build_function_type_list (void_type_node,
10965 intDI_type_node, 0);
10966 tree di_ftype_void = build_function_type_list (intDI_type_node,
10967 void_type_node, 0);
10968 tree void_ftype_si = build_function_type_list (void_type_node,
10969 intSI_type_node, 0);
10970 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10971 float_type_node,
10972 float_type_node, 0);
10973 tree df_ftype_df_df = build_function_type_list (double_type_node,
10974 double_type_node,
10975 double_type_node, 0);
10976
10977 /* Packing and expanding vectors. */
10978 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10979 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10980 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10981 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10982 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10983 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10984 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10985 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10986 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10987 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10988
10989 /* Multiplications. */
10990 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10991 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10992 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10993 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10994 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10995 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10996 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10997 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10998 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10999 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11000 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11001 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11002 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11003 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11004
11005 /* Data aligning. */
11006 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11007 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11008 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11009 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11010 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11011 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11012 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11013 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11014
11015 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11016 SPARC_BUILTIN_WRGSR, void_ftype_di);
11017 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11018 SPARC_BUILTIN_RDGSR, di_ftype_void);
11019
11020 if (TARGET_ARCH64)
11021 {
11022 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11023 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11024 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11025 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11026 }
11027 else
11028 {
11029 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11030 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11031 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11032 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11033 }
11034
11035 /* Pixel distance. */
11036 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11037 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11038
11039 /* Edge handling. */
11040 if (TARGET_ARCH64)
11041 {
11042 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11043 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11044 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11045 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11046 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11047 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11048 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11049 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11050 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11051 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11052 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11053 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11054 }
11055 else
11056 {
11057 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11058 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11059 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11060 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11061 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11062 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11063 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11064 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11065 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11066 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11067 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11068 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11069 }
11070
11071 /* Pixel compare. */
11072 if (TARGET_ARCH64)
11073 {
11074 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11075 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11076 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11077 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11078 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11079 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11080 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11081 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11082 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11083 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11084 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11085 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11086 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11087 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11088 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11089 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11090 }
11091 else
11092 {
11093 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11094 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11095 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11096 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11097 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11098 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11099 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11100 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11101 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11102 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11103 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11104 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11105 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11106 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11107 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11108 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11109 }
11110
11111 /* Addition and subtraction. */
11112 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11113 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11114 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11115 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11116 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11117 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11118 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11119 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11120 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11121 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11122 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11123 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11124 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11125 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11126 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11127 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11128
11129 /* Three-dimensional array addressing. */
11130 if (TARGET_ARCH64)
11131 {
11132 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11133 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11134 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11135 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11136 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11137 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11138 }
11139 else
11140 {
11141 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11142 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11143 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11144 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11145 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11146 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11147 }
11148
11149 if (TARGET_VIS2)
11150 {
11151 /* Edge handling. */
11152 if (TARGET_ARCH64)
11153 {
11154 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11155 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11156 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11157 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11158 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11159 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11160 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11161 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11162 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11163 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11164 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11165 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11166 }
11167 else
11168 {
11169 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11170 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11171 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11172 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11173 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11174 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11175 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11176 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11177 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11178 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11179 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11180 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11181 }
11182
11183 /* Byte mask and shuffle. */
11184 if (TARGET_ARCH64)
11185 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11186 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11187 else
11188 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11189 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11190 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11191 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11192 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11193 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11194 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11195 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11196 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11197 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11198 }
11199
11200 if (TARGET_VIS3)
11201 {
11202 if (TARGET_ARCH64)
11203 {
11204 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11205 SPARC_BUILTIN_CMASK8, void_ftype_di);
11206 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11207 SPARC_BUILTIN_CMASK16, void_ftype_di);
11208 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11209 SPARC_BUILTIN_CMASK32, void_ftype_di);
11210 }
11211 else
11212 {
11213 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11214 SPARC_BUILTIN_CMASK8, void_ftype_si);
11215 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11216 SPARC_BUILTIN_CMASK16, void_ftype_si);
11217 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11218 SPARC_BUILTIN_CMASK32, void_ftype_si);
11219 }
11220
11221 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11222 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11223
11224 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11225 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11226 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11227 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11228 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11229 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11230 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11231 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11232 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11233 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11234 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11235 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11236 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11237 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11238 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11239 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11240
11241 if (TARGET_ARCH64)
11242 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11243 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11244 else
11245 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11246 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11247
11248 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11249 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11250 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11251 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11252 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11253 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11254
11255 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11256 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11257 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11258 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11259 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11260 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11261 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11262 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11263 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11264 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11265 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11266 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11267 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11268 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11269 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11270 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11271
11272 if (TARGET_ARCH64)
11273 {
11274 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11275 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11276 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11277 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11278 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11279 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11280 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11281 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11282 }
11283 else
11284 {
11285 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11286 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11287 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11288 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11289 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11290 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11291 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11292 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11293 }
11294
11295 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11296 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11297 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11298 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11299 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11300 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11301 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11302 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11303 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11304 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11305 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11306 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11307
11308 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11309 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11310 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11311 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11312 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11313 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11314 }
11315
11316 if (TARGET_VIS4)
11317 {
11318 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11319 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11320 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11321 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11322 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11323 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11324 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11325 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11326
11327
11328 if (TARGET_ARCH64)
11329 {
11330 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11331 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11332 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11333 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11334 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11335 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11336 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11337 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11338 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11339 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11340 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11341 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11342 }
11343 else
11344 {
11345 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11346 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11347 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11348 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11349 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11350 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11351 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11352 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11353 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11354 			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11355 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11356 			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11357 }
11358
11359 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11360 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11361 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11362 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11363 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11364 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11365 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11366 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11367 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11368 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11369 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11370 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11371 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11372 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11373 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11374 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11375 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11376 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11377 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11378 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11379 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11380 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11381 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11382 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11383 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11384 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11385 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11386 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11387 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11388 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11389 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11390 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11391 }
11392
11393 if (TARGET_VIS4B)
11394 {
11395 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11396 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11397 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11398 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11399 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11400 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11401
11402 if (TARGET_ARCH64)
11403 {
11404 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11405 v8qi, v8qi,
11406 intSI_type_node, 0);
11407 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11408 v4hi, v4hi,
11409 intSI_type_node, 0);
11410 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11411 v2si, v2si,
11412 intSI_type_node, 0);
11413
11414 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11415 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11416 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11417 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11418 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11419 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11420 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11421 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11422
11423 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11424 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11425 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11426 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11427 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11428 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11429 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11430 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11431
11432 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11433 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11434 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11435 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11436 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11437 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11438 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11439 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11440
11441
11442 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11443 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11444 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11445 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11446
11447 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11448 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11449 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11450 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11451
11452 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11453 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11454 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11455 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11456
11457 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11458 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11459 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11460 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11461 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11462 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11463
11464 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11465 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11466 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11467 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11468 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11469 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11470
11471 }
11472 else
11473 {
11474 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11475 v8qi, v8qi,
11476 intSI_type_node, 0);
11477 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11478 v4hi, v4hi,
11479 intSI_type_node, 0);
11480 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11481 v2si, v2si,
11482 intSI_type_node, 0);
11483
11484 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11485 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11486 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11487 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11488 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11489 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11490 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11491 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11492
11493 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11494 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11495 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11496 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11497 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11498 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11499 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11500 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11501
11502 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11503 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11504 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11505 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11506 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11507 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11508 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11509 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11510
11511
11512 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11513 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11514 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11515 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11516
11517 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11518 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11519 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11520 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11521
11522 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11523 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11524 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11525 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11526
11527 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11528 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11529 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11530 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11531 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11532 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11533
11534 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11535 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11536 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11537 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11538 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11539 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11540 }
11541 }
11542 }
11543
11544 /* Implement TARGET_BUILTIN_DECL hook. */
11545
11546 static tree
11547 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11548 {
11549 if (code >= SPARC_BUILTIN_MAX)
11550 return error_mark_node;
11551
11552 return sparc_builtins[code];
11553 }
11554
11555 /* Implement TARGET_EXPAND_BUILTIN hook.  */
11556
11557 static rtx
11558 sparc_expand_builtin (tree exp, rtx target,
11559 rtx subtarget ATTRIBUTE_UNUSED,
11560 machine_mode tmode ATTRIBUTE_UNUSED,
11561 int ignore ATTRIBUTE_UNUSED)
11562 {
11563 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11564 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11565 enum insn_code icode = sparc_builtins_icode[code];
11566 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11567 call_expr_arg_iterator iter;
11568 int arg_count = 0;
11569 rtx pat, op[4];
11570 tree arg;
11571
11572 if (nonvoid)
11573 {
11574 machine_mode tmode = insn_data[icode].operand[0].mode;
11575 if (!target
11576 || GET_MODE (target) != tmode
11577 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11578 op[0] = gen_reg_rtx (tmode);
11579 else
11580 op[0] = target;
11581 }
11582
11583 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11584 {
11585 const struct insn_operand_data *insn_op;
11586 int idx;
11587
11588 if (arg == error_mark_node)
11589 return NULL_RTX;
11590
11591 arg_count++;
11592 idx = arg_count - !nonvoid;
11593 insn_op = &insn_data[icode].operand[idx];
11594 op[arg_count] = expand_normal (arg);
11595
11596 /* Some of the builtins require constant arguments. We check
11597 for this here. */
11598 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11599 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11600 && arg_count == 3)
11601 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11602 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11603 && arg_count == 2))
11604 {
11605 if (!check_constant_argument (icode, idx, op[arg_count]))
11606 return const0_rtx;
11607 }
11608
11609 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11610 {
11611 if (!address_operand (op[arg_count], SImode))
11612 {
11613 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11614 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11615 }
11616 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11617 }
11618
11619 else if (insn_op->mode == V1DImode
11620 && GET_MODE (op[arg_count]) == DImode)
11621 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11622
11623 else if (insn_op->mode == V1SImode
11624 && GET_MODE (op[arg_count]) == SImode)
11625 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11626
11627 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11628 insn_op->mode))
11629 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11630 }
11631
11632 switch (arg_count)
11633 {
11634 case 0:
11635 pat = GEN_FCN (icode) (op[0]);
11636 break;
11637 case 1:
11638 if (nonvoid)
11639 pat = GEN_FCN (icode) (op[0], op[1]);
11640 else
11641 pat = GEN_FCN (icode) (op[1]);
11642 break;
11643 case 2:
11644 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11645 break;
11646 case 3:
11647 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11648 break;
11649 default:
11650 gcc_unreachable ();
11651 }
11652
11653 if (!pat)
11654 return NULL_RTX;
11655
11656 emit_insn (pat);
11657
11658 return (nonvoid ? op[0] : const0_rtx);
11659 }
11660
11661 /* Return the upper 16 bits of the 8x16 multiplication. */
11662
11663 static int
11664 sparc_vis_mul8x16 (int e8, int e16)
11665 {
11666 return (e8 * e16 + 128) / 256;
11667 }
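/* For example, sparc_vis_mul8x16 (200, 1000) is (200 * 1000 + 128) / 256
   = 781 with integer division, i.e. the 8x16 product divided by 256 with
   round-to-nearest (illustrative operand values).  */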
11668
11669 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11670 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11671
11672 static void
11673 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11674 tree inner_type, tree cst0, tree cst1)
11675 {
11676 unsigned i, num = VECTOR_CST_NELTS (cst0);
11677 int scale;
11678
11679 switch (fncode)
11680 {
11681 case SPARC_BUILTIN_FMUL8X16:
11682 for (i = 0; i < num; ++i)
11683 {
11684 int val
11685 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11686 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11687 n_elts->quick_push (build_int_cst (inner_type, val));
11688 }
11689 break;
11690
11691 case SPARC_BUILTIN_FMUL8X16AU:
11692 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11693
11694 for (i = 0; i < num; ++i)
11695 {
11696 int val
11697 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11698 scale);
11699 n_elts->quick_push (build_int_cst (inner_type, val));
11700 }
11701 break;
11702
11703 case SPARC_BUILTIN_FMUL8X16AL:
11704 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11705
11706 for (i = 0; i < num; ++i)
11707 {
11708 int val
11709 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11710 scale);
11711 n_elts->quick_push (build_int_cst (inner_type, val));
11712 }
11713 break;
11714
11715 default:
11716 gcc_unreachable ();
11717 }
11718 }
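/* Illustration with made-up constants: folding FMUL8X16AU on
   CST0 = {10, 20, 30, 40} and CST1 = {256, 512, ...} scales every lane by
   element 0 of CST1 (256) and yields {10, 20, 30, 40}, while FMUL8X16AL
   uses element 1 (512) and yields {20, 40, 60, 80}.  */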
11719
11720 /* Implement TARGET_FOLD_BUILTIN hook.
11721
11722 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11723 result of the function call is ignored. NULL_TREE is returned if the
11724 function could not be folded. */
11725
11726 static tree
11727 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11728 tree *args, bool ignore)
11729 {
11730 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11731 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11732 tree arg0, arg1, arg2;
11733
11734 if (ignore)
11735 switch (code)
11736 {
11737 case SPARC_BUILTIN_LDFSR:
11738 case SPARC_BUILTIN_STFSR:
11739 case SPARC_BUILTIN_ALIGNADDR:
11740 case SPARC_BUILTIN_WRGSR:
11741 case SPARC_BUILTIN_BMASK:
11742 case SPARC_BUILTIN_CMASK8:
11743 case SPARC_BUILTIN_CMASK16:
11744 case SPARC_BUILTIN_CMASK32:
11745 break;
11746
11747 default:
11748 return build_zero_cst (rtype);
11749 }
11750
11751 switch (code)
11752 {
11753 case SPARC_BUILTIN_FEXPAND:
11754 arg0 = args[0];
11755 STRIP_NOPS (arg0);
11756
11757 if (TREE_CODE (arg0) == VECTOR_CST)
11758 {
11759 tree inner_type = TREE_TYPE (rtype);
11760 unsigned i;
11761
11762 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11763 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11764 {
11765 unsigned HOST_WIDE_INT val
11766 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11767 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11768 }
11769 return n_elts.build ();
11770 }
11771 break;
11772
11773 case SPARC_BUILTIN_FMUL8X16:
11774 case SPARC_BUILTIN_FMUL8X16AU:
11775 case SPARC_BUILTIN_FMUL8X16AL:
11776 arg0 = args[0];
11777 arg1 = args[1];
11778 STRIP_NOPS (arg0);
11779 STRIP_NOPS (arg1);
11780
11781 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11782 {
11783 tree inner_type = TREE_TYPE (rtype);
11784 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11785 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11786 return n_elts.build ();
11787 }
11788 break;
11789
11790 case SPARC_BUILTIN_FPMERGE:
11791 arg0 = args[0];
11792 arg1 = args[1];
11793 STRIP_NOPS (arg0);
11794 STRIP_NOPS (arg1);
11795
11796 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11797 {
11798 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11799 unsigned i;
11800 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11801 {
11802 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11803 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11804 }
11805
11806 return n_elts.build ();
11807 }
11808 break;
11809
11810 case SPARC_BUILTIN_PDIST:
11811 case SPARC_BUILTIN_PDISTN:
11812 arg0 = args[0];
11813 arg1 = args[1];
11814 STRIP_NOPS (arg0);
11815 STRIP_NOPS (arg1);
11816 if (code == SPARC_BUILTIN_PDIST)
11817 {
11818 arg2 = args[2];
11819 STRIP_NOPS (arg2);
11820 }
11821 else
11822 arg2 = integer_zero_node;
11823
11824 if (TREE_CODE (arg0) == VECTOR_CST
11825 && TREE_CODE (arg1) == VECTOR_CST
11826 && TREE_CODE (arg2) == INTEGER_CST)
11827 {
11828 bool overflow = false;
11829 widest_int result = wi::to_widest (arg2);
11830 widest_int tmp;
11831 unsigned i;
11832
11833 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11834 {
11835 tree e0 = VECTOR_CST_ELT (arg0, i);
11836 tree e1 = VECTOR_CST_ELT (arg1, i);
11837
11838 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11839
11840 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11841 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11842 if (wi::neg_p (tmp))
11843 tmp = wi::neg (tmp, &neg2_ovf);
11844 else
11845 neg2_ovf = false;
11846 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11847 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11848 }
11849
11850 gcc_assert (!overflow);
11851
11852 return wide_int_to_tree (rtype, result);
11853 }
11854
11855 default:
11856 break;
11857 }
11858
11859 return NULL_TREE;
11860 }
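/* The PDIST/PDISTN folding above is the widest_int version of this scalar
   reference loop (an illustrative sketch; the lane type and the names are
   assumptions, the fold itself operates on the VECTOR_CST elements):

     static int64_t
     pdist_ref (const uint8_t a[8], const uint8_t b[8], int64_t acc)
     {
       for (int i = 0; i < 8; i++)
         acc += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
       return acc;
     }
*/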
11861 \f
11862 /* ??? This duplicates information provided to the compiler by the
11863 ??? scheduler description. Some day, teach genautomata to output
11864 ??? the latencies and then CSE will just use that. */
11865
11866 static bool
11867 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11868 int opno ATTRIBUTE_UNUSED,
11869 int *total, bool speed ATTRIBUTE_UNUSED)
11870 {
11871 int code = GET_CODE (x);
11872 bool float_mode_p = FLOAT_MODE_P (mode);
11873
11874 switch (code)
11875 {
11876 case CONST_INT:
11877 if (SMALL_INT (x))
11878 *total = 0;
11879 else
11880 *total = 2;
11881 return true;
11882
11883 case CONST_WIDE_INT:
11884 *total = 0;
11885 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11886 *total += 2;
11887 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11888 *total += 2;
11889 return true;
11890
11891 case HIGH:
11892 *total = 2;
11893 return true;
11894
11895 case CONST:
11896 case LABEL_REF:
11897 case SYMBOL_REF:
11898 *total = 4;
11899 return true;
11900
11901 case CONST_DOUBLE:
11902 *total = 8;
11903 return true;
11904
11905 case MEM:
11906 /* If outer-code was a sign or zero extension, a cost
11907 of COSTS_N_INSNS (1) was already added in. This is
11908 why we are subtracting it back out. */
11909 if (outer_code == ZERO_EXTEND)
11910 {
11911 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11912 }
11913 else if (outer_code == SIGN_EXTEND)
11914 {
11915 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11916 }
11917 else if (float_mode_p)
11918 {
11919 *total = sparc_costs->float_load;
11920 }
11921 else
11922 {
11923 *total = sparc_costs->int_load;
11924 }
11925
11926 return true;
11927
11928 case PLUS:
11929 case MINUS:
11930 if (float_mode_p)
11931 *total = sparc_costs->float_plusminus;
11932 else
11933 *total = COSTS_N_INSNS (1);
11934 return false;
11935
11936 case FMA:
11937 {
11938 rtx sub;
11939
11940 gcc_assert (float_mode_p);
11941 *total = sparc_costs->float_mul;
11942
11943 sub = XEXP (x, 0);
11944 if (GET_CODE (sub) == NEG)
11945 sub = XEXP (sub, 0);
11946 *total += rtx_cost (sub, mode, FMA, 0, speed);
11947
11948 sub = XEXP (x, 2);
11949 if (GET_CODE (sub) == NEG)
11950 sub = XEXP (sub, 0);
11951 *total += rtx_cost (sub, mode, FMA, 2, speed);
11952 return true;
11953 }
11954
11955 case MULT:
11956 if (float_mode_p)
11957 *total = sparc_costs->float_mul;
11958 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11959 *total = COSTS_N_INSNS (25);
11960 else
11961 {
11962 int bit_cost;
11963
11964 bit_cost = 0;
11965 if (sparc_costs->int_mul_bit_factor)
11966 {
11967 int nbits;
11968
11969 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11970 {
11971 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
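	      /* Count the bits set in the constant multiplier; each
		 iteration of the loop clears the lowest set bit.  */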
11972 for (nbits = 0; value != 0; value &= value - 1)
11973 nbits++;
11974 }
11975 else
11976 nbits = 7;
11977
11978 if (nbits < 3)
11979 nbits = 3;
11980 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11981 bit_cost = COSTS_N_INSNS (bit_cost);
11982 }
11983
11984 if (mode == DImode || !TARGET_HARD_MUL)
11985 *total = sparc_costs->int_mulX + bit_cost;
11986 else
11987 *total = sparc_costs->int_mul + bit_cost;
11988 }
11989 return false;
11990
11991 case ASHIFT:
11992 case ASHIFTRT:
11993 case LSHIFTRT:
11994 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11995 return false;
11996
11997 case DIV:
11998 case UDIV:
11999 case MOD:
12000 case UMOD:
12001 if (float_mode_p)
12002 {
12003 if (mode == DFmode)
12004 *total = sparc_costs->float_div_df;
12005 else
12006 *total = sparc_costs->float_div_sf;
12007 }
12008 else
12009 {
12010 if (mode == DImode)
12011 *total = sparc_costs->int_divX;
12012 else
12013 *total = sparc_costs->int_div;
12014 }
12015 return false;
12016
12017 case NEG:
12018 if (! float_mode_p)
12019 {
12020 *total = COSTS_N_INSNS (1);
12021 return false;
12022 }
12023 /* FALLTHRU */
12024
12025 case ABS:
12026 case FLOAT:
12027 case UNSIGNED_FLOAT:
12028 case FIX:
12029 case UNSIGNED_FIX:
12030 case FLOAT_EXTEND:
12031 case FLOAT_TRUNCATE:
12032 *total = sparc_costs->float_move;
12033 return false;
12034
12035 case SQRT:
12036 if (mode == DFmode)
12037 *total = sparc_costs->float_sqrt_df;
12038 else
12039 *total = sparc_costs->float_sqrt_sf;
12040 return false;
12041
12042 case COMPARE:
12043 if (float_mode_p)
12044 *total = sparc_costs->float_cmp;
12045 else
12046 *total = COSTS_N_INSNS (1);
12047 return false;
12048
12049 case IF_THEN_ELSE:
12050 if (float_mode_p)
12051 *total = sparc_costs->float_cmove;
12052 else
12053 *total = sparc_costs->int_cmove;
12054 return false;
12055
12056 case IOR:
12057 /* Handle the NAND vector patterns. */
12058 if (sparc_vector_mode_supported_p (mode)
12059 && GET_CODE (XEXP (x, 0)) == NOT
12060 && GET_CODE (XEXP (x, 1)) == NOT)
12061 {
12062 *total = COSTS_N_INSNS (1);
12063 return true;
12064 }
12065 else
12066 return false;
12067
12068 default:
12069 return false;
12070 }
12071 }
12072
12073 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12074
12075 static inline bool
12076 general_or_i64_p (reg_class_t rclass)
12077 {
12078 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12079 }
12080
12081 /* Implement TARGET_REGISTER_MOVE_COST. */
12082
12083 static int
12084 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12085 reg_class_t from, reg_class_t to)
12086 {
12087 bool need_memory = false;
12088
12089 /* This helps postreload CSE to eliminate redundant comparisons. */
12090 if (from == NO_REGS || to == NO_REGS)
12091 return 100;
12092
12093 if (from == FPCC_REGS || to == FPCC_REGS)
12094 need_memory = true;
12095 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12096 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12097 {
12098 if (TARGET_VIS3)
12099 {
12100 int size = GET_MODE_SIZE (mode);
12101 if (size == 8 || size == 4)
12102 {
12103 if (! TARGET_ARCH32 || size == 4)
12104 return 4;
12105 else
12106 return 6;
12107 }
12108 }
12109 need_memory = true;
12110 }
12111
12112 if (need_memory)
12113 {
12114 if (sparc_cpu == PROCESSOR_ULTRASPARC
12115 || sparc_cpu == PROCESSOR_ULTRASPARC3
12116 || sparc_cpu == PROCESSOR_NIAGARA
12117 || sparc_cpu == PROCESSOR_NIAGARA2
12118 || sparc_cpu == PROCESSOR_NIAGARA3
12119 || sparc_cpu == PROCESSOR_NIAGARA4
12120 || sparc_cpu == PROCESSOR_NIAGARA7
12121 || sparc_cpu == PROCESSOR_M8)
12122 return 12;
12123
12124 return 6;
12125 }
12126
12127 return 2;
12128 }
12129
12130 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12131 This is achieved by means of a manual dynamic stack space allocation in
12132 the current frame. We make the assumption that SEQ doesn't contain any
12133 function calls, with the possible exception of calls to the GOT helper. */
12134
12135 static void
12136 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12137 {
12138 /* We must preserve the lowest 16 words for the register save area. */
12139 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12140 /* We really need only 2 words of fresh stack space. */
12141 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12142
12143 rtx slot
12144 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12145 SPARC_STACK_BIAS + offset));
12146
12147 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12148 emit_insn (gen_rtx_SET (slot, reg));
12149 if (reg2)
12150 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12151 reg2));
12152 emit_insn (seq);
12153 if (reg2)
12154 emit_insn (gen_rtx_SET (reg2,
12155 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12156 emit_insn (gen_rtx_SET (reg, slot));
12157 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12158 }
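/* Schematically, the sequence emitted by the function above is (word-size
   loads/stores, stack bias and exact offsets elided; illustrative rendering
   only):

     add  %sp, -SIZE, %sp
     st   REG,  [%sp + 16 words]
     st   REG2, [%sp + 17 words]     ! only if REG2 is given
     <SEQ>
     ld   [%sp + 17 words], REG2     ! only if REG2 is given
     ld   [%sp + 16 words], REG
     add  %sp, SIZE, %sp                                               */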
12159
12160 /* Output the assembler code for a thunk function. THUNK_DECL is the
12161 declaration for the thunk function itself, FUNCTION is the decl for
12162 the target function. DELTA is an immediate constant offset to be
12163 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12164 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
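/* As general background (standard C++ ABI behavior, not SPARC-specific):
   a this-adjusting thunk for non-virtual multiple inheritance only needs
   DELTA, whereas a thunk involving a virtual base also needs VCALL_OFFSET,
   because that part of the adjustment must be loaded from the vtable at
   run time.  */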
12165
12166 static void
12167 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12168 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12169 tree function)
12170 {
12171 rtx this_rtx, funexp;
12172 rtx_insn *insn;
12173 unsigned int int_arg_first;
12174
12175 reload_completed = 1;
12176 epilogue_completed = 1;
12177
12178 emit_note (NOTE_INSN_PROLOGUE_END);
12179
12180 if (TARGET_FLAT)
12181 {
12182 sparc_leaf_function_p = 1;
12183
12184 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12185 }
12186 else if (flag_delayed_branch)
12187 {
12188 /* We will emit a regular sibcall below, so we need to instruct
12189 output_sibcall that we are in a leaf function. */
12190 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12191
12192 /* This will cause final.c to invoke leaf_renumber_regs so we
12193 must behave as if we were in a not-yet-leafified function. */
12194 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12195 }
12196 else
12197 {
12198 /* We will emit the sibcall manually below, so we will need to
12199 manually spill non-leaf registers. */
12200 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12201
12202 /* We really are in a leaf function. */
12203 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12204 }
12205
12206 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12207 returns a structure, the structure return pointer is there instead. */
12208 if (TARGET_ARCH64
12209 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12210 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12211 else
12212 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12213
12214 /* Add DELTA. When possible use a plain add, otherwise load it into
12215 a register first. */
12216 if (delta)
12217 {
12218 rtx delta_rtx = GEN_INT (delta);
12219
12220 if (! SPARC_SIMM13_P (delta))
12221 {
12222 rtx scratch = gen_rtx_REG (Pmode, 1);
12223 emit_move_insn (scratch, delta_rtx);
12224 delta_rtx = scratch;
12225 }
12226
12227 /* THIS_RTX += DELTA. */
12228 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12229 }
12230
12231 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12232 if (vcall_offset)
12233 {
12234 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12235 rtx scratch = gen_rtx_REG (Pmode, 1);
12236
12237 gcc_assert (vcall_offset < 0);
12238
12239 /* SCRATCH = *THIS_RTX. */
12240 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12241
12242 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12243 may not have any available scratch register at this point. */
12244 if (SPARC_SIMM13_P (vcall_offset))
12245 ;
12246 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12247 else if (! fixed_regs[5]
12248 /* The below sequence is made up of at least 2 insns,
12249 while the default method may need only one. */
12250 && vcall_offset < -8192)
12251 {
12252 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12253 emit_move_insn (scratch2, vcall_offset_rtx);
12254 vcall_offset_rtx = scratch2;
12255 }
12256 else
12257 {
12258 rtx increment = GEN_INT (-4096);
12259
12260 /* VCALL_OFFSET is a negative number whose typical range can be
12261 estimated as -32768..0 in 32-bit mode. In almost all cases
12262 it is therefore cheaper to emit multiple add insns than
12263 spilling and loading the constant into a register (at least
12264 6 insns). */
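	      /* For instance, VCALL_OFFSET == -20000 (an illustrative value)
		 takes four add insns of -4096 each, leaving -3616, which
		 satisfies SPARC_SIMM13_P for the memory access below.  */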
12265 while (! SPARC_SIMM13_P (vcall_offset))
12266 {
12267 emit_insn (gen_add2_insn (scratch, increment));
12268 vcall_offset += 4096;
12269 }
12270 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12271 }
12272
12273 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12274 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12275 gen_rtx_PLUS (Pmode,
12276 scratch,
12277 vcall_offset_rtx)));
12278
12279 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12280 emit_insn (gen_add2_insn (this_rtx, scratch));
12281 }
12282
12283 /* Generate a tail call to the target function. */
12284 if (! TREE_USED (function))
12285 {
12286 assemble_external (function);
12287 TREE_USED (function) = 1;
12288 }
12289 funexp = XEXP (DECL_RTL (function), 0);
12290
12291 if (flag_delayed_branch)
12292 {
12293 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12294 insn = emit_call_insn (gen_sibcall (funexp));
12295 SIBLING_CALL_P (insn) = 1;
12296 }
12297 else
12298 {
12299 /* The hoops we have to jump through in order to generate a sibcall
12300 without using delay slots... */
12301 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12302
12303 if (flag_pic)
12304 {
12305 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12306 start_sequence ();
12307 load_got_register (); /* clobbers %o7 */
12308 scratch = sparc_legitimize_pic_address (funexp, scratch);
12309 seq = get_insns ();
12310 end_sequence ();
12311 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12312 }
12313 else if (TARGET_ARCH32)
12314 {
12315 emit_insn (gen_rtx_SET (scratch,
12316 gen_rtx_HIGH (SImode, funexp)));
12317 emit_insn (gen_rtx_SET (scratch,
12318 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12319 }
12320 else /* TARGET_ARCH64 */
12321 {
12322 switch (sparc_cmodel)
12323 {
12324 case CM_MEDLOW:
12325 case CM_MEDMID:
12326 /* The destination can serve as a temporary. */
12327 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12328 break;
12329
12330 case CM_MEDANY:
12331 case CM_EMBMEDANY:
12332 /* The destination cannot serve as a temporary. */
12333 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12334 start_sequence ();
12335 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12336 seq = get_insns ();
12337 end_sequence ();
12338 emit_and_preserve (seq, spill_reg, 0);
12339 break;
12340
12341 default:
12342 gcc_unreachable ();
12343 }
12344 }
12345
12346 emit_jump_insn (gen_indirect_jump (scratch));
12347 }
12348
12349 emit_barrier ();
12350
12351 /* Run just enough of rest_of_compilation to get the insns emitted.
12352 There's not really enough bulk here to make other passes such as
12353 instruction scheduling worth while. Note that use_thunk calls
12354 assemble_start_function and assemble_end_function. */
12355 insn = get_insns ();
12356 shorten_branches (insn);
12357 final_start_function (insn, file, 1);
12358 final (insn, file, 1);
12359 final_end_function ();
12360
12361 reload_completed = 0;
12362 epilogue_completed = 0;
12363 }
12364
12365 /* Return true if sparc_output_mi_thunk would be able to output the
12366 assembler code for the thunk function specified by the arguments
12367 it is passed, and false otherwise. */
12368 static bool
12369 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12370 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12371 HOST_WIDE_INT vcall_offset,
12372 const_tree function ATTRIBUTE_UNUSED)
12373 {
12374 /* Bound the loop used in the default method above. */
12375 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12376 }
12377
12378 /* How to allocate a 'struct machine_function'. */
12379
12380 static struct machine_function *
12381 sparc_init_machine_status (void)
12382 {
12383 return ggc_cleared_alloc<machine_function> ();
12384 }
12385
12386 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12387 We need to emit DTP-relative relocations. */
12388
12389 static void
12390 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12391 {
12392 switch (size)
12393 {
12394 case 4:
12395 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12396 break;
12397 case 8:
12398 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12399 break;
12400 default:
12401 gcc_unreachable ();
12402 }
12403 output_addr_const (file, x);
12404 fputs (")", file);
12405 }
12406
12407 /* Do whatever processing is required at the end of a file. */
12408
12409 static void
12410 sparc_file_end (void)
12411 {
12412 /* If we need to emit the special GOT helper function, do so now. */
12413 if (got_helper_rtx)
12414 {
12415 const char *name = XSTR (got_helper_rtx, 0);
12416 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12417 #ifdef DWARF2_UNWIND_INFO
12418 bool do_cfi;
12419 #endif
12420
12421 if (USE_HIDDEN_LINKONCE)
12422 {
12423 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12424 get_identifier (name),
12425 build_function_type_list (void_type_node,
12426 NULL_TREE));
12427 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12428 NULL_TREE, void_type_node);
12429 TREE_PUBLIC (decl) = 1;
12430 TREE_STATIC (decl) = 1;
12431 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12432 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12433 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12434 resolve_unique_section (decl, 0, flag_function_sections);
12435 allocate_struct_function (decl, true);
12436 cfun->is_thunk = 1;
12437 current_function_decl = decl;
12438 init_varasm_status ();
12439 assemble_start_function (decl, name);
12440 }
12441 else
12442 {
12443 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12444 switch_to_section (text_section);
12445 if (align > 0)
12446 ASM_OUTPUT_ALIGN (asm_out_file, align);
12447 ASM_OUTPUT_LABEL (asm_out_file, name);
12448 }
12449
12450 #ifdef DWARF2_UNWIND_INFO
12451 do_cfi = dwarf2out_do_cfi_asm ();
12452 if (do_cfi)
12453 fprintf (asm_out_file, "\t.cfi_startproc\n");
12454 #endif
12455 if (flag_delayed_branch)
12456 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12457 reg_name, reg_name);
12458 else
12459 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12460 reg_name, reg_name);
12461 #ifdef DWARF2_UNWIND_INFO
12462 if (do_cfi)
12463 fprintf (asm_out_file, "\t.cfi_endproc\n");
12464 #endif
12465 }
12466
12467 if (NEED_INDICATE_EXEC_STACK)
12468 file_end_indicate_exec_stack ();
12469
12470 #ifdef TARGET_SOLARIS
12471 solaris_file_end ();
12472 #endif
12473 }
12474
12475 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12476 /* Implement TARGET_MANGLE_TYPE. */
12477
12478 static const char *
12479 sparc_mangle_type (const_tree type)
12480 {
12481 if (TARGET_ARCH32
12482 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12483 && TARGET_LONG_DOUBLE_128)
12484 return "g";
12485
12486 /* For all other types, use normal C++ mangling. */
12487 return NULL;
12488 }
12489 #endif
12490
12491 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12492 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12493 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12494
12495 void
12496 sparc_emit_membar_for_model (enum memmodel model,
12497 int load_store, int before_after)
12498 {
12499 /* Bits for the MEMBAR mmask field. */
12500 const int LoadLoad = 1;
12501 const int StoreLoad = 2;
12502 const int LoadStore = 4;
12503 const int StoreStore = 8;
12504
12505 int mm = 0, implied = 0;
12506
12507 switch (sparc_memory_model)
12508 {
12509 case SMM_SC:
12510 /* Sequential Consistency. All memory transactions are immediately
12511 visible in sequential execution order. No barriers needed. */
12512 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12513 break;
12514
12515 case SMM_TSO:
12516 /* Total Store Ordering: all memory transactions with store semantics
12517 are followed by an implied StoreStore. */
12518 implied |= StoreStore;
12519
12520 /* If we're not looking for a raw barrier (before+after), then atomic
12521 operations get the benefit of being both load and store. */
12522 if (load_store == 3 && before_after == 1)
12523 implied |= StoreLoad;
12524 /* FALLTHRU */
12525
12526 case SMM_PSO:
12527 /* Partial Store Ordering: all memory transactions with load semantics
12528 are followed by an implied LoadLoad | LoadStore. */
12529 implied |= LoadLoad | LoadStore;
12530
12531 /* If we're not looking for a raw barrier (before+after), then atomic
12532 operations get the benefit of being both load and store. */
12533 if (load_store == 3 && before_after == 2)
12534 implied |= StoreLoad | StoreStore;
12535 /* FALLTHRU */
12536
12537 case SMM_RMO:
12538 /* Relaxed Memory Ordering: no implicit bits. */
12539 break;
12540
12541 default:
12542 gcc_unreachable ();
12543 }
12544
12545 if (before_after & 1)
12546 {
12547 if (is_mm_release (model) || is_mm_acq_rel (model)
12548 || is_mm_seq_cst (model))
12549 {
12550 if (load_store & 1)
12551 mm |= LoadLoad | StoreLoad;
12552 if (load_store & 2)
12553 mm |= LoadStore | StoreStore;
12554 }
12555 }
12556 if (before_after & 2)
12557 {
12558 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12559 || is_mm_seq_cst (model))
12560 {
12561 if (load_store & 1)
12562 mm |= LoadLoad | LoadStore;
12563 if (load_store & 2)
12564 mm |= StoreLoad | StoreStore;
12565 }
12566 }
12567
12568 /* Remove the bits implied by the system memory model. */
12569 mm &= ~implied;
12570
12571 /* For raw barriers (before+after), always emit a barrier.
12572 This will become a compile-time barrier if needed. */
12573 if (mm || before_after == 3)
12574 emit_insn (gen_membar (GEN_INT (mm)));
12575 }
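/* Two traces of the function above, worked mechanically from the code:
   with sparc_memory_model == SMM_RMO, a call such as
   sparc_emit_membar_for_model (MEMMODEL_SEQ_CST, 3, 2) has nothing implied
   and emits a full barrier with mmask LoadLoad|StoreLoad|LoadStore|StoreStore
   (0xf); under SMM_TSO the very same call finds all four bits implied by the
   memory model and emits no instruction at all.  */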
12576
12577 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
12578    32-bit compare and swap on the word containing the byte or half-word.  */
12579
12580 static void
12581 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12582 rtx oldval, rtx newval)
12583 {
12584 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12585 rtx addr = gen_reg_rtx (Pmode);
12586 rtx off = gen_reg_rtx (SImode);
12587 rtx oldv = gen_reg_rtx (SImode);
12588 rtx newv = gen_reg_rtx (SImode);
12589 rtx oldvalue = gen_reg_rtx (SImode);
12590 rtx newvalue = gen_reg_rtx (SImode);
12591 rtx res = gen_reg_rtx (SImode);
12592 rtx resv = gen_reg_rtx (SImode);
12593 rtx memsi, val, mask, cc;
12594
12595 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12596
12597 if (Pmode != SImode)
12598 addr1 = gen_lowpart (SImode, addr1);
12599 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12600
12601 memsi = gen_rtx_MEM (SImode, addr);
12602 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12603 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12604
12605 val = copy_to_reg (memsi);
12606
12607 emit_insn (gen_rtx_SET (off,
12608 gen_rtx_XOR (SImode, off,
12609 GEN_INT (GET_MODE (mem) == QImode
12610 ? 3 : 2))));
12611
12612 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12613
12614 if (GET_MODE (mem) == QImode)
12615 mask = force_reg (SImode, GEN_INT (0xff));
12616 else
12617 mask = force_reg (SImode, GEN_INT (0xffff));
12618
12619 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12620
12621 emit_insn (gen_rtx_SET (val,
12622 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12623 val)));
12624
12625 oldval = gen_lowpart (SImode, oldval);
12626 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12627
12628 newval = gen_lowpart_common (SImode, newval);
12629 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12630
12631 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12632
12633 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12634
12635 rtx_code_label *end_label = gen_label_rtx ();
12636 rtx_code_label *loop_label = gen_label_rtx ();
12637 emit_label (loop_label);
12638
12639 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12640
12641 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12642
12643 emit_move_insn (bool_result, const1_rtx);
12644
12645 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12646
12647 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12648
12649 emit_insn (gen_rtx_SET (resv,
12650 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12651 res)));
12652
12653 emit_move_insn (bool_result, const0_rtx);
12654
12655 cc = gen_compare_reg_1 (NE, resv, val);
12656 emit_insn (gen_rtx_SET (val, resv));
12657
12658 /* Use cbranchcc4 to separate the compare and branch! */
12659 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12660 cc, const0_rtx, loop_label));
12661
12662 emit_label (end_label);
12663
12664 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12665
12666 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12667
12668 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12669 }
12670
12671 /* Expand code to perform a compare-and-swap. */
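/* The incoming operands follow the standard atomic_compare_and_swap<mode>
   layout: 0 is the boolean success output, 1 the output value, 2 the
   memory operand, 3 the expected value, 4 the desired value and 6 the
   success memory model; operand 5 (is_weak) and operand 7 (the failure
   model) are not used here. */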
12672
12673 void
12674 sparc_expand_compare_and_swap (rtx operands[])
12675 {
12676 rtx bval, retval, mem, oldval, newval;
12677 machine_mode mode;
12678 enum memmodel model;
12679
12680 bval = operands[0];
12681 retval = operands[1];
12682 mem = operands[2];
12683 oldval = operands[3];
12684 newval = operands[4];
12685 model = (enum memmodel) INTVAL (operands[6]);
12686 mode = GET_MODE (mem);
12687
12688 sparc_emit_membar_for_model (model, 3, 1);
12689
12690 if (reg_overlap_mentioned_p (retval, oldval))
12691 oldval = copy_to_reg (oldval);
12692
12693 if (mode == QImode || mode == HImode)
12694 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12695 else
12696 {
12697 rtx (*gen) (rtx, rtx, rtx, rtx);
12698 rtx x;
12699
12700 if (mode == SImode)
12701 gen = gen_atomic_compare_and_swapsi_1;
12702 else
12703 gen = gen_atomic_compare_and_swapdi_1;
12704 emit_insn (gen (retval, mem, oldval, newval));
12705
12706 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12707 if (x != bval)
12708 convert_move (bval, x, 1);
12709 }
12710
12711 sparc_emit_membar_for_model (model, 3, 2);
12712 }
12713
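/* Expand the vector permutation selector SEL for mode VMODE into the
   byte-index form expected by the VIS2 BMASK/BSHUFFLE pair and emit the
   bmask insn; as a side effect that insn leaves the mask in the GSR
   register, where a subsequent bshuffle insn will find it. */
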
12714 void
12715 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12716 {
12717 rtx t_1, t_2, t_3;
12718
12719 sel = gen_lowpart (DImode, sel);
12720 switch (vmode)
12721 {
12722 case E_V2SImode:
12723 /* inp = xxxxxxxAxxxxxxxB */
12724 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12725 NULL_RTX, 1, OPTAB_DIRECT);
12726 /* t_1 = ....xxxxxxxAxxx. */
12727 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12728 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12729 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12730 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12731 /* sel = .......B */
12732 /* t_1 = ...A.... */
12733 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12734 /* sel = ...A...B */
12735 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12736 /* sel = AAAABBBB * 4 */
12737 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12738 /* sel = { A*4, A*4+1, A*4+2, ... } */
12739 break;
12740
12741 case E_V4HImode:
12742 /* inp = xxxAxxxBxxxCxxxD */
12743 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12744 NULL_RTX, 1, OPTAB_DIRECT);
12745 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12746 NULL_RTX, 1, OPTAB_DIRECT);
12747 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12748 NULL_RTX, 1, OPTAB_DIRECT);
12749 /* t_1 = ..xxxAxxxBxxxCxx */
12750 /* t_2 = ....xxxAxxxBxxxC */
12751 /* t_3 = ......xxxAxxxBxx */
12752 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12753 GEN_INT (0x07),
12754 NULL_RTX, 1, OPTAB_DIRECT);
12755 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12756 GEN_INT (0x0700),
12757 NULL_RTX, 1, OPTAB_DIRECT);
12758 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12759 GEN_INT (0x070000),
12760 NULL_RTX, 1, OPTAB_DIRECT);
12761 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12762 GEN_INT (0x07000000),
12763 NULL_RTX, 1, OPTAB_DIRECT);
12764 /* sel = .......D */
12765 /* t_1 = .....C.. */
12766 /* t_2 = ...B.... */
12767 /* t_3 = .A...... */
12768 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12769 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12770 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12771 /* sel = .A.B.C.D */
12772 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12773 /* sel = AABBCCDD * 2 */
12774 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12775 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12776 break;
12777
12778 case E_V8QImode:
12779 /* input = xAxBxCxDxExFxGxH */
12780 sel = expand_simple_binop (DImode, AND, sel,
12781 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12782 | 0x0f0f0f0f),
12783 NULL_RTX, 1, OPTAB_DIRECT);
12784 /* sel = .A.B.C.D.E.F.G.H */
12785 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12786 NULL_RTX, 1, OPTAB_DIRECT);
12787 /* t_1 = ..A.B.C.D.E.F.G. */
12788 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12789 NULL_RTX, 1, OPTAB_DIRECT);
12790 /* sel = .AABBCCDDEEFFGGH */
12791 sel = expand_simple_binop (DImode, AND, sel,
12792 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12793 | 0xff00ff),
12794 NULL_RTX, 1, OPTAB_DIRECT);
12795 /* sel = ..AB..CD..EF..GH */
12796 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12797 NULL_RTX, 1, OPTAB_DIRECT);
12798 /* t_1 = ....AB..CD..EF.. */
12799 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12800 NULL_RTX, 1, OPTAB_DIRECT);
12801 /* sel = ..ABABCDCDEFEFGH */
12802 sel = expand_simple_binop (DImode, AND, sel,
12803 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12804 NULL_RTX, 1, OPTAB_DIRECT);
12805 /* sel = ....ABCD....EFGH */
12806 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12807 NULL_RTX, 1, OPTAB_DIRECT);
12808 /* t_1 = ........ABCD.... */
12809 sel = gen_lowpart (SImode, sel);
12810 t_1 = gen_lowpart (SImode, t_1);
12811 break;
12812
12813 default:
12814 gcc_unreachable ();
12815 }
12816
12817 /* Always perform the final addition/merge within the bmask insn. */
12818 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12819 }
12820
12821 /* Implement TARGET_VEC_PERM_CONST. */
12822
12823 static bool
12824 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12825 rtx op1, const vec_perm_indices &sel)
12826 {
12827 if (!TARGET_VIS2)
12828 return false;
12829
12830 /* All permutes are supported. */
12831 if (!target)
12832 return true;
12833
12834 /* Force target-independent code to convert constant permutations on other
12835 modes down to V8QI. Rely on this to avoid the complexity of the byte
12836 order of the permutation. */
12837 if (vmode != V8QImode)
12838 return false;
12839
12840 unsigned int i, mask;
12841 for (i = mask = 0; i < 8; ++i)
12842 mask |= (sel[i] & 0xf) << (28 - i*4);
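 /* As an illustration, the identity selector { 0, 1, 2, 3, 4, 5, 6, 7 }
 packs into mask 0x01234567, while a byte reversal of the first operand,
 { 7, 6, 5, 4, 3, 2, 1, 0 }, packs into 0x76543210. */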
12843 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12844
12845 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12846 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12847 return true;
12848 }
12849
12850 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12851
12852 static bool
12853 sparc_frame_pointer_required (void)
12854 {
12855 /* If the stack pointer is dynamically modified in the function, it cannot
12856 serve as the frame pointer. */
12857 if (cfun->calls_alloca)
12858 return true;
12859
12860 /* If the function receives nonlocal gotos, it needs to save the frame
12861 pointer in the nonlocal_goto_save_area object. */
12862 if (cfun->has_nonlocal_label)
12863 return true;
12864
12865 /* In flat mode, that's it. */
12866 if (TARGET_FLAT)
12867 return false;
12868
12869 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12870 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12871 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12872 }
12873
12874 /* The way this is structured, we can't eliminate SFP in favor of SP
12875 if the frame pointer is required: we want to use the SFP->HFP elimination
12876 in that case. But the test in update_eliminables doesn't know we are
12877 assuming below that we only do the former elimination. */
12878
12879 static bool
12880 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12881 {
12882 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12883 }
12884
12885 /* Return the hard frame pointer directly to bypass the stack bias. */
12886
12887 static rtx
12888 sparc_builtin_setjmp_frame_value (void)
12889 {
12890 return hard_frame_pointer_rtx;
12891 }
12892
12893 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12894 they won't be allocated. */
12895
12896 static void
12897 sparc_conditional_register_usage (void)
12898 {
12899 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12900 {
12901 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12902 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12903 }
12904 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
12905 /* then honor it. */
12906 if (TARGET_ARCH32 && fixed_regs[5])
12907 fixed_regs[5] = 1;
12908 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12909 fixed_regs[5] = 0;
12910 if (! TARGET_V9)
12911 {
12912 int regno;
12913 for (regno = SPARC_FIRST_V9_FP_REG;
12914 regno <= SPARC_LAST_V9_FP_REG;
12915 regno++)
12916 fixed_regs[regno] = 1;
12917 /* %fcc0 is used by v8 and v9. */
12918 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12919 regno <= SPARC_LAST_V9_FCC_REG;
12920 regno++)
12921 fixed_regs[regno] = 1;
12922 }
12923 if (! TARGET_FPU)
12924 {
12925 int regno;
12926 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12927 fixed_regs[regno] = 1;
12928 }
12929 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
12930 /* then honor it. Likewise with g3 and g4. */
12931 if (fixed_regs[2] == 2)
12932 fixed_regs[2] = ! TARGET_APP_REGS;
12933 if (fixed_regs[3] == 2)
12934 fixed_regs[3] = ! TARGET_APP_REGS;
12935 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12936 fixed_regs[4] = ! TARGET_APP_REGS;
12937 else if (TARGET_CM_EMBMEDANY)
12938 fixed_regs[4] = 1;
12939 else if (fixed_regs[4] == 2)
12940 fixed_regs[4] = 0;
12941 if (TARGET_FLAT)
12942 {
12943 int regno;
12944 /* Disable leaf functions. */
12945 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12946 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12947 leaf_reg_remap [regno] = regno;
12948 }
12949 if (TARGET_VIS)
12950 global_regs[SPARC_GSR_REG] = 1;
12951 }
12952
12953 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12954
12955 - We can't load constants into FP registers.
12956 - We can't load FP constants into integer registers when soft-float,
12957 because there is no soft-float pattern with a r/F constraint.
12958 - We can't load FP constants into integer registers for TFmode unless
12959 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12960 - Try and reload integer constants (symbolic or otherwise) back into
12961 registers directly, rather than having them dumped to memory. */
12962
12963 static reg_class_t
12964 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12965 {
12966 machine_mode mode = GET_MODE (x);
12967 if (CONSTANT_P (x))
12968 {
12969 if (FP_REG_CLASS_P (rclass)
12970 || rclass == GENERAL_OR_FP_REGS
12971 || rclass == GENERAL_OR_EXTRA_FP_REGS
12972 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12973 || (mode == TFmode && ! const_zero_operand (x, mode)))
12974 return NO_REGS;
12975
12976 if (GET_MODE_CLASS (mode) == MODE_INT)
12977 return GENERAL_REGS;
12978
12979 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12980 {
12981 if (! FP_REG_CLASS_P (rclass)
12982 || !(const_zero_operand (x, mode)
12983 || const_all_ones_operand (x, mode)))
12984 return NO_REGS;
12985 }
12986 }
12987
12988 if (TARGET_VIS3
12989 && ! TARGET_ARCH64
12990 && (rclass == EXTRA_FP_REGS
12991 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12992 {
12993 int regno = true_regnum (x);
12994
12995 if (SPARC_INT_REG_P (regno))
12996 return (rclass == EXTRA_FP_REGS
12997 ? FP_REGS : GENERAL_OR_FP_REGS);
12998 }
12999
13000 return rclass;
13001 }
13002
13003 /* Return true if we use LRA instead of the reload pass. */
13004
13005 static bool
13006 sparc_lra_p (void)
13007 {
13008 return TARGET_LRA;
13009 }
13010
13011 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13012 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
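/* For instance, in the most general case handled at the end of the
   function, with neither input known to be zero-extended and OPCODE
   being "mulx", the emitted sequence is roughly:

     srl   %L1, 0, %L1
     srl   %L2, 0, %L2
     sllx  %H1, 32, %3
     sllx  %H2, 32, %4
     or    %L1, %3, %3
     or    %L2, %4, %4
     mulx  %3, %4, %3
     srlx  %3, 32, %H0
     mov   %3, %L0

   i.e. the two 64-bit values are first assembled in the scratch
   registers %3 and %4 from their 32-bit halves, multiplied there, and
   the result split back into the output register pair. */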
13013
13014 const char *
13015 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13016 {
13017 char mulstr[32];
13018
13019 gcc_assert (! TARGET_ARCH64);
13020
13021 if (sparc_check_64 (operands[1], insn) <= 0)
13022 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13023 if (which_alternative == 1)
13024 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13025 if (GET_CODE (operands[2]) == CONST_INT)
13026 {
13027 if (which_alternative == 1)
13028 {
13029 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13030 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13031 output_asm_insn (mulstr, operands);
13032 return "srlx\t%L0, 32, %H0";
13033 }
13034 else
13035 {
13036 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13037 output_asm_insn ("or\t%L1, %3, %3", operands);
13038 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13039 output_asm_insn (mulstr, operands);
13040 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13041 return "mov\t%3, %L0";
13042 }
13043 }
13044 else if (rtx_equal_p (operands[1], operands[2]))
13045 {
13046 if (which_alternative == 1)
13047 {
13048 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13049 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13050 output_asm_insn (mulstr, operands);
13051 return "srlx\t%L0, 32, %H0";
13052 }
13053 else
13054 {
13055 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13056 output_asm_insn ("or\t%L1, %3, %3", operands);
13057 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13058 output_asm_insn (mulstr, operands);
13059 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13060 return "mov\t%3, %L0";
13061 }
13062 }
13063 if (sparc_check_64 (operands[2], insn) <= 0)
13064 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13065 if (which_alternative == 1)
13066 {
13067 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13068 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13069 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13070 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13071 output_asm_insn (mulstr, operands);
13072 return "srlx\t%L0, 32, %H0";
13073 }
13074 else
13075 {
13076 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13077 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13078 output_asm_insn ("or\t%L1, %3, %3", operands);
13079 output_asm_insn ("or\t%L2, %4, %4", operands);
13080 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13081 output_asm_insn (mulstr, operands);
13082 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13083 return "mov\t%3, %L0";
13084 }
13085 }
13086
13087 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13088 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13089 and INNER_MODE are the modes describing TARGET. */
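/* ELT is placed in the least significant bytes of T1, so the bmask
   constants below simply replicate those byte indices (4-7 for V2SI,
   6-7 for V4HI, 7 for V8QI) across every position of the result before
   the bshuffle insn reads the mask back from the GSR register. */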
13090
13091 static void
13092 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13093 machine_mode inner_mode)
13094 {
13095 rtx t1, final_insn, sel;
13096 int bmask;
13097
13098 t1 = gen_reg_rtx (mode);
13099
13100 elt = convert_modes (SImode, inner_mode, elt, true);
13101 emit_move_insn (gen_lowpart(SImode, t1), elt);
13102
13103 switch (mode)
13104 {
13105 case E_V2SImode:
13106 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13107 bmask = 0x45674567;
13108 break;
13109 case E_V4HImode:
13110 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13111 bmask = 0x67676767;
13112 break;
13113 case E_V8QImode:
13114 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13115 bmask = 0x77777777;
13116 break;
13117 default:
13118 gcc_unreachable ();
13119 }
13120
13121 sel = force_reg (SImode, GEN_INT (bmask));
13122 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13123 emit_insn (final_insn);
13124 }
13125
13126 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13127 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
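/* Each fpmerge of a value with itself interleaves its bytes, doubling
   the number of adjacent copies of ELT, so the three merges below turn
   the single byte into 2, then 4, and finally 8 copies. */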
13128
13129 static void
13130 vector_init_fpmerge (rtx target, rtx elt)
13131 {
13132 rtx t1, t2, t2_low, t3, t3_low;
13133
13134 t1 = gen_reg_rtx (V4QImode);
13135 elt = convert_modes (SImode, QImode, elt, true);
13136 emit_move_insn (gen_lowpart (SImode, t1), elt);
13137
13138 t2 = gen_reg_rtx (V8QImode);
13139 t2_low = gen_lowpart (V4QImode, t2);
13140 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13141
13142 t3 = gen_reg_rtx (V8QImode);
13143 t3_low = gen_lowpart (V4QImode, t3);
13144 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13145
13146 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13147 }
13148
13149 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13150 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
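/* With the GSR alignment offset set to 6 by the alignaddr insn, each
   faligndata extracts 8 bytes starting 6 bytes into the T1:TARGET pair,
   i.e. it prepends the half-word holding ELT and shifts the previous
   contents of TARGET down by one element, so four iterations fill the
   whole vector. */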
13151
13152 static void
13153 vector_init_faligndata (rtx target, rtx elt)
13154 {
13155 rtx t1 = gen_reg_rtx (V4HImode);
13156 int i;
13157
13158 elt = convert_modes (SImode, HImode, elt, true);
13159 emit_move_insn (gen_lowpart (SImode, t1), elt);
13160
13161 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13162 force_reg (SImode, GEN_INT (6)),
13163 const0_rtx));
13164
13165 for (i = 0; i < 4; i++)
13166 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13167 }
13168
13169 /* Emit code to initialize TARGET to values for individual fields VALS. */
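/* The strategies below, in order of preference: a fully constant vector
   is emitted directly; a vector whose element fills the whole mode, or
   exactly one word of a two-word mode, is built with plain moves; an
   8-byte splat of a single value uses bshuffle on VIS2, fpmerge for
   V8QImode or faligndata for V4HImode otherwise; and everything else
   goes through a temporary on the stack. */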
13170
13171 void
13172 sparc_expand_vector_init (rtx target, rtx vals)
13173 {
13174 const machine_mode mode = GET_MODE (target);
13175 const machine_mode inner_mode = GET_MODE_INNER (mode);
13176 const int n_elts = GET_MODE_NUNITS (mode);
13177 int i, n_var = 0;
13178 bool all_same = true;
13179 rtx mem;
13180
13181 for (i = 0; i < n_elts; i++)
13182 {
13183 rtx x = XVECEXP (vals, 0, i);
13184 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13185 n_var++;
13186
13187 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13188 all_same = false;
13189 }
13190
13191 if (n_var == 0)
13192 {
13193 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13194 return;
13195 }
13196
13197 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13198 {
13199 if (GET_MODE_SIZE (inner_mode) == 4)
13200 {
13201 emit_move_insn (gen_lowpart (SImode, target),
13202 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13203 return;
13204 }
13205 else if (GET_MODE_SIZE (inner_mode) == 8)
13206 {
13207 emit_move_insn (gen_lowpart (DImode, target),
13208 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13209 return;
13210 }
13211 }
13212 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13213 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13214 {
13215 emit_move_insn (gen_highpart (word_mode, target),
13216 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13217 emit_move_insn (gen_lowpart (word_mode, target),
13218 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13219 return;
13220 }
13221
13222 if (all_same && GET_MODE_SIZE (mode) == 8)
13223 {
13224 if (TARGET_VIS2)
13225 {
13226 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13227 return;
13228 }
13229 if (mode == V8QImode)
13230 {
13231 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13232 return;
13233 }
13234 if (mode == V4HImode)
13235 {
13236 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13237 return;
13238 }
13239 }
13240
13241 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13242 for (i = 0; i < n_elts; i++)
13243 emit_move_insn (adjust_address_nv (mem, inner_mode,
13244 i * GET_MODE_SIZE (inner_mode)),
13245 XVECEXP (vals, 0, i));
13246 emit_move_insn (target, mem);
13247 }
13248
13249 /* Implement TARGET_SECONDARY_RELOAD. */
13250
13251 static reg_class_t
13252 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13253 machine_mode mode, secondary_reload_info *sri)
13254 {
13255 enum reg_class rclass = (enum reg_class) rclass_i;
13256
13257 sri->icode = CODE_FOR_nothing;
13258 sri->extra_cost = 0;
13259
13260 /* We need a temporary when loading/storing a HImode/QImode value
13261 between memory and the FPU registers. This can happen when combine puts
13262 a paradoxical subreg in a float/fix conversion insn. */
13263 if (FP_REG_CLASS_P (rclass)
13264 && (mode == HImode || mode == QImode)
13265 && (GET_CODE (x) == MEM
13266 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13267 && true_regnum (x) == -1)))
13268 return GENERAL_REGS;
13269
13270 /* On 32-bit we need a temporary when loading/storing a DFmode value
13271 between unaligned memory and the upper FPU registers. */
13272 if (TARGET_ARCH32
13273 && rclass == EXTRA_FP_REGS
13274 && mode == DFmode
13275 && GET_CODE (x) == MEM
13276 && ! mem_min_alignment (x, 8))
13277 return FP_REGS;
13278
13279 if (((TARGET_CM_MEDANY
13280 && symbolic_operand (x, mode))
13281 || (TARGET_CM_EMBMEDANY
13282 && text_segment_operand (x, mode)))
13283 && ! flag_pic)
13284 {
13285 if (in_p)
13286 sri->icode = direct_optab_handler (reload_in_optab, mode);
13287 else
13288 sri->icode = direct_optab_handler (reload_out_optab, mode);
13289 return NO_REGS;
13290 }
13291
13292 if (TARGET_VIS3 && TARGET_ARCH32)
13293 {
13294 int regno = true_regnum (x);
13295
13296 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13297 to move 8-byte values in 4-byte pieces. This only works via
13298 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13299 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13300 an FP_REGS intermediate move. */
13301 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13302 || ((general_or_i64_p (rclass)
13303 || rclass == GENERAL_OR_FP_REGS)
13304 && SPARC_FP_REG_P (regno)))
13305 {
13306 sri->extra_cost = 2;
13307 return FP_REGS;
13308 }
13309 }
13310
13311 return NO_REGS;
13312 }
13313
13314 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13315
13316 On SPARC, when VIS3 is not available, it is not possible to move data
13317 directly between GENERAL_REGS and FP_REGS. */
13318
13319 static bool
13320 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13321 reg_class_t class2)
13322 {
13323 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13324 && (! TARGET_VIS3
13325 || GET_MODE_SIZE (mode) > 8
13326 || GET_MODE_SIZE (mode) < 4));
13327 }
13328
13329 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13330
13331 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13332 because the movsi and movsf patterns don't handle r/f moves.
13333 For v8 we copy the default definition. */
13334
13335 static machine_mode
13336 sparc_secondary_memory_needed_mode (machine_mode mode)
13337 {
13338 if (TARGET_ARCH64)
13339 {
13340 if (GET_MODE_BITSIZE (mode) < 32)
13341 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13342 return mode;
13343 }
13344 else
13345 {
13346 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13347 return mode_for_size (BITS_PER_WORD,
13348 GET_MODE_CLASS (mode), 0).require ();
13349 return mode;
13350 }
13351 }
13352
13353 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13354 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13355
13356 bool
13357 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13358 {
13359 enum rtx_code rc = GET_CODE (operands[1]);
13360 machine_mode cmp_mode;
13361 rtx cc_reg, dst, cmp;
13362
13363 cmp = operands[1];
13364 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13365 return false;
13366
13367 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13368 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13369
13370 cmp_mode = GET_MODE (XEXP (cmp, 0));
13371 rc = GET_CODE (cmp);
13372
13373 dst = operands[0];
13374 if (! rtx_equal_p (operands[2], dst)
13375 && ! rtx_equal_p (operands[3], dst))
13376 {
13377 if (reg_overlap_mentioned_p (dst, cmp))
13378 dst = gen_reg_rtx (mode);
13379
13380 emit_move_insn (dst, operands[3]);
13381 }
13382 else if (operands[2] == dst)
13383 {
13384 operands[2] = operands[3];
13385
13386 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13387 rc = reverse_condition_maybe_unordered (rc);
13388 else
13389 rc = reverse_condition (rc);
13390 }
13391
13392 if (XEXP (cmp, 1) == const0_rtx
13393 && GET_CODE (XEXP (cmp, 0)) == REG
13394 && cmp_mode == DImode
13395 && v9_regcmp_p (rc))
13396 cc_reg = XEXP (cmp, 0);
13397 else
13398 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13399
13400 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13401
13402 emit_insn (gen_rtx_SET (dst,
13403 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13404
13405 if (dst != operands[0])
13406 emit_move_insn (operands[0], dst);
13407
13408 return true;
13409 }
13410
13411 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13412 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13413 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13414 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13415 code to be used for the condition mask. */
13416
13417 void
13418 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13419 {
13420 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13421 enum rtx_code code = GET_CODE (operands[3]);
13422
13423 mask = gen_reg_rtx (Pmode);
13424 cop0 = operands[4];
13425 cop1 = operands[5];
13426 if (code == LT || code == GE)
13427 {
13428 rtx t;
13429
13430 code = swap_condition (code);
13431 t = cop0; cop0 = cop1; cop1 = t;
13432 }
13433
13434 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13435
13436 fcmp = gen_rtx_UNSPEC (Pmode,
13437 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13438 fcode);
13439
13440 cmask = gen_rtx_UNSPEC (DImode,
13441 gen_rtvec (2, mask, gsr),
13442 ccode);
13443
13444 bshuf = gen_rtx_UNSPEC (mode,
13445 gen_rtvec (3, operands[1], operands[2], gsr),
13446 UNSPEC_BSHUFFLE);
13447
13448 emit_insn (gen_rtx_SET (mask, fcmp));
13449 emit_insn (gen_rtx_SET (gsr, cmask));
13450
13451 emit_insn (gen_rtx_SET (operands[0], bshuf));
13452 }
13453
13454 /* On sparc, any mode which naturally allocates into the float
13455 registers should return 4 here. */
13456
13457 unsigned int
13458 sparc_regmode_natural_size (machine_mode mode)
13459 {
13460 int size = UNITS_PER_WORD;
13461
13462 if (TARGET_ARCH64)
13463 {
13464 enum mode_class mclass = GET_MODE_CLASS (mode);
13465
13466 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13467 size = 4;
13468 }
13469
13470 return size;
13471 }
13472
13473 /* Implement TARGET_HARD_REGNO_NREGS.
13474
13475 On SPARC, ordinary registers hold 32 bits worth; this means both
13476 integer and floating point registers. On v9, integer regs hold 64
13477 bits worth; floating point regs hold 32 bits worth (this includes the
13478 new fp regs as even the odd ones are included in the hard register
13479 count). */
13480
13481 static unsigned int
13482 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13483 {
13484 if (regno == SPARC_GSR_REG)
13485 return 1;
13486 if (TARGET_ARCH64)
13487 {
13488 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13489 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13490 return CEIL (GET_MODE_SIZE (mode), 4);
13491 }
13492 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13493 }
13494
13495 /* Implement TARGET_HARD_REGNO_MODE_OK.
13496
13497 ??? Because of the funny way we pass parameters we should allow certain
13498 ??? types of float/complex values to be in integer registers during
13499 ??? RTL generation. This only matters on arch32. */
13500
13501 static bool
13502 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13503 {
13504 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13505 }
13506
13507 /* Implement TARGET_MODES_TIEABLE_P.
13508
13509 For V9 we have to deal with the fact that only the lower 32 floating
13510 point registers are 32-bit addressable. */
13511
13512 static bool
13513 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13514 {
13515 enum mode_class mclass1, mclass2;
13516 unsigned short size1, size2;
13517
13518 if (mode1 == mode2)
13519 return true;
13520
13521 mclass1 = GET_MODE_CLASS (mode1);
13522 mclass2 = GET_MODE_CLASS (mode2);
13523 if (mclass1 != mclass2)
13524 return false;
13525
13526 if (! TARGET_V9)
13527 return true;
13528
13529 /* Classes are the same and we are V9 so we have to deal with upper
13530 vs. lower floating point registers. If one of the modes is a
13531 4-byte mode, and the other is not, we have to mark them as not
13532 tieable because only the lower 32 floating point registers are
13533 addressable 32 bits at a time.
13534
13535 We can't just test explicitly for SFmode, otherwise we won't
13536 cover the vector mode cases properly. */
13537
13538 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13539 return true;
13540
13541 size1 = GET_MODE_SIZE (mode1);
13542 size2 = GET_MODE_SIZE (mode2);
13543 if ((size1 > 4 && size2 == 4)
13544 || (size2 > 4 && size1 == 4))
13545 return false;
13546
13547 return true;
13548 }
13549
13550 /* Implement TARGET_CSTORE_MODE. */
13551
13552 static scalar_int_mode
13553 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13554 {
13555 return (TARGET_ARCH64 ? DImode : SImode);
13556 }
13557
13558 /* Return the compound expression made of T1 and T2. */
13559
13560 static inline tree
13561 compound_expr (tree t1, tree t2)
13562 {
13563 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13564 }
13565
13566 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13567
13568 static void
13569 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13570 {
13571 if (!TARGET_FPU)
13572 return;
13573
13574 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13575 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13576
13577 /* We generate the equivalent of feholdexcept (&fenv_var):
13578
13579 unsigned int fenv_var;
13580 __builtin_store_fsr (&fenv_var);
13581
13582 unsigned int tmp1_var;
13583 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13584
13585 __builtin_load_fsr (&tmp1_var); */
13586
13587 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13588 TREE_ADDRESSABLE (fenv_var) = 1;
13589 tree fenv_addr = build_fold_addr_expr (fenv_var);
13590 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13591 tree hold_stfsr
13592 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13593 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13594
13595 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13596 TREE_ADDRESSABLE (tmp1_var) = 1;
13597 tree masked_fenv_var
13598 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13599 build_int_cst (unsigned_type_node,
13600 ~(accrued_exception_mask | trap_enable_mask)));
13601 tree hold_mask
13602 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13603 NULL_TREE, NULL_TREE);
13604
13605 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13606 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13607 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13608
13609 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13610
13611 /* We reload the value of tmp1_var to clear the exceptions:
13612
13613 __builtin_load_fsr (&tmp1_var); */
13614
13615 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13616
13617 /* We generate the equivalent of feupdateenv (&fenv_var):
13618
13619 unsigned int tmp2_var;
13620 __builtin_store_fsr (&tmp2_var);
13621
13622 __builtin_load_fsr (&fenv_var);
13623
13624 if (SPARC_LOW_FE_EXCEPT_VALUES)
13625 tmp2_var >>= 5;
13626 __atomic_feraiseexcept ((int) tmp2_var); */
13627
13628 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13629 TREE_ADDRESSABLE (tmp2_var) = 1;
13630 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13631 tree update_stfsr
13632 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13633 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13634
13635 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13636
13637 tree atomic_feraiseexcept
13638 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13639 tree update_call
13640 = build_call_expr (atomic_feraiseexcept, 1,
13641 fold_convert (integer_type_node, tmp2_var));
13642
13643 if (SPARC_LOW_FE_EXCEPT_VALUES)
13644 {
13645 tree shifted_tmp2_var
13646 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13647 build_int_cst (unsigned_type_node, 5));
13648 tree update_shift
13649 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13650 update_call = compound_expr (update_shift, update_call);
13651 }
13652
13653 *update
13654 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13655 }
13656
13657 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13658
13659 SImode loads to floating-point registers are not zero-extended.
13660 The definition for LOAD_EXTEND_OP specifies that integer loads
13661 narrower than BITS_PER_WORD will be zero-extended. As a result,
13662 we inhibit changes from SImode unless they are to a mode that is
13663 identical in size.
13664
13665 Likewise for SFmode, since word-mode paradoxical subregs are
13666 problematic on big-endian architectures. */
13667
13668 static bool
13669 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13670 reg_class_t rclass)
13671 {
13672 if (TARGET_ARCH64
13673 && GET_MODE_SIZE (from) == 4
13674 && GET_MODE_SIZE (to) != 4)
13675 return !reg_classes_intersect_p (rclass, FP_REGS);
13676 return true;
13677 }
13678
13679 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13680
13681 static HOST_WIDE_INT
13682 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13683 {
13684 if (TREE_CODE (exp) == STRING_CST)
13685 return MAX (align, FASTEST_ALIGNMENT);
13686 return align;
13687 }
13688
13689 #include "gt-sparc.h"