1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "debug.h"
52 #include "common/common-target.h"
53 #include "gimplify.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "params.h"
57 #include "tree-pass.h"
58 #include "context.h"
59 #include "builtins.h"
60
61 /* This file should be included last. */
62 #include "target-def.h"
63
64 /* Processor costs */
65
66 struct processor_costs {
67 /* Integer load */
68 const int int_load;
69
70 /* Integer signed load */
71 const int int_sload;
72
73 /* Integer zeroed load */
74 const int int_zload;
75
76 /* Float load */
77 const int float_load;
78
79 /* fmov, fneg, fabs */
80 const int float_move;
81
82 /* fadd, fsub */
83 const int float_plusminus;
84
85 /* fcmp */
86 const int float_cmp;
87
88 /* fmov, fmovr */
89 const int float_cmove;
90
91 /* fmul */
92 const int float_mul;
93
94 /* fdivs */
95 const int float_div_sf;
96
97 /* fdivd */
98 const int float_div_df;
99
100 /* fsqrts */
101 const int float_sqrt_sf;
102
103 /* fsqrtd */
104 const int float_sqrt_df;
105
106 /* umul/smul */
107 const int int_mul;
108
109 /* mulX */
110 const int int_mulX;
111
112 /* integer multiply cost for each bit set past the most
113 significant 3, so the formula for multiply cost becomes:
114
115 if (rs1 < 0)
116 highest_bit = highest_clear_bit(rs1);
117 else
118 highest_bit = highest_set_bit(rs1);
119 if (highest_bit < 3)
120 highest_bit = 3;
121 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
122
123 A value of zero indicates that the multiply cost is fixed,
124 and not variable. */
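   /* For instance, with an int_mul_bit_factor of 2 and a non-negative rs1
      whose highest set bit is bit 9, the formula above would add
      (9 - 3) / 2 = 3 on top of the base int_mul or int_mulX cost.  */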
125 const int int_mul_bit_factor;
126
127 /* udiv/sdiv */
128 const int int_div;
129
130 /* divX */
131 const int int_divX;
132
133 /* movcc, movr */
134 const int int_cmove;
135
136 /* penalty for shifts, due to scheduling rules etc. */
137 const int shift_penalty;
138 };
139
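/* One cost table follows for each processor family; sparc_option_override
   points sparc_costs at the table matching the CPU being tuned for (see the
   switch on sparc_cpu below).  */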
140 static const
141 struct processor_costs cypress_costs = {
142 COSTS_N_INSNS (2), /* int load */
143 COSTS_N_INSNS (2), /* int signed load */
144 COSTS_N_INSNS (2), /* int zeroed load */
145 COSTS_N_INSNS (2), /* float load */
146 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
147 COSTS_N_INSNS (5), /* fadd, fsub */
148 COSTS_N_INSNS (1), /* fcmp */
149 COSTS_N_INSNS (1), /* fmov, fmovr */
150 COSTS_N_INSNS (7), /* fmul */
151 COSTS_N_INSNS (37), /* fdivs */
152 COSTS_N_INSNS (37), /* fdivd */
153 COSTS_N_INSNS (63), /* fsqrts */
154 COSTS_N_INSNS (63), /* fsqrtd */
155 COSTS_N_INSNS (1), /* imul */
156 COSTS_N_INSNS (1), /* imulX */
157 0, /* imul bit factor */
158 COSTS_N_INSNS (1), /* idiv */
159 COSTS_N_INSNS (1), /* idivX */
160 COSTS_N_INSNS (1), /* movcc/movr */
161 0, /* shift penalty */
162 };
163
164 static const
165 struct processor_costs supersparc_costs = {
166 COSTS_N_INSNS (1), /* int load */
167 COSTS_N_INSNS (1), /* int signed load */
168 COSTS_N_INSNS (1), /* int zeroed load */
169 COSTS_N_INSNS (0), /* float load */
170 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
171 COSTS_N_INSNS (3), /* fadd, fsub */
172 COSTS_N_INSNS (3), /* fcmp */
173 COSTS_N_INSNS (1), /* fmov, fmovr */
174 COSTS_N_INSNS (3), /* fmul */
175 COSTS_N_INSNS (6), /* fdivs */
176 COSTS_N_INSNS (9), /* fdivd */
177 COSTS_N_INSNS (12), /* fsqrts */
178 COSTS_N_INSNS (12), /* fsqrtd */
179 COSTS_N_INSNS (4), /* imul */
180 COSTS_N_INSNS (4), /* imulX */
181 0, /* imul bit factor */
182 COSTS_N_INSNS (4), /* idiv */
183 COSTS_N_INSNS (4), /* idivX */
184 COSTS_N_INSNS (1), /* movcc/movr */
185 1, /* shift penalty */
186 };
187
188 static const
189 struct processor_costs hypersparc_costs = {
190 COSTS_N_INSNS (1), /* int load */
191 COSTS_N_INSNS (1), /* int signed load */
192 COSTS_N_INSNS (1), /* int zeroed load */
193 COSTS_N_INSNS (1), /* float load */
194 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
195 COSTS_N_INSNS (1), /* fadd, fsub */
196 COSTS_N_INSNS (1), /* fcmp */
197 COSTS_N_INSNS (1), /* fmov, fmovr */
198 COSTS_N_INSNS (1), /* fmul */
199 COSTS_N_INSNS (8), /* fdivs */
200 COSTS_N_INSNS (12), /* fdivd */
201 COSTS_N_INSNS (17), /* fsqrts */
202 COSTS_N_INSNS (17), /* fsqrtd */
203 COSTS_N_INSNS (17), /* imul */
204 COSTS_N_INSNS (17), /* imulX */
205 0, /* imul bit factor */
206 COSTS_N_INSNS (17), /* idiv */
207 COSTS_N_INSNS (17), /* idivX */
208 COSTS_N_INSNS (1), /* movcc/movr */
209 0, /* shift penalty */
210 };
211
212 static const
213 struct processor_costs leon_costs = {
214 COSTS_N_INSNS (1), /* int load */
215 COSTS_N_INSNS (1), /* int signed load */
216 COSTS_N_INSNS (1), /* int zeroed load */
217 COSTS_N_INSNS (1), /* float load */
218 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
219 COSTS_N_INSNS (1), /* fadd, fsub */
220 COSTS_N_INSNS (1), /* fcmp */
221 COSTS_N_INSNS (1), /* fmov, fmovr */
222 COSTS_N_INSNS (1), /* fmul */
223 COSTS_N_INSNS (15), /* fdivs */
224 COSTS_N_INSNS (15), /* fdivd */
225 COSTS_N_INSNS (23), /* fsqrts */
226 COSTS_N_INSNS (23), /* fsqrtd */
227 COSTS_N_INSNS (5), /* imul */
228 COSTS_N_INSNS (5), /* imulX */
229 0, /* imul bit factor */
230 COSTS_N_INSNS (5), /* idiv */
231 COSTS_N_INSNS (5), /* idivX */
232 COSTS_N_INSNS (1), /* movcc/movr */
233 0, /* shift penalty */
234 };
235
236 static const
237 struct processor_costs leon3_costs = {
238 COSTS_N_INSNS (1), /* int load */
239 COSTS_N_INSNS (1), /* int signed load */
240 COSTS_N_INSNS (1), /* int zeroed load */
241 COSTS_N_INSNS (1), /* float load */
242 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
243 COSTS_N_INSNS (1), /* fadd, fsub */
244 COSTS_N_INSNS (1), /* fcmp */
245 COSTS_N_INSNS (1), /* fmov, fmovr */
246 COSTS_N_INSNS (1), /* fmul */
247 COSTS_N_INSNS (14), /* fdivs */
248 COSTS_N_INSNS (15), /* fdivd */
249 COSTS_N_INSNS (22), /* fsqrts */
250 COSTS_N_INSNS (23), /* fsqrtd */
251 COSTS_N_INSNS (5), /* imul */
252 COSTS_N_INSNS (5), /* imulX */
253 0, /* imul bit factor */
254 COSTS_N_INSNS (35), /* idiv */
255 COSTS_N_INSNS (35), /* idivX */
256 COSTS_N_INSNS (1), /* movcc/movr */
257 0, /* shift penalty */
258 };
259
260 static const
261 struct processor_costs sparclet_costs = {
262 COSTS_N_INSNS (3), /* int load */
263 COSTS_N_INSNS (3), /* int signed load */
264 COSTS_N_INSNS (1), /* int zeroed load */
265 COSTS_N_INSNS (1), /* float load */
266 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
267 COSTS_N_INSNS (1), /* fadd, fsub */
268 COSTS_N_INSNS (1), /* fcmp */
269 COSTS_N_INSNS (1), /* fmov, fmovr */
270 COSTS_N_INSNS (1), /* fmul */
271 COSTS_N_INSNS (1), /* fdivs */
272 COSTS_N_INSNS (1), /* fdivd */
273 COSTS_N_INSNS (1), /* fsqrts */
274 COSTS_N_INSNS (1), /* fsqrtd */
275 COSTS_N_INSNS (5), /* imul */
276 COSTS_N_INSNS (5), /* imulX */
277 0, /* imul bit factor */
278 COSTS_N_INSNS (5), /* idiv */
279 COSTS_N_INSNS (5), /* idivX */
280 COSTS_N_INSNS (1), /* movcc/movr */
281 0, /* shift penalty */
282 };
283
284 static const
285 struct processor_costs ultrasparc_costs = {
286 COSTS_N_INSNS (2), /* int load */
287 COSTS_N_INSNS (3), /* int signed load */
288 COSTS_N_INSNS (2), /* int zeroed load */
289 COSTS_N_INSNS (2), /* float load */
290 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
291 COSTS_N_INSNS (4), /* fadd, fsub */
292 COSTS_N_INSNS (1), /* fcmp */
293 COSTS_N_INSNS (2), /* fmov, fmovr */
294 COSTS_N_INSNS (4), /* fmul */
295 COSTS_N_INSNS (13), /* fdivs */
296 COSTS_N_INSNS (23), /* fdivd */
297 COSTS_N_INSNS (13), /* fsqrts */
298 COSTS_N_INSNS (23), /* fsqrtd */
299 COSTS_N_INSNS (4), /* imul */
300 COSTS_N_INSNS (4), /* imulX */
301 2, /* imul bit factor */
302 COSTS_N_INSNS (37), /* idiv */
303 COSTS_N_INSNS (68), /* idivX */
304 COSTS_N_INSNS (2), /* movcc/movr */
305 2, /* shift penalty */
306 };
307
308 static const
309 struct processor_costs ultrasparc3_costs = {
310 COSTS_N_INSNS (2), /* int load */
311 COSTS_N_INSNS (3), /* int signed load */
312 COSTS_N_INSNS (3), /* int zeroed load */
313 COSTS_N_INSNS (2), /* float load */
314 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
315 COSTS_N_INSNS (4), /* fadd, fsub */
316 COSTS_N_INSNS (5), /* fcmp */
317 COSTS_N_INSNS (3), /* fmov, fmovr */
318 COSTS_N_INSNS (4), /* fmul */
319 COSTS_N_INSNS (17), /* fdivs */
320 COSTS_N_INSNS (20), /* fdivd */
321 COSTS_N_INSNS (20), /* fsqrts */
322 COSTS_N_INSNS (29), /* fsqrtd */
323 COSTS_N_INSNS (6), /* imul */
324 COSTS_N_INSNS (6), /* imulX */
325 0, /* imul bit factor */
326 COSTS_N_INSNS (40), /* idiv */
327 COSTS_N_INSNS (71), /* idivX */
328 COSTS_N_INSNS (2), /* movcc/movr */
329 0, /* shift penalty */
330 };
331
332 static const
333 struct processor_costs niagara_costs = {
334 COSTS_N_INSNS (3), /* int load */
335 COSTS_N_INSNS (3), /* int signed load */
336 COSTS_N_INSNS (3), /* int zeroed load */
337 COSTS_N_INSNS (9), /* float load */
338 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
339 COSTS_N_INSNS (8), /* fadd, fsub */
340 COSTS_N_INSNS (26), /* fcmp */
341 COSTS_N_INSNS (8), /* fmov, fmovr */
342 COSTS_N_INSNS (29), /* fmul */
343 COSTS_N_INSNS (54), /* fdivs */
344 COSTS_N_INSNS (83), /* fdivd */
345 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
346 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
347 COSTS_N_INSNS (11), /* imul */
348 COSTS_N_INSNS (11), /* imulX */
349 0, /* imul bit factor */
350 COSTS_N_INSNS (72), /* idiv */
351 COSTS_N_INSNS (72), /* idivX */
352 COSTS_N_INSNS (1), /* movcc/movr */
353 0, /* shift penalty */
354 };
355
356 static const
357 struct processor_costs niagara2_costs = {
358 COSTS_N_INSNS (3), /* int load */
359 COSTS_N_INSNS (3), /* int signed load */
360 COSTS_N_INSNS (3), /* int zeroed load */
361 COSTS_N_INSNS (3), /* float load */
362 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
363 COSTS_N_INSNS (6), /* fadd, fsub */
364 COSTS_N_INSNS (6), /* fcmp */
365 COSTS_N_INSNS (6), /* fmov, fmovr */
366 COSTS_N_INSNS (6), /* fmul */
367 COSTS_N_INSNS (19), /* fdivs */
368 COSTS_N_INSNS (33), /* fdivd */
369 COSTS_N_INSNS (19), /* fsqrts */
370 COSTS_N_INSNS (33), /* fsqrtd */
371 COSTS_N_INSNS (5), /* imul */
372 COSTS_N_INSNS (5), /* imulX */
373 0, /* imul bit factor */
374 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
376 COSTS_N_INSNS (1), /* movcc/movr */
377 0, /* shift penalty */
378 };
379
380 static const
381 struct processor_costs niagara3_costs = {
382 COSTS_N_INSNS (3), /* int load */
383 COSTS_N_INSNS (3), /* int signed load */
384 COSTS_N_INSNS (3), /* int zeroed load */
385 COSTS_N_INSNS (3), /* float load */
386 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
387 COSTS_N_INSNS (9), /* fadd, fsub */
388 COSTS_N_INSNS (9), /* fcmp */
389 COSTS_N_INSNS (9), /* fmov, fmovr */
390 COSTS_N_INSNS (9), /* fmul */
391 COSTS_N_INSNS (23), /* fdivs */
392 COSTS_N_INSNS (37), /* fdivd */
393 COSTS_N_INSNS (23), /* fsqrts */
394 COSTS_N_INSNS (37), /* fsqrtd */
395 COSTS_N_INSNS (9), /* imul */
396 COSTS_N_INSNS (9), /* imulX */
397 0, /* imul bit factor */
398 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
399 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
400 COSTS_N_INSNS (1), /* movcc/movr */
401 0, /* shift penalty */
402 };
403
404 static const
405 struct processor_costs niagara4_costs = {
406 COSTS_N_INSNS (5), /* int load */
407 COSTS_N_INSNS (5), /* int signed load */
408 COSTS_N_INSNS (5), /* int zeroed load */
409 COSTS_N_INSNS (5), /* float load */
410 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
411 COSTS_N_INSNS (11), /* fadd, fsub */
412 COSTS_N_INSNS (11), /* fcmp */
413 COSTS_N_INSNS (11), /* fmov, fmovr */
414 COSTS_N_INSNS (11), /* fmul */
415 COSTS_N_INSNS (24), /* fdivs */
416 COSTS_N_INSNS (37), /* fdivd */
417 COSTS_N_INSNS (24), /* fsqrts */
418 COSTS_N_INSNS (37), /* fsqrtd */
419 COSTS_N_INSNS (12), /* imul */
420 COSTS_N_INSNS (12), /* imulX */
421 0, /* imul bit factor */
422 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
423 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
424 COSTS_N_INSNS (1), /* movcc/movr */
425 0, /* shift penalty */
426 };
427
428 static const
429 struct processor_costs niagara7_costs = {
430 COSTS_N_INSNS (5), /* int load */
431 COSTS_N_INSNS (5), /* int signed load */
432 COSTS_N_INSNS (5), /* int zeroed load */
433 COSTS_N_INSNS (5), /* float load */
434 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
435 COSTS_N_INSNS (11), /* fadd, fsub */
436 COSTS_N_INSNS (11), /* fcmp */
437 COSTS_N_INSNS (11), /* fmov, fmovr */
438 COSTS_N_INSNS (11), /* fmul */
439 COSTS_N_INSNS (24), /* fdivs */
440 COSTS_N_INSNS (37), /* fdivd */
441 COSTS_N_INSNS (24), /* fsqrts */
442 COSTS_N_INSNS (37), /* fsqrtd */
443 COSTS_N_INSNS (12), /* imul */
444 COSTS_N_INSNS (12), /* imulX */
445 0, /* imul bit factor */
446 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
447 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
448 COSTS_N_INSNS (1), /* movcc/movr */
449 0, /* shift penalty */
450 };
451
452 static const
453 struct processor_costs m8_costs = {
454 COSTS_N_INSNS (3), /* int load */
455 COSTS_N_INSNS (3), /* int signed load */
456 COSTS_N_INSNS (3), /* int zeroed load */
457 COSTS_N_INSNS (3), /* float load */
458 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
459 COSTS_N_INSNS (9), /* fadd, fsub */
460 COSTS_N_INSNS (9), /* fcmp */
461 COSTS_N_INSNS (9), /* fmov, fmovr */
462 COSTS_N_INSNS (9), /* fmul */
463 COSTS_N_INSNS (26), /* fdivs */
464 COSTS_N_INSNS (30), /* fdivd */
465 COSTS_N_INSNS (33), /* fsqrts */
466 COSTS_N_INSNS (41), /* fsqrtd */
467 COSTS_N_INSNS (12), /* imul */
468 COSTS_N_INSNS (10), /* imulX */
469 0, /* imul bit factor */
470 COSTS_N_INSNS (57), /* udiv/sdiv */
471 COSTS_N_INSNS (30), /* udivx/sdivx */
472 COSTS_N_INSNS (1), /* movcc/movr */
473 0, /* shift penalty */
474 };
475
476 static const struct processor_costs *sparc_costs = &cypress_costs;
477
478 #ifdef HAVE_AS_RELAX_OPTION
479 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
480 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
481 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out if
482 somebody does not branch between the sethi and jmp. */
483 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
484 #else
485 #define LEAF_SIBCALL_SLOT_RESERVED_P \
486 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
487 #endif
488
489 /* Vector to say how input registers are mapped to output registers.
490 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
491 eliminate it. You must use -fomit-frame-pointer to get that. */
492 char leaf_reg_remap[] =
493 { 0, 1, 2, 3, 4, 5, 6, 7,
494 -1, -1, -1, -1, -1, -1, 14, -1,
495 -1, -1, -1, -1, -1, -1, -1, -1,
496 8, 9, 10, 11, 12, 13, -1, 15,
497
498 32, 33, 34, 35, 36, 37, 38, 39,
499 40, 41, 42, 43, 44, 45, 46, 47,
500 48, 49, 50, 51, 52, 53, 54, 55,
501 56, 57, 58, 59, 60, 61, 62, 63,
502 64, 65, 66, 67, 68, 69, 70, 71,
503 72, 73, 74, 75, 76, 77, 78, 79,
504 80, 81, 82, 83, 84, 85, 86, 87,
505 88, 89, 90, 91, 92, 93, 94, 95,
506 96, 97, 98, 99, 100, 101, 102};
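/* In other words, for a leaf function the incoming registers %i0-%i5 and %i7
   (hard regs 24-29 and 31) are renamed onto the outgoing registers %o0-%o5
   and %o7 (hard regs 8-13 and 15); the globals, %sp and the FP registers keep
   their numbers; and the -1 entries mark registers whose use disqualifies the
   function from leaf treatment (compare sparc_leaf_regs below).  */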
507
508 /* Vector, indexed by hard register number, which contains 1
509 for a register that is allowable in a candidate for leaf
510 function treatment. */
511 char sparc_leaf_regs[] =
512 { 1, 1, 1, 1, 1, 1, 1, 1,
513 0, 0, 0, 0, 0, 0, 1, 0,
514 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 1, 1, 1, 1, 1, 0, 1,
516 1, 1, 1, 1, 1, 1, 1, 1,
517 1, 1, 1, 1, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1};
525
526 struct GTY(()) machine_function
527 {
528 /* Size of the frame of the function. */
529 HOST_WIDE_INT frame_size;
530
531 /* Size of the frame of the function minus the register window save area
532 and the outgoing argument area. */
533 HOST_WIDE_INT apparent_frame_size;
534
535 /* Register we pretend the frame pointer is allocated to. Normally, this
536 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
537 record "offset" separately as it may be too big for (reg + disp). */
538 rtx frame_base_reg;
539 HOST_WIDE_INT frame_base_offset;
540
541 /* Number of global or FP registers to be saved (as 4-byte quantities). */
542 int n_global_fp_regs;
543
544 /* True if the current function is leaf and uses only leaf regs,
545 so that the SPARC leaf function optimization can be applied.
546 Private version of crtl->uses_only_leaf_regs, see
547 sparc_expand_prologue for the rationale. */
548 int leaf_function_p;
549
550 /* True if the prologue saves local or in registers. */
551 bool save_local_in_regs_p;
552
553 /* True if the data calculated by sparc_expand_prologue are valid. */
554 bool prologue_data_valid_p;
555 };
556
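/* One machine_function is attached to each function as cfun->machine; it is
   allocated by sparc_init_machine_status, which sparc_option_override installs
   as init_machine_status.  The macros below are shorthands for its fields.  */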
557 #define sparc_frame_size cfun->machine->frame_size
558 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
559 #define sparc_frame_base_reg cfun->machine->frame_base_reg
560 #define sparc_frame_base_offset cfun->machine->frame_base_offset
561 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
562 #define sparc_leaf_function_p cfun->machine->leaf_function_p
563 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
564 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
565
566 /* 1 if the next opcode is to be specially indented. */
567 int sparc_indent_opcode = 0;
568
569 static void sparc_option_override (void);
570 static void sparc_init_modes (void);
571 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
572 const_tree, bool, bool, int *, int *);
573
574 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
575 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
576
577 static void sparc_emit_set_const32 (rtx, rtx);
578 static void sparc_emit_set_const64 (rtx, rtx);
579 static void sparc_output_addr_vec (rtx);
580 static void sparc_output_addr_diff_vec (rtx);
581 static void sparc_output_deferred_case_vectors (void);
582 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
583 static bool sparc_legitimate_constant_p (machine_mode, rtx);
584 static rtx sparc_builtin_saveregs (void);
585 static int epilogue_renumber (rtx *, int);
586 static bool sparc_assemble_integer (rtx, unsigned int, int);
587 static int set_extends (rtx_insn *);
588 static void sparc_asm_function_prologue (FILE *);
589 static void sparc_asm_function_epilogue (FILE *);
590 #ifdef TARGET_SOLARIS
591 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
592 tree) ATTRIBUTE_UNUSED;
593 #endif
594 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
595 static int sparc_issue_rate (void);
596 static void sparc_sched_init (FILE *, int, int);
597 static int sparc_use_sched_lookahead (void);
598
599 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
600 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
601 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
602 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
603 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
604
605 static bool sparc_function_ok_for_sibcall (tree, tree);
606 static void sparc_init_libfuncs (void);
607 static void sparc_init_builtins (void);
608 static void sparc_fpu_init_builtins (void);
609 static void sparc_vis_init_builtins (void);
610 static tree sparc_builtin_decl (unsigned, bool);
611 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
612 static tree sparc_fold_builtin (tree, int, tree *, bool);
613 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
614 HOST_WIDE_INT, tree);
615 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
616 HOST_WIDE_INT, const_tree);
617 static struct machine_function * sparc_init_machine_status (void);
618 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
619 static rtx sparc_tls_get_addr (void);
620 static rtx sparc_tls_got (void);
621 static int sparc_register_move_cost (machine_mode,
622 reg_class_t, reg_class_t);
623 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
624 static rtx sparc_function_value (const_tree, const_tree, bool);
625 static rtx sparc_libcall_value (machine_mode, const_rtx);
626 static bool sparc_function_value_regno_p (const unsigned int);
627 static rtx sparc_struct_value_rtx (tree, int);
628 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
629 int *, const_tree, int);
630 static bool sparc_return_in_memory (const_tree, const_tree);
631 static bool sparc_strict_argument_naming (cumulative_args_t);
632 static void sparc_va_start (tree, rtx);
633 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
634 static bool sparc_vector_mode_supported_p (machine_mode);
635 static bool sparc_tls_referenced_p (rtx);
636 static rtx sparc_legitimize_tls_address (rtx);
637 static rtx sparc_legitimize_pic_address (rtx, rtx);
638 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
639 static rtx sparc_delegitimize_address (rtx);
640 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
641 static bool sparc_pass_by_reference (cumulative_args_t,
642 machine_mode, const_tree, bool);
643 static void sparc_function_arg_advance (cumulative_args_t,
644 machine_mode, const_tree, bool);
645 static rtx sparc_function_arg_1 (cumulative_args_t,
646 machine_mode, const_tree, bool, bool);
647 static rtx sparc_function_arg (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_incoming_arg (cumulative_args_t,
650 machine_mode, const_tree, bool);
651 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
652 static unsigned int sparc_function_arg_boundary (machine_mode,
653 const_tree);
654 static int sparc_arg_partial_bytes (cumulative_args_t,
655 machine_mode, tree, bool);
656 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
657 static void sparc_file_end (void);
658 static bool sparc_frame_pointer_required (void);
659 static bool sparc_can_eliminate (const int, const int);
660 static rtx sparc_builtin_setjmp_frame_value (void);
661 static void sparc_conditional_register_usage (void);
662 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
663 static const char *sparc_mangle_type (const_tree);
664 #endif
665 static void sparc_trampoline_init (rtx, tree, rtx);
666 static machine_mode sparc_preferred_simd_mode (scalar_mode);
667 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
668 static bool sparc_lra_p (void);
669 static bool sparc_print_operand_punct_valid_p (unsigned char);
670 static void sparc_print_operand (FILE *, rtx, int);
671 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
672 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
673 machine_mode,
674 secondary_reload_info *);
675 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
676 reg_class_t);
677 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
678 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
679 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
680 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
681 static unsigned int sparc_min_arithmetic_precision (void);
682 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
683 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
684 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
685 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
686 reg_class_t);
687 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
688 \f
689 #ifdef SUBTARGET_ATTRIBUTE_TABLE
690 /* Table of valid machine attributes. */
691 static const struct attribute_spec sparc_attribute_table[] =
692 {
693 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
694 do_diagnostic } */
695 SUBTARGET_ATTRIBUTE_TABLE,
696 { NULL, 0, 0, false, false, false, NULL, false }
697 };
698 #endif
699 \f
700 /* Option handling. */
701
702 /* Parsed value. */
703 enum cmodel sparc_cmodel;
704
705 char sparc_hard_reg_printed[8];
706
707 /* Initialize the GCC target structure. */
708
709 /* The default is to use .half rather than .short for aligned HI objects. */
710 #undef TARGET_ASM_ALIGNED_HI_OP
711 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
712
713 #undef TARGET_ASM_UNALIGNED_HI_OP
714 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
715 #undef TARGET_ASM_UNALIGNED_SI_OP
716 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
717 #undef TARGET_ASM_UNALIGNED_DI_OP
718 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
719
720 /* The target hook has to handle DI-mode values. */
721 #undef TARGET_ASM_INTEGER
722 #define TARGET_ASM_INTEGER sparc_assemble_integer
723
724 #undef TARGET_ASM_FUNCTION_PROLOGUE
725 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
726 #undef TARGET_ASM_FUNCTION_EPILOGUE
727 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
728
729 #undef TARGET_SCHED_ADJUST_COST
730 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
731 #undef TARGET_SCHED_ISSUE_RATE
732 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
733 #undef TARGET_SCHED_INIT
734 #define TARGET_SCHED_INIT sparc_sched_init
735 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
736 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
737
738 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
739 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
740
741 #undef TARGET_INIT_LIBFUNCS
742 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
743
744 #undef TARGET_LEGITIMIZE_ADDRESS
745 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
746 #undef TARGET_DELEGITIMIZE_ADDRESS
747 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
748 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
749 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
750
751 #undef TARGET_INIT_BUILTINS
752 #define TARGET_INIT_BUILTINS sparc_init_builtins
753 #undef TARGET_BUILTIN_DECL
754 #define TARGET_BUILTIN_DECL sparc_builtin_decl
755 #undef TARGET_EXPAND_BUILTIN
756 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
757 #undef TARGET_FOLD_BUILTIN
758 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
759
760 #if TARGET_TLS
761 #undef TARGET_HAVE_TLS
762 #define TARGET_HAVE_TLS true
763 #endif
764
765 #undef TARGET_CANNOT_FORCE_CONST_MEM
766 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
767
768 #undef TARGET_ASM_OUTPUT_MI_THUNK
769 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
770 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
771 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
772
773 #undef TARGET_RTX_COSTS
774 #define TARGET_RTX_COSTS sparc_rtx_costs
775 #undef TARGET_ADDRESS_COST
776 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
777 #undef TARGET_REGISTER_MOVE_COST
778 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
779
780 #undef TARGET_PROMOTE_FUNCTION_MODE
781 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
782
783 #undef TARGET_FUNCTION_VALUE
784 #define TARGET_FUNCTION_VALUE sparc_function_value
785 #undef TARGET_LIBCALL_VALUE
786 #define TARGET_LIBCALL_VALUE sparc_libcall_value
787 #undef TARGET_FUNCTION_VALUE_REGNO_P
788 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
789
790 #undef TARGET_STRUCT_VALUE_RTX
791 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
792 #undef TARGET_RETURN_IN_MEMORY
793 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
794 #undef TARGET_MUST_PASS_IN_STACK
795 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
796 #undef TARGET_PASS_BY_REFERENCE
797 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
798 #undef TARGET_ARG_PARTIAL_BYTES
799 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
800 #undef TARGET_FUNCTION_ARG_ADVANCE
801 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
802 #undef TARGET_FUNCTION_ARG
803 #define TARGET_FUNCTION_ARG sparc_function_arg
804 #undef TARGET_FUNCTION_INCOMING_ARG
805 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
806 #undef TARGET_FUNCTION_ARG_PADDING
807 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
808 #undef TARGET_FUNCTION_ARG_BOUNDARY
809 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
810
811 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
812 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
813 #undef TARGET_STRICT_ARGUMENT_NAMING
814 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
815
816 #undef TARGET_EXPAND_BUILTIN_VA_START
817 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
818 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
819 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
820
821 #undef TARGET_VECTOR_MODE_SUPPORTED_P
822 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
823
824 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
825 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
826
827 #ifdef SUBTARGET_INSERT_ATTRIBUTES
828 #undef TARGET_INSERT_ATTRIBUTES
829 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
830 #endif
831
832 #ifdef SUBTARGET_ATTRIBUTE_TABLE
833 #undef TARGET_ATTRIBUTE_TABLE
834 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
835 #endif
836
837 #undef TARGET_OPTION_OVERRIDE
838 #define TARGET_OPTION_OVERRIDE sparc_option_override
839
840 #ifdef TARGET_THREAD_SSP_OFFSET
841 #undef TARGET_STACK_PROTECT_GUARD
842 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
843 #endif
844
845 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
846 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
847 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
848 #endif
849
850 #undef TARGET_ASM_FILE_END
851 #define TARGET_ASM_FILE_END sparc_file_end
852
853 #undef TARGET_FRAME_POINTER_REQUIRED
854 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
855
856 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
857 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
858
859 #undef TARGET_CAN_ELIMINATE
860 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
861
862 #undef TARGET_PREFERRED_RELOAD_CLASS
863 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
864
865 #undef TARGET_SECONDARY_RELOAD
866 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
867 #undef TARGET_SECONDARY_MEMORY_NEEDED
868 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
869 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
870 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
871
872 #undef TARGET_CONDITIONAL_REGISTER_USAGE
873 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
874
875 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
876 #undef TARGET_MANGLE_TYPE
877 #define TARGET_MANGLE_TYPE sparc_mangle_type
878 #endif
879
880 #undef TARGET_LRA_P
881 #define TARGET_LRA_P sparc_lra_p
882
883 #undef TARGET_LEGITIMATE_ADDRESS_P
884 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
885
886 #undef TARGET_LEGITIMATE_CONSTANT_P
887 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
888
889 #undef TARGET_TRAMPOLINE_INIT
890 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
891
892 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
893 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
894 #undef TARGET_PRINT_OPERAND
895 #define TARGET_PRINT_OPERAND sparc_print_operand
896 #undef TARGET_PRINT_OPERAND_ADDRESS
897 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
898
899 /* The value stored by LDSTUB. */
900 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
901 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
902
903 #undef TARGET_CSTORE_MODE
904 #define TARGET_CSTORE_MODE sparc_cstore_mode
905
906 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
907 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
908
909 #undef TARGET_FIXED_CONDITION_CODE_REGS
910 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
911
912 #undef TARGET_MIN_ARITHMETIC_PRECISION
913 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
914
915 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
916 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
917
918 #undef TARGET_HARD_REGNO_NREGS
919 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
920 #undef TARGET_HARD_REGNO_MODE_OK
921 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
922
923 #undef TARGET_MODES_TIEABLE_P
924 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
925
926 #undef TARGET_CAN_CHANGE_MODE_CLASS
927 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
928
929 #undef TARGET_CONSTANT_ALIGNMENT
930 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
931
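/* The TARGET_* overrides above, together with the defaults supplied by
   target-def.h (included last, see the top of the file), are gathered into
   the target hook vector here.  */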
932 struct gcc_target targetm = TARGET_INITIALIZER;
933
934 /* Return the memory reference contained in X if any, zero otherwise. */
935
936 static rtx
937 mem_ref (rtx x)
938 {
939 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
940 x = XEXP (x, 0);
941
942 if (MEM_P (x))
943 return x;
944
945 return NULL_RTX;
946 }
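/* mem_ref is used by the UT699 errata workaround below so that loads wrapped
   in a sign or zero extension are also caught.  */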
947
948 /* We use a machine specific pass to enable workarounds for errata.
949
950 We need to have the (essentially) final form of the insn stream in order
951 to properly detect the various hazards. Therefore, this machine specific
952 pass runs as late as possible. */
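/* The pass class (pass_work_around_errata) and its factory function
   make_pass_work_around_errata are defined further down; its gate only fires
   when one of the sparc_fix_at697f, sparc_fix_ut699 or sparc_fix_b2bst
   workaround flags is enabled.  */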
953
954 /* True if INSN is an md pattern or asm statement. */
955 #define USEFUL_INSN_P(INSN) \
956 (NONDEBUG_INSN_P (INSN) \
957 && GET_CODE (PATTERN (INSN)) != USE \
958 && GET_CODE (PATTERN (INSN)) != CLOBBER)
959
960 static unsigned int
961 sparc_do_work_around_errata (void)
962 {
963 rtx_insn *insn, *next;
964
965 /* Force all instructions to be split into their final form. */
966 split_all_insns_noflow ();
967
968 /* Now look for specific patterns in the insn stream. */
969 for (insn = get_insns (); insn; insn = next)
970 {
971 bool insert_nop = false;
972 rtx set;
973
974 /* Look into the instruction in a delay slot. */
975 if (NONJUMP_INSN_P (insn))
976 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
977 insn = seq->insn (1);
978
979 /* Look for either of these two sequences:
980
981 Sequence A:
982 1. store of word size or less (e.g. st / stb / sth / stf)
983 2. any single instruction that is not a load or store
984 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
985
986 Sequence B:
987 1. store of double word size (e.g. std / stdf)
988 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
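	 /* As a sketch of sequence A, given something like
	        st  %g1, [%fp-8]
	        add %g2, %g3, %g2
	        st  %g2, [%fp-12]
	    the workaround emits a nop right after the first store.  */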
989 if (sparc_fix_b2bst
990 && NONJUMP_INSN_P (insn)
991 && (set = single_set (insn)) != NULL_RTX
992 && MEM_P (SET_DEST (set)))
993 {
994 /* Sequence B begins with a double-word store. */
995 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
996 rtx_insn *after;
997 int i;
998
999 next = next_active_insn (insn);
1000 if (!next)
1001 break;
1002
1003 for (after = next, i = 0; i < 2; i++)
1004 {
1005 /* Skip empty assembly statements. */
1006 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1007 || (USEFUL_INSN_P (after)
1008 && (asm_noperands (PATTERN (after))>=0)
1009 && !strcmp (decode_asm_operands (PATTERN (after),
1010 NULL, NULL, NULL,
1011 NULL, NULL), "")))
1012 after = next_active_insn (after);
1013 if (!after)
1014 break;
1015
1016 /* If the insn is a branch, then it cannot be problematic. */
1017 if (!NONJUMP_INSN_P (after)
1018 || GET_CODE (PATTERN (after)) == SEQUENCE)
1019 break;
1020
1021 /* Sequence B is only two instructions long. */
1022 if (seq_b)
1023 {
1024 /* Add NOP if followed by a store. */
1025 if ((set = single_set (after)) != NULL_RTX
1026 && MEM_P (SET_DEST (set)))
1027 insert_nop = true;
1028
1029 /* Otherwise it is ok. */
1030 break;
1031 }
1032
1033 /* If the second instruction is a load or a store,
1034 then the sequence cannot be problematic. */
1035 if (i == 0)
1036 {
1037 if (((set = single_set (after)) != NULL_RTX)
1038 && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
1039 break;
1040
1041 after = next_active_insn (after);
1042 if (!after)
1043 break;
1044 }
1045
1046 /* Add NOP if third instruction is a store. */
1047 if (i == 1
1048 && ((set = single_set (after)) != NULL_RTX)
1049 && MEM_P (SET_DEST (set)))
1050 insert_nop = true;
1051 }
1052 }
1053 else
1054 /* Look for a single-word load into an odd-numbered FP register. */
1055 if (sparc_fix_at697f
1056 && NONJUMP_INSN_P (insn)
1057 && (set = single_set (insn)) != NULL_RTX
1058 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1059 && MEM_P (SET_SRC (set))
1060 && REG_P (SET_DEST (set))
1061 && REGNO (SET_DEST (set)) > 31
1062 && REGNO (SET_DEST (set)) % 2 != 0)
1063 {
1064 /* The wrong dependency is on the enclosing double register. */
1065 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1066 unsigned int src1, src2, dest;
1067 int code;
1068
1069 next = next_active_insn (insn);
1070 if (!next)
1071 break;
1072 /* If the insn is a branch, then it cannot be problematic. */
1073 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1074 continue;
1075
1076 extract_insn (next);
1077 code = INSN_CODE (next);
1078
1079 switch (code)
1080 {
1081 case CODE_FOR_adddf3:
1082 case CODE_FOR_subdf3:
1083 case CODE_FOR_muldf3:
1084 case CODE_FOR_divdf3:
1085 dest = REGNO (recog_data.operand[0]);
1086 src1 = REGNO (recog_data.operand[1]);
1087 src2 = REGNO (recog_data.operand[2]);
1088 if (src1 != src2)
1089 {
1090 /* Case [1-4]:
1091 ld [address], %fx+1
1092 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1093 if ((src1 == x || src2 == x)
1094 && (dest == src1 || dest == src2))
1095 insert_nop = true;
1096 }
1097 else
1098 {
1099 /* Case 5:
1100 ld [address], %fx+1
1101 FPOPd %fx, %fx, %fx */
1102 if (src1 == x
1103 && dest == src1
1104 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1105 insert_nop = true;
1106 }
1107 break;
1108
1109 case CODE_FOR_sqrtdf2:
1110 dest = REGNO (recog_data.operand[0]);
1111 src1 = REGNO (recog_data.operand[1]);
1112 /* Case 6:
1113 ld [address], %fx+1
1114 fsqrtd %fx, %fx */
1115 if (src1 == x && dest == src1)
1116 insert_nop = true;
1117 break;
1118
1119 default:
1120 break;
1121 }
1122 }
1123
1124 /* Look for a single-word load into an integer register. */
1125 else if (sparc_fix_ut699
1126 && NONJUMP_INSN_P (insn)
1127 && (set = single_set (insn)) != NULL_RTX
1128 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1129 && mem_ref (SET_SRC (set)) != NULL_RTX
1130 && REG_P (SET_DEST (set))
1131 && REGNO (SET_DEST (set)) < 32)
1132 {
1133 /* There is no problem if the second memory access has a data
1134 dependency on the first single-cycle load. */
1135 rtx x = SET_DEST (set);
1136
1137 next = next_active_insn (insn);
1138 if (!next)
1139 break;
1140 /* If the insn is a branch, then it cannot be problematic. */
1141 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1142 continue;
1143
1144 /* Look for a second memory access to/from an integer register. */
1145 if ((set = single_set (next)) != NULL_RTX)
1146 {
1147 rtx src = SET_SRC (set);
1148 rtx dest = SET_DEST (set);
1149 rtx mem;
1150
1151 /* LDD is affected. */
1152 if ((mem = mem_ref (src)) != NULL_RTX
1153 && REG_P (dest)
1154 && REGNO (dest) < 32
1155 && !reg_mentioned_p (x, XEXP (mem, 0)))
1156 insert_nop = true;
1157
1158 /* STD is *not* affected. */
1159 else if (MEM_P (dest)
1160 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1161 && (src == CONST0_RTX (GET_MODE (dest))
1162 || (REG_P (src)
1163 && REGNO (src) < 32
1164 && REGNO (src) != REGNO (x)))
1165 && !reg_mentioned_p (x, XEXP (dest, 0)))
1166 insert_nop = true;
1167 }
1168 }
1169
1170 /* Look for a single-word load/operation into an FP register. */
1171 else if (sparc_fix_ut699
1172 && NONJUMP_INSN_P (insn)
1173 && (set = single_set (insn)) != NULL_RTX
1174 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1175 && REG_P (SET_DEST (set))
1176 && REGNO (SET_DEST (set)) > 31)
1177 {
1178 /* Number of instructions in the problematic window. */
1179 const int n_insns = 4;
1180 /* The problematic combination is with the sibling FP register. */
1181 const unsigned int x = REGNO (SET_DEST (set));
1182 const unsigned int y = x ^ 1;
1183 rtx_insn *after;
1184 int i;
1185
1186 next = next_active_insn (insn);
1187 if (!next)
1188 break;
1189 /* If the insn is a branch, then it cannot be problematic. */
1190 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1191 continue;
1192
1193 /* Look for a second load/operation into the sibling FP register. */
1194 if (!((set = single_set (next)) != NULL_RTX
1195 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1196 && REG_P (SET_DEST (set))
1197 && REGNO (SET_DEST (set)) == y))
1198 continue;
1199
1200 /* Look for a (possible) store from the FP register in the next N
1201 instructions, but bail out if it is again modified or if there
1202 is a store from the sibling FP register before this store. */
1203 for (after = next, i = 0; i < n_insns; i++)
1204 {
1205 bool branch_p;
1206
1207 after = next_active_insn (after);
1208 if (!after)
1209 break;
1210
1211 /* This is a branch with an empty delay slot. */
1212 if (!NONJUMP_INSN_P (after))
1213 {
1214 if (++i == n_insns)
1215 break;
1216 branch_p = true;
1217 after = NULL;
1218 }
1219 /* This is a branch with a filled delay slot. */
1220 else if (rtx_sequence *seq =
1221 dyn_cast <rtx_sequence *> (PATTERN (after)))
1222 {
1223 if (++i == n_insns)
1224 break;
1225 branch_p = true;
1226 after = seq->insn (1);
1227 }
1228 /* This is a regular instruction. */
1229 else
1230 branch_p = false;
1231
1232 if (after && (set = single_set (after)) != NULL_RTX)
1233 {
1234 const rtx src = SET_SRC (set);
1235 const rtx dest = SET_DEST (set);
1236 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1237
1238 /* If the FP register is again modified before the store,
1239 then the store isn't affected. */
1240 if (REG_P (dest)
1241 && (REGNO (dest) == x
1242 || (REGNO (dest) == y && size == 8)))
1243 break;
1244
1245 if (MEM_P (dest) && REG_P (src))
1246 {
1247 /* If there is a store from the sibling FP register
1248 before the store, then the store is not affected. */
1249 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1250 break;
1251
1252 /* Otherwise, the store is affected. */
1253 if (REGNO (src) == x && size == 4)
1254 {
1255 insert_nop = true;
1256 break;
1257 }
1258 }
1259 }
1260
1261 /* If we have a branch in the first M instructions, then we
1262 cannot see the (M+2)th instruction so we play safe. */
1263 if (branch_p && i <= (n_insns - 2))
1264 {
1265 insert_nop = true;
1266 break;
1267 }
1268 }
1269 }
1270
1271 else
1272 next = NEXT_INSN (insn);
1273
1274 if (insert_nop)
1275 emit_insn_before (gen_nop (), next);
1276 }
1277
1278 return 0;
1279 }
1280
1281 namespace {
1282
1283 const pass_data pass_data_work_around_errata =
1284 {
1285 RTL_PASS, /* type */
1286 "errata", /* name */
1287 OPTGROUP_NONE, /* optinfo_flags */
1288 TV_MACH_DEP, /* tv_id */
1289 0, /* properties_required */
1290 0, /* properties_provided */
1291 0, /* properties_destroyed */
1292 0, /* todo_flags_start */
1293 0, /* todo_flags_finish */
1294 };
1295
1296 class pass_work_around_errata : public rtl_opt_pass
1297 {
1298 public:
1299 pass_work_around_errata(gcc::context *ctxt)
1300 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1301 {}
1302
1303 /* opt_pass methods: */
1304 virtual bool gate (function *)
1305 {
1306 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
1307 }
1308
1309 virtual unsigned int execute (function *)
1310 {
1311 return sparc_do_work_around_errata ();
1312 }
1313
1314 }; // class pass_work_around_errata
1315
1316 } // anon namespace
1317
1318 rtl_opt_pass *
1319 make_pass_work_around_errata (gcc::context *ctxt)
1320 {
1321 return new pass_work_around_errata (ctxt);
1322 }
1323
1324 /* Helpers for TARGET_DEBUG_OPTIONS. */
1325 static void
1326 dump_target_flag_bits (const int flags)
1327 {
1328 if (flags & MASK_64BIT)
1329 fprintf (stderr, "64BIT ");
1330 if (flags & MASK_APP_REGS)
1331 fprintf (stderr, "APP_REGS ");
1332 if (flags & MASK_FASTER_STRUCTS)
1333 fprintf (stderr, "FASTER_STRUCTS ");
1334 if (flags & MASK_FLAT)
1335 fprintf (stderr, "FLAT ");
1336 if (flags & MASK_FMAF)
1337 fprintf (stderr, "FMAF ");
1338 if (flags & MASK_FSMULD)
1339 fprintf (stderr, "FSMULD ");
1340 if (flags & MASK_FPU)
1341 fprintf (stderr, "FPU ");
1342 if (flags & MASK_HARD_QUAD)
1343 fprintf (stderr, "HARD_QUAD ");
1344 if (flags & MASK_POPC)
1345 fprintf (stderr, "POPC ");
1346 if (flags & MASK_PTR64)
1347 fprintf (stderr, "PTR64 ");
1348 if (flags & MASK_STACK_BIAS)
1349 fprintf (stderr, "STACK_BIAS ");
1350 if (flags & MASK_UNALIGNED_DOUBLES)
1351 fprintf (stderr, "UNALIGNED_DOUBLES ");
1352 if (flags & MASK_V8PLUS)
1353 fprintf (stderr, "V8PLUS ");
1354 if (flags & MASK_VIS)
1355 fprintf (stderr, "VIS ");
1356 if (flags & MASK_VIS2)
1357 fprintf (stderr, "VIS2 ");
1358 if (flags & MASK_VIS3)
1359 fprintf (stderr, "VIS3 ");
1360 if (flags & MASK_VIS4)
1361 fprintf (stderr, "VIS4 ");
1362 if (flags & MASK_VIS4B)
1363 fprintf (stderr, "VIS4B ");
1364 if (flags & MASK_CBCOND)
1365 fprintf (stderr, "CBCOND ");
1366 if (flags & MASK_DEPRECATED_V8_INSNS)
1367 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1368 if (flags & MASK_SPARCLET)
1369 fprintf (stderr, "SPARCLET ");
1370 if (flags & MASK_SPARCLITE)
1371 fprintf (stderr, "SPARCLITE ");
1372 if (flags & MASK_V8)
1373 fprintf (stderr, "V8 ");
1374 if (flags & MASK_V9)
1375 fprintf (stderr, "V9 ");
1376 }
1377
1378 static void
1379 dump_target_flags (const char *prefix, const int flags)
1380 {
1381 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1382 dump_target_flag_bits (flags);
1383 fprintf(stderr, "]\n");
1384 }
1385
1386 /* Validate and override various options, and do some machine dependent
1387 initialization. */
1388
1389 static void
1390 sparc_option_override (void)
1391 {
1392 static struct code_model {
1393 const char *const name;
1394 const enum cmodel value;
1395 } const cmodels[] = {
1396 { "32", CM_32 },
1397 { "medlow", CM_MEDLOW },
1398 { "medmid", CM_MEDMID },
1399 { "medany", CM_MEDANY },
1400 { "embmedany", CM_EMBMEDANY },
1401 { NULL, (enum cmodel) 0 }
1402 };
1403 const struct code_model *cmodel;
1404 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1405 static struct cpu_default {
1406 const int cpu;
1407 const enum processor_type processor;
1408 } const cpu_default[] = {
1409 /* There must be one entry here for each TARGET_CPU value. */
1410 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1411 { TARGET_CPU_v8, PROCESSOR_V8 },
1412 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1413 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1414 { TARGET_CPU_leon, PROCESSOR_LEON },
1415 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1416 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1417 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1418 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1419 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1420 { TARGET_CPU_v9, PROCESSOR_V9 },
1421 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1422 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1423 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1424 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1425 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1426 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1427 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1428 { TARGET_CPU_m8, PROCESSOR_M8 },
1429 { -1, PROCESSOR_V7 }
1430 };
1431 const struct cpu_default *def;
1432 /* Table of values for -m{cpu,tune}=. This must match the order of
1433 the enum processor_type in sparc-opts.h. */
1434 static struct cpu_table {
1435 const char *const name;
1436 const int disable;
1437 const int enable;
1438 } const cpu_table[] = {
1439 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1440 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1441 { "v8", MASK_ISA, MASK_V8 },
1442 /* TI TMS390Z55 supersparc */
1443 { "supersparc", MASK_ISA, MASK_V8 },
1444 { "hypersparc", MASK_ISA, MASK_V8 },
1445 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1446 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1447 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1448 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1449 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1450 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1451 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1452 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1453 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1454 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1455 /* TEMIC sparclet */
1456 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1457 { "v9", MASK_ISA, MASK_V9 },
1458 /* UltraSPARC I, II, IIi */
1459 { "ultrasparc", MASK_ISA,
1460 /* Although insns using %y are deprecated, it is a clear win. */
1461 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1462 /* UltraSPARC III */
1463 /* ??? Check if %y issue still holds true. */
1464 { "ultrasparc3", MASK_ISA,
1465 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1466 /* UltraSPARC T1 */
1467 { "niagara", MASK_ISA,
1468 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1469 /* UltraSPARC T2 */
1470 { "niagara2", MASK_ISA,
1471 MASK_V9|MASK_POPC|MASK_VIS2 },
1472 /* UltraSPARC T3 */
1473 { "niagara3", MASK_ISA,
1474 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1475 /* UltraSPARC T4 */
1476 { "niagara4", MASK_ISA,
1477 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1478 /* UltraSPARC M7 */
1479 { "niagara7", MASK_ISA,
1480 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1481 /* UltraSPARC M8 */
1482 { "m8", MASK_ISA,
1483 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1484 };
1485 const struct cpu_table *cpu;
1486 unsigned int i;
1487
1488 if (sparc_debug_string != NULL)
1489 {
1490 const char *q;
1491 char *p;
1492
1493 p = ASTRDUP (sparc_debug_string);
1494 while ((q = strtok (p, ",")) != NULL)
1495 {
1496 bool invert;
1497 int mask;
1498
1499 p = NULL;
1500 if (*q == '!')
1501 {
1502 invert = true;
1503 q++;
1504 }
1505 else
1506 invert = false;
1507
1508 if (! strcmp (q, "all"))
1509 mask = MASK_DEBUG_ALL;
1510 else if (! strcmp (q, "options"))
1511 mask = MASK_DEBUG_OPTIONS;
1512 else
1513 error ("unknown -mdebug-%s switch", q);
1514
1515 if (invert)
1516 sparc_debug &= ~mask;
1517 else
1518 sparc_debug |= mask;
1519 }
1520 }
1521
1522 /* Enable the FsMULd instruction by default if not explicitly specified by
1523 the user. It may be later disabled by the CPU (explicitly or not). */
1524 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1525 target_flags |= MASK_FSMULD;
1526
1527 if (TARGET_DEBUG_OPTIONS)
1528 {
1529 dump_target_flags("Initial target_flags", target_flags);
1530 dump_target_flags("target_flags_explicit", target_flags_explicit);
1531 }
1532
1533 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1534 SUBTARGET_OVERRIDE_OPTIONS;
1535 #endif
1536
1537 #ifndef SPARC_BI_ARCH
1538 /* Check for unsupported architecture size. */
1539 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1540 error ("%s is not supported by this configuration",
1541 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1542 #endif
1543
1544 /* We force all 64-bit archs to use a 128-bit long double. */
1545 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1546 {
1547 error ("-mlong-double-64 not allowed with -m64");
1548 target_flags |= MASK_LONG_DOUBLE_128;
1549 }
1550
1551 /* Code model selection. */
1552 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1553
1554 #ifdef SPARC_BI_ARCH
1555 if (TARGET_ARCH32)
1556 sparc_cmodel = CM_32;
1557 #endif
1558
1559 if (sparc_cmodel_string != NULL)
1560 {
1561 if (TARGET_ARCH64)
1562 {
1563 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1564 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1565 break;
1566 if (cmodel->name == NULL)
1567 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1568 else
1569 sparc_cmodel = cmodel->value;
1570 }
1571 else
1572 error ("-mcmodel= is not supported on 32-bit systems");
1573 }
1574
1575 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1576 for (i = 8; i < 16; i++)
1577 if (!call_used_regs [i])
1578 {
1579 error ("-fcall-saved-REG is not supported for out registers");
1580 call_used_regs [i] = 1;
1581 }
1582
1583 /* Set the default CPU if no -mcpu option was specified. */
1584 if (!global_options_set.x_sparc_cpu_and_features)
1585 {
1586 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1587 if (def->cpu == TARGET_CPU_DEFAULT)
1588 break;
1589 gcc_assert (def->cpu != -1);
1590 sparc_cpu_and_features = def->processor;
1591 }
1592
1593 /* Set the default tuning CPU if no -mtune option was specified. */
1594 if (!global_options_set.x_sparc_cpu)
1595 sparc_cpu = sparc_cpu_and_features;
1596
1597 cpu = &cpu_table[(int) sparc_cpu_and_features];
1598
1599 if (TARGET_DEBUG_OPTIONS)
1600 {
1601 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1602 dump_target_flags ("cpu->disable", cpu->disable);
1603 dump_target_flags ("cpu->enable", cpu->enable);
1604 }
1605
1606 target_flags &= ~cpu->disable;
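  /* Turn on the CPU's default features, but leave out anything the assembler
     cannot handle (the HAVE_AS_* conditionals below) and anything the user
     has already selected or deselected explicitly on the command line.  */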
1607 target_flags |= (cpu->enable
1608 #ifndef HAVE_AS_FMAF_HPC_VIS3
1609 & ~(MASK_FMAF | MASK_VIS3)
1610 #endif
1611 #ifndef HAVE_AS_SPARC4
1612 & ~MASK_CBCOND
1613 #endif
1614 #ifndef HAVE_AS_SPARC5_VIS4
1615 & ~(MASK_VIS4 | MASK_SUBXC)
1616 #endif
1617 #ifndef HAVE_AS_SPARC6
1618 & ~(MASK_VIS4B)
1619 #endif
1620 #ifndef HAVE_AS_LEON
1621 & ~(MASK_LEON | MASK_LEON3)
1622 #endif
1623 & ~(target_flags_explicit & MASK_FEATURES)
1624 );
1625
1626 /* -mvis2 implies -mvis. */
1627 if (TARGET_VIS2)
1628 target_flags |= MASK_VIS;
1629
1630 /* -mvis3 implies -mvis2 and -mvis. */
1631 if (TARGET_VIS3)
1632 target_flags |= MASK_VIS2 | MASK_VIS;
1633
1634 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1635 if (TARGET_VIS4)
1636 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1637
1638 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1639 if (TARGET_VIS4B)
1640 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1641
1642 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1643 FPU is disabled. */
1644 if (!TARGET_FPU)
1645 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1646 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1647
1648 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1649 are available; -m64 also implies v9. */
1650 if (TARGET_VIS || TARGET_ARCH64)
1651 {
1652 target_flags |= MASK_V9;
1653 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1654 }
1655
1656 /* -mvis also implies -mv8plus on 32-bit. */
1657 if (TARGET_VIS && !TARGET_ARCH64)
1658 target_flags |= MASK_V8PLUS;
1659
1660 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1661 if (TARGET_V9 && TARGET_ARCH32)
1662 target_flags |= MASK_DEPRECATED_V8_INSNS;
1663
1664 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1665 if (!TARGET_V9 || TARGET_ARCH64)
1666 target_flags &= ~MASK_V8PLUS;
1667
1668 /* Don't use stack biasing in 32-bit mode. */
1669 if (TARGET_ARCH32)
1670 target_flags &= ~MASK_STACK_BIAS;
1671
1672 /* Use LRA instead of reload, unless otherwise instructed. */
1673 if (!(target_flags_explicit & MASK_LRA))
1674 target_flags |= MASK_LRA;
1675
1676 /* Enable the back-to-back store errata workaround for LEON3FT. */
1677 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1678 sparc_fix_b2bst = 1;
1679
1680 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1681 if (sparc_fix_ut699)
1682 target_flags &= ~MASK_FSMULD;
1683
1684 /* Supply a default value for align_functions. */
1685 if (align_functions == 0)
1686 {
1687 if (sparc_cpu == PROCESSOR_ULTRASPARC
1688 || sparc_cpu == PROCESSOR_ULTRASPARC3
1689 || sparc_cpu == PROCESSOR_NIAGARA
1690 || sparc_cpu == PROCESSOR_NIAGARA2
1691 || sparc_cpu == PROCESSOR_NIAGARA3
1692 || sparc_cpu == PROCESSOR_NIAGARA4)
1693 align_functions = 32;
1694 else if (sparc_cpu == PROCESSOR_NIAGARA7
1695 || sparc_cpu == PROCESSOR_M8)
1696 align_functions = 64;
1697 }
1698
1699 /* Validate PCC_STRUCT_RETURN. */
1700 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1701 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1702
1703 /* Only use .uaxword when compiling for a 64-bit target. */
1704 if (!TARGET_ARCH64)
1705 targetm.asm_out.unaligned_op.di = NULL;
1706
1707 /* Do various machine dependent initializations. */
1708 sparc_init_modes ();
1709
1710 /* Set up function hooks. */
1711 init_machine_status = sparc_init_machine_status;
1712
1713 switch (sparc_cpu)
1714 {
1715 case PROCESSOR_V7:
1716 case PROCESSOR_CYPRESS:
1717 sparc_costs = &cypress_costs;
1718 break;
1719 case PROCESSOR_V8:
1720 case PROCESSOR_SPARCLITE:
1721 case PROCESSOR_SUPERSPARC:
1722 sparc_costs = &supersparc_costs;
1723 break;
1724 case PROCESSOR_F930:
1725 case PROCESSOR_F934:
1726 case PROCESSOR_HYPERSPARC:
1727 case PROCESSOR_SPARCLITE86X:
1728 sparc_costs = &hypersparc_costs;
1729 break;
1730 case PROCESSOR_LEON:
1731 sparc_costs = &leon_costs;
1732 break;
1733 case PROCESSOR_LEON3:
1734 case PROCESSOR_LEON3V7:
1735 sparc_costs = &leon3_costs;
1736 break;
1737 case PROCESSOR_SPARCLET:
1738 case PROCESSOR_TSC701:
1739 sparc_costs = &sparclet_costs;
1740 break;
1741 case PROCESSOR_V9:
1742 case PROCESSOR_ULTRASPARC:
1743 sparc_costs = &ultrasparc_costs;
1744 break;
1745 case PROCESSOR_ULTRASPARC3:
1746 sparc_costs = &ultrasparc3_costs;
1747 break;
1748 case PROCESSOR_NIAGARA:
1749 sparc_costs = &niagara_costs;
1750 break;
1751 case PROCESSOR_NIAGARA2:
1752 sparc_costs = &niagara2_costs;
1753 break;
1754 case PROCESSOR_NIAGARA3:
1755 sparc_costs = &niagara3_costs;
1756 break;
1757 case PROCESSOR_NIAGARA4:
1758 sparc_costs = &niagara4_costs;
1759 break;
1760 case PROCESSOR_NIAGARA7:
1761 sparc_costs = &niagara7_costs;
1762 break;
1763 case PROCESSOR_M8:
1764 sparc_costs = &m8_costs;
1765 break;
1766 case PROCESSOR_NATIVE:
1767 gcc_unreachable ();
1768 };
1769
1770 if (sparc_memory_model == SMM_DEFAULT)
1771 {
1772 /* Choose the memory model for the operating system. */
1773 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1774 if (os_default != SMM_DEFAULT)
1775 sparc_memory_model = os_default;
1776 /* Choose the most relaxed model for the processor. */
1777 else if (TARGET_V9)
1778 sparc_memory_model = SMM_RMO;
1779 else if (TARGET_LEON3)
1780 sparc_memory_model = SMM_TSO;
1781 else if (TARGET_LEON)
1782 sparc_memory_model = SMM_SC;
1783 else if (TARGET_V8)
1784 sparc_memory_model = SMM_PSO;
1785 else
1786 sparc_memory_model = SMM_SC;
1787 }
1788
1789 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1790 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1791 target_flags |= MASK_LONG_DOUBLE_128;
1792 #endif
1793
1794 if (TARGET_DEBUG_OPTIONS)
1795 dump_target_flags ("Final target_flags", target_flags);
1796
1797 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1798 can run at the same time. More important, it is the threshold
1799 defining when additional prefetches will be dropped by the
1800 hardware.
1801
1802 The UltraSPARC-III features a documented prefetch queue with a
1803 size of 8. Additional prefetches issued in the cpu are
1804 dropped.
1805
1806 Niagara processors are different. In these processors prefetches
1807 are handled much like regular loads. The L1 miss buffer is 32
1808 entries, but prefetches start getting affected when 30 entries
1809 become occupied. That occupation could be a mix of regular loads
1810 and prefetches though. And that buffer is shared by all threads.
1811 Once the threshold is reached, if the core is running a single
1812 thread the prefetch will retry. If more than one thread is
1813 running, the prefetch will be dropped.
1814
1815 All this makes it very difficult to determine how many
1816 prefetches can be issued simultaneously, even in a
1817 single-threaded program. Experimental results show that setting
1818 this parameter to 32 works well when the number of threads is not
1819 high. */
1820 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1821 ((sparc_cpu == PROCESSOR_ULTRASPARC
1822 || sparc_cpu == PROCESSOR_NIAGARA
1823 || sparc_cpu == PROCESSOR_NIAGARA2
1824 || sparc_cpu == PROCESSOR_NIAGARA3
1825 || sparc_cpu == PROCESSOR_NIAGARA4)
1826 ? 2
1827 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1828 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1829 || sparc_cpu == PROCESSOR_M8)
1830 ? 32 : 3))),
1831 global_options.x_param_values,
1832 global_options_set.x_param_values);
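/* Illustrative sketch (added commentary, not part of the original GCC
   sources, and not compiled): the nested conditional above maps the
   selected processor to a prefetch count.  It is restated below as a
   switch for readability; the enum tag is assumed, the member names are
   the ones used above.  */
#if 0
static int
sparc_simultaneous_prefetches_sketch (enum processor_type cpu)
{
  switch (cpu)
    {
    case PROCESSOR_ULTRASPARC:
    case PROCESSOR_NIAGARA:
    case PROCESSOR_NIAGARA2:
    case PROCESSOR_NIAGARA3:
    case PROCESSOR_NIAGARA4:
      return 2;
    case PROCESSOR_ULTRASPARC3:
      return 8;
    case PROCESSOR_NIAGARA7:
    case PROCESSOR_M8:
      return 32;
    default:
      return 3;   /* every other processor */
    }
}
#endif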
1833
1834 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1835 bytes.
1836
1837 The Oracle SPARC Architecture (previously the UltraSPARC
1838 Architecture) specification states that when a PREFETCH[A]
1839 instruction is executed an implementation-specific amount of data
1840 is prefetched, and that it is at least 64 bytes long (aligned to
1841 at least 64 bytes).
1842
1843 However, this is not correct. The M7 (and implementations prior
1844 to that) does not guarantee a 64B prefetch into a cache if the
1845 line size is smaller. A single cache line is all that is ever
1846 prefetched. So for the M7, where the L1D$ has 32B lines and the
1847 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1848 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1849 is a read_n prefetch, which is the only type which allocates to
1850 the L1.) */
1851 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1852 (sparc_cpu == PROCESSOR_M8
1853 ? 64 : 32),
1854 global_options.x_param_values,
1855 global_options_set.x_param_values);
1856
1857 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1858 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1859 Niagara processors feature an L1D$ of 16KB. */
1860 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1861 ((sparc_cpu == PROCESSOR_ULTRASPARC
1862 || sparc_cpu == PROCESSOR_ULTRASPARC3
1863 || sparc_cpu == PROCESSOR_NIAGARA
1864 || sparc_cpu == PROCESSOR_NIAGARA2
1865 || sparc_cpu == PROCESSOR_NIAGARA3
1866 || sparc_cpu == PROCESSOR_NIAGARA4
1867 || sparc_cpu == PROCESSOR_NIAGARA7
1868 || sparc_cpu == PROCESSOR_M8)
1869 ? 16 : 64),
1870 global_options.x_param_values,
1871 global_options_set.x_param_values);
1872
1873
1874 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1875 that 512 is the default in params.def. */
1876 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1877 ((sparc_cpu == PROCESSOR_NIAGARA4
1878 || sparc_cpu == PROCESSOR_M8)
1879 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1880 ? 256 : 512)),
1881 global_options.x_param_values,
1882 global_options_set.x_param_values);
1883
1884
1885 /* Disable save slot sharing for call-clobbered registers by default.
1886 The IRA sharing algorithm works on single registers only and this
1887 pessimizes for double floating-point registers. */
1888 if (!global_options_set.x_flag_ira_share_save_slots)
1889 flag_ira_share_save_slots = 0;
1890
1891 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1892 redundant 32-to-64-bit extensions. */
1893 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1894 flag_ree = 0;
1895 }
1896 \f
1897 /* Miscellaneous utilities. */
1898
1899 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1900 or branch on register contents instructions. */
1901
1902 int
1903 v9_regcmp_p (enum rtx_code code)
1904 {
1905 return (code == EQ || code == NE || code == GE || code == LT
1906 || code == LE || code == GT);
1907 }
1908
1909 /* Nonzero if OP is a floating point constant which can
1910 be loaded into an integer register using a single
1911 sethi instruction. */
1912
1913 int
1914 fp_sethi_p (rtx op)
1915 {
1916 if (GET_CODE (op) == CONST_DOUBLE)
1917 {
1918 long i;
1919
1920 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1921 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1922 }
1923
1924 return 0;
1925 }
1926
1927 /* Nonzero if OP is a floating point constant which can
1928 be loaded into an integer register using a single
1929 mov instruction. */
1930
1931 int
1932 fp_mov_p (rtx op)
1933 {
1934 if (GET_CODE (op) == CONST_DOUBLE)
1935 {
1936 long i;
1937
1938 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1939 return SPARC_SIMM13_P (i);
1940 }
1941
1942 return 0;
1943 }
1944
1945 /* Nonzero if OP is a floating point constant which can
1946 be loaded into an integer register using a high/losum
1947 instruction sequence. */
1948
1949 int
1950 fp_high_losum_p (rtx op)
1951 {
1952 /* The constraints calling this should only be in
1953 SFmode move insns, so any constant which cannot
1954 be moved using a single insn will do. */
1955 if (GET_CODE (op) == CONST_DOUBLE)
1956 {
1957 long i;
1958
1959 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1960 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1961 }
1962
1963 return 0;
1964 }
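/* Illustrative sketch (added commentary, not part of the original GCC
   sources, and not compiled): taken together, the three predicates above
   partition single-precision bit patterns by how they can be loaded into
   an integer register.  The sketch assumes the usual meanings of the two
   macros: SPARC_SIMM13_P accepts values that fit in a signed 13-bit
   immediate, and SPARC_SETHI_P accepts values whose low 10 bits are zero,
   i.e. values producible by a single sethi.  */
#if 0
static const char *
classify_sf_bits_sketch (long bits)
{
  if (SPARC_SIMM13_P (bits))
    return "single mov (fp_mov_p)";
  else if (SPARC_SETHI_P (bits))
    return "single sethi (fp_sethi_p)";
  else
    return "sethi + or, i.e. high/losum (fp_high_losum_p)";
}
#endif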
1965
1966 /* Return true if the address of LABEL can be loaded by means of the
1967 mov{si,di}_pic_label_ref patterns in PIC mode. */
1968
1969 static bool
1970 can_use_mov_pic_label_ref (rtx label)
1971 {
1972 /* VxWorks does not impose a fixed gap between segments; the run-time
1973 gap can be different from the object-file gap. We therefore can't
1974 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1975 are absolutely sure that X is in the same segment as the GOT.
1976 Unfortunately, the flexibility of linker scripts means that we
1977 can't be sure of that in general, so assume that GOT-relative
1978 accesses are never valid on VxWorks. */
1979 if (TARGET_VXWORKS_RTP)
1980 return false;
1981
1982 /* Similarly, if the label is non-local, it might end up being placed
1983 in a different section than the current one, and mov_pic_label_ref
1984 requires the label and the code to be in the same section. */
1985 if (LABEL_REF_NONLOCAL_P (label))
1986 return false;
1987
1988 /* Finally, if we are reordering basic blocks and partitioning into hot
1989 and cold sections, this might happen for any label. */
1990 if (flag_reorder_blocks_and_partition)
1991 return false;
1992
1993 return true;
1994 }
1995
1996 /* Expand a move instruction. Return true if all work is done. */
1997
1998 bool
1999 sparc_expand_move (machine_mode mode, rtx *operands)
2000 {
2001 /* Handle sets of MEM first. */
2002 if (GET_CODE (operands[0]) == MEM)
2003 {
2004 /* 0 is a register (or a pair of registers) on SPARC. */
2005 if (register_or_zero_operand (operands[1], mode))
2006 return false;
2007
2008 if (!reload_in_progress)
2009 {
2010 operands[0] = validize_mem (operands[0]);
2011 operands[1] = force_reg (mode, operands[1]);
2012 }
2013 }
2014
2015 /* Fixup TLS cases. */
2016 if (TARGET_HAVE_TLS
2017 && CONSTANT_P (operands[1])
2018 && sparc_tls_referenced_p (operands [1]))
2019 {
2020 operands[1] = sparc_legitimize_tls_address (operands[1]);
2021 return false;
2022 }
2023
2024 /* Fixup PIC cases. */
2025 if (flag_pic && CONSTANT_P (operands[1]))
2026 {
2027 if (pic_address_needs_scratch (operands[1]))
2028 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2029
2030 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2031 if (GET_CODE (operands[1]) == LABEL_REF
2032 && can_use_mov_pic_label_ref (operands[1]))
2033 {
2034 if (mode == SImode)
2035 {
2036 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2037 return true;
2038 }
2039
2040 if (mode == DImode)
2041 {
2042 gcc_assert (TARGET_ARCH64);
2043 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2044 return true;
2045 }
2046 }
2047
2048 if (symbolic_operand (operands[1], mode))
2049 {
2050 operands[1]
2051 = sparc_legitimize_pic_address (operands[1],
2052 reload_in_progress
2053 ? operands[0] : NULL_RTX);
2054 return false;
2055 }
2056 }
2057
2058 /* If we are trying to toss an integer constant into FP registers,
2059 or loading an FP or vector constant, force it into memory. */
2060 if (CONSTANT_P (operands[1])
2061 && REG_P (operands[0])
2062 && (SPARC_FP_REG_P (REGNO (operands[0]))
2063 || SCALAR_FLOAT_MODE_P (mode)
2064 || VECTOR_MODE_P (mode)))
2065 {
2066 /* emit_group_store will send such bogosity to us when it is
2067 not storing directly into memory. So fix this up to avoid
2068 crashes in output_constant_pool. */
2069 if (operands [1] == const0_rtx)
2070 operands[1] = CONST0_RTX (mode);
2071
2072 /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
2073 other registers can always be handled this way. */
2074 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2075 && (const_zero_operand (operands[1], mode)
2076 || const_all_ones_operand (operands[1], mode)))
2077 return false;
2078
2079 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2080 /* We are able to build any SF constant in integer registers
2081 with at most 2 instructions. */
2082 && (mode == SFmode
2083 /* And any DF constant in integer registers if needed. */
2084 || (mode == DFmode && !can_create_pseudo_p ())))
2085 return false;
2086
2087 operands[1] = force_const_mem (mode, operands[1]);
2088 if (!reload_in_progress)
2089 operands[1] = validize_mem (operands[1]);
2090 return false;
2091 }
2092
2093 /* Accept non-constants and valid constants unmodified. */
2094 if (!CONSTANT_P (operands[1])
2095 || GET_CODE (operands[1]) == HIGH
2096 || input_operand (operands[1], mode))
2097 return false;
2098
2099 switch (mode)
2100 {
2101 case E_QImode:
2102 /* All QImode constants require only one insn, so proceed. */
2103 break;
2104
2105 case E_HImode:
2106 case E_SImode:
2107 sparc_emit_set_const32 (operands[0], operands[1]);
2108 return true;
2109
2110 case E_DImode:
2111 /* input_operand should have filtered out 32-bit mode. */
2112 sparc_emit_set_const64 (operands[0], operands[1]);
2113 return true;
2114
2115 case E_TImode:
2116 {
2117 rtx high, low;
2118 /* TImode isn't available in 32-bit mode. */
2119 split_double (operands[1], &high, &low);
2120 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2121 high));
2122 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2123 low));
2124 }
2125 return true;
2126
2127 default:
2128 gcc_unreachable ();
2129 }
2130
2131 return false;
2132 }
2133
2134 /* Load OP1, a 32-bit constant, into OP0, a register.
2135 We know it can't be done in one insn when we get
2136 here, the move expander guarantees this. */
2137
2138 static void
2139 sparc_emit_set_const32 (rtx op0, rtx op1)
2140 {
2141 machine_mode mode = GET_MODE (op0);
2142 rtx temp = op0;
2143
2144 if (can_create_pseudo_p ())
2145 temp = gen_reg_rtx (mode);
2146
2147 if (GET_CODE (op1) == CONST_INT)
2148 {
2149 gcc_assert (!small_int_operand (op1, mode)
2150 && !const_high_operand (op1, mode));
2151
2152 /* Emit them as real moves instead of a HIGH/LO_SUM;
2153 this way CSE can see everything and reuse intermediate
2154 values if it wants. */
2155 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2156 & ~(HOST_WIDE_INT) 0x3ff)));
2157
2158 emit_insn (gen_rtx_SET (op0,
2159 gen_rtx_IOR (mode, temp,
2160 GEN_INT (INTVAL (op1) & 0x3ff))));
2161 }
2162 else
2163 {
2164 /* A symbol, emit in the traditional way. */
2165 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2166 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2167 }
2168 }
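/* Illustrative check (added commentary, not part of the original GCC
   sources, and not compiled): the CONST_INT path above splits the value
   into a sethi-style part with the low 10 bits cleared and a 10-bit
   remainder; IOR-ing the two pieces recovers the original constant.  */
#if 0
static int
const32_split_roundtrip_sketch (long val)
{
  long high = val & ~0x3ffL;   /* materialized by the first SET */
  long low  = val &  0x3ffL;   /* added back by the IOR */
  return (high | low) == val;  /* always true */
}
#endif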
2169
2170 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2171 If TEMP is nonzero, we are forbidden to use any other scratch
2172 registers. Otherwise, we are allowed to generate them as needed.
2173
2174 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2175 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2176
2177 void
2178 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2179 {
2180 rtx cst, temp1, temp2, temp3, temp4, temp5;
2181 rtx ti_temp = 0;
2182
2183 /* Deal with too large offsets. */
2184 if (GET_CODE (op1) == CONST
2185 && GET_CODE (XEXP (op1, 0)) == PLUS
2186 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2187 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2188 {
2189 gcc_assert (!temp);
2190 temp1 = gen_reg_rtx (DImode);
2191 temp2 = gen_reg_rtx (DImode);
2192 sparc_emit_set_const64 (temp2, cst);
2193 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2194 NULL_RTX);
2195 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2196 return;
2197 }
2198
2199 if (temp && GET_MODE (temp) == TImode)
2200 {
2201 ti_temp = temp;
2202 temp = gen_rtx_REG (DImode, REGNO (temp));
2203 }
2204
2205 /* SPARC-V9 code-model support. */
2206 switch (sparc_cmodel)
2207 {
2208 case CM_MEDLOW:
2209 /* The range spanned by all instructions in the object is less
2210 than 2^31 bytes (2GB) and the distance from any instruction
2211 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2212 than 2^31 bytes (2GB).
2213
2214 The executable must be in the low 4TB of the virtual address
2215 space.
2216
2217 sethi %hi(symbol), %temp1
2218 or %temp1, %lo(symbol), %reg */
2219 if (temp)
2220 temp1 = temp; /* op0 is allowed. */
2221 else
2222 temp1 = gen_reg_rtx (DImode);
2223
2224 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2225 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2226 break;
2227
2228 case CM_MEDMID:
2229 /* The range spanned by all instructions in the object is less
2230 than 2^31 bytes (2GB) and the distance from any instruction
2231 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2232 than 2^31 bytes (2GB).
2233
2234 The executable must be in the low 16TB of the virtual address
2235 space.
2236
2237 sethi %h44(symbol), %temp1
2238 or %temp1, %m44(symbol), %temp2
2239 sllx %temp2, 12, %temp3
2240 or %temp3, %l44(symbol), %reg */
2241 if (temp)
2242 {
2243 temp1 = op0;
2244 temp2 = op0;
2245 temp3 = temp; /* op0 is allowed. */
2246 }
2247 else
2248 {
2249 temp1 = gen_reg_rtx (DImode);
2250 temp2 = gen_reg_rtx (DImode);
2251 temp3 = gen_reg_rtx (DImode);
2252 }
2253
2254 emit_insn (gen_seth44 (temp1, op1));
2255 emit_insn (gen_setm44 (temp2, temp1, op1));
2256 emit_insn (gen_rtx_SET (temp3,
2257 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2258 emit_insn (gen_setl44 (op0, temp3, op1));
2259 break;
2260
2261 case CM_MEDANY:
2262 /* The range spanned by all instructions in the object is less
2263 than 2^31 bytes (2GB) and the distance from any instruction
2264 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2265 than 2^31 bytes (2GB).
2266
2267 The executable can be placed anywhere in the virtual address
2268 space.
2269
2270 sethi %hh(symbol), %temp1
2271 sethi %lm(symbol), %temp2
2272 or %temp1, %hm(symbol), %temp3
2273 sllx %temp3, 32, %temp4
2274 or %temp4, %temp2, %temp5
2275 or %temp5, %lo(symbol), %reg */
2276 if (temp)
2277 {
2278 /* It is possible that one of the registers we got for operands[2]
2279 might coincide with that of operands[0] (which is why we made
2280 it TImode). Pick the other one to use as our scratch. */
2281 if (rtx_equal_p (temp, op0))
2282 {
2283 gcc_assert (ti_temp);
2284 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2285 }
2286 temp1 = op0;
2287 temp2 = temp; /* op0 is _not_ allowed, see above. */
2288 temp3 = op0;
2289 temp4 = op0;
2290 temp5 = op0;
2291 }
2292 else
2293 {
2294 temp1 = gen_reg_rtx (DImode);
2295 temp2 = gen_reg_rtx (DImode);
2296 temp3 = gen_reg_rtx (DImode);
2297 temp4 = gen_reg_rtx (DImode);
2298 temp5 = gen_reg_rtx (DImode);
2299 }
2300
2301 emit_insn (gen_sethh (temp1, op1));
2302 emit_insn (gen_setlm (temp2, op1));
2303 emit_insn (gen_sethm (temp3, temp1, op1));
2304 emit_insn (gen_rtx_SET (temp4,
2305 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2306 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2307 emit_insn (gen_setlo (op0, temp5, op1));
2308 break;
2309
2310 case CM_EMBMEDANY:
2311 /* Old old old backwards compatibility kruft here.
2312 Essentially it is MEDLOW with a fixed 64-bit
2313 virtual base added to all data segment addresses.
2314 Text-segment stuff is computed like MEDANY, we can't
2315 reuse the code above because the relocation knobs
2316 look different.
2317
2318 Data segment: sethi %hi(symbol), %temp1
2319 add %temp1, EMBMEDANY_BASE_REG, %temp2
2320 or %temp2, %lo(symbol), %reg */
2321 if (data_segment_operand (op1, GET_MODE (op1)))
2322 {
2323 if (temp)
2324 {
2325 temp1 = temp; /* op0 is allowed. */
2326 temp2 = op0;
2327 }
2328 else
2329 {
2330 temp1 = gen_reg_rtx (DImode);
2331 temp2 = gen_reg_rtx (DImode);
2332 }
2333
2334 emit_insn (gen_embmedany_sethi (temp1, op1));
2335 emit_insn (gen_embmedany_brsum (temp2, temp1));
2336 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2337 }
2338
2339 /* Text segment: sethi %uhi(symbol), %temp1
2340 sethi %hi(symbol), %temp2
2341 or %temp1, %ulo(symbol), %temp3
2342 sllx %temp3, 32, %temp4
2343 or %temp4, %temp2, %temp5
2344 or %temp5, %lo(symbol), %reg */
2345 else
2346 {
2347 if (temp)
2348 {
2349 /* It is possible that one of the registers we got for operands[2]
2350 might coincide with that of operands[0] (which is why we made
2351 it TImode). Pick the other one to use as our scratch. */
2352 if (rtx_equal_p (temp, op0))
2353 {
2354 gcc_assert (ti_temp);
2355 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2356 }
2357 temp1 = op0;
2358 temp2 = temp; /* op0 is _not_ allowed, see above. */
2359 temp3 = op0;
2360 temp4 = op0;
2361 temp5 = op0;
2362 }
2363 else
2364 {
2365 temp1 = gen_reg_rtx (DImode);
2366 temp2 = gen_reg_rtx (DImode);
2367 temp3 = gen_reg_rtx (DImode);
2368 temp4 = gen_reg_rtx (DImode);
2369 temp5 = gen_reg_rtx (DImode);
2370 }
2371
2372 emit_insn (gen_embmedany_textuhi (temp1, op1));
2373 emit_insn (gen_embmedany_texthi (temp2, op1));
2374 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2375 emit_insn (gen_rtx_SET (temp4,
2376 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2377 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2378 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2379 }
2380 break;
2381
2382 default:
2383 gcc_unreachable ();
2384 }
2385 }
2386
2387 /* These avoid problems when cross compiling. If we do not
2388 go through all this hair then the optimizer will see
2389 invalid REG_EQUAL notes or in some cases none at all. */
2390 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2391 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2392 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2393 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2394
2395 /* The optimizer is not allowed to assume anything about exactly
2396 which bits are set for a HIGH; they are unspecified.
2397 Unfortunately this leads to many missed optimizations
2398 during CSE. We mask out the non-HIGH bits and match a plain
2399 movdi to alleviate this problem. */
2400 static rtx
2401 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2402 {
2403 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2404 }
2405
2406 static rtx
2407 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2408 {
2409 return gen_rtx_SET (dest, GEN_INT (val));
2410 }
2411
2412 static rtx
2413 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2414 {
2415 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2416 }
2417
2418 static rtx
2419 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2420 {
2421 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2422 }
2423
2424 /* Worker routines for 64-bit constant formation on arch64.
2425 One of the key things to do in these emissions is
2426 to create as many temp REGs as possible. This makes it
2427 possible for half-built constants to be used later when
2428 such values are similar to something required later on.
2429 Without doing this, the optimizer cannot see such
2430 opportunities. */
2431
2432 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2433 unsigned HOST_WIDE_INT, int);
2434
2435 static void
2436 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2437 unsigned HOST_WIDE_INT low_bits, int is_neg)
2438 {
2439 unsigned HOST_WIDE_INT high_bits;
2440
2441 if (is_neg)
2442 high_bits = (~low_bits) & 0xffffffff;
2443 else
2444 high_bits = low_bits;
2445
2446 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2447 if (!is_neg)
2448 {
2449 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2450 }
2451 else
2452 {
2453 /* If we are XOR'ing with -1, then we should emit a one's complement
2454 instead. This way the combiner will notice logical operations
2455 such as ANDN later on and substitute. */
2456 if ((low_bits & 0x3ff) == 0x3ff)
2457 {
2458 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2459 }
2460 else
2461 {
2462 emit_insn (gen_rtx_SET (op0,
2463 gen_safe_XOR64 (temp,
2464 (-(HOST_WIDE_INT)0x400
2465 | (low_bits & 0x3ff)))));
2466 }
2467 }
2468 }
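/* Illustrative check (added commentary, not part of the original GCC
   sources, and not compiled): for the is_neg case above, i.e. a constant
   whose upper 32 bits are all ones, the emitted sequence is a sethi of
   ~low_bits followed by an xor with (-0x400 | (low_bits & 0x3ff)) (or a
   plain one's complement when the low 10 bits are all ones).  The sketch
   verifies that this reconstructs the sign-extended value.  */
#if 0
static int
quick1_neg_roundtrip_sketch (unsigned long long low_bits)  /* low 32 bits */
{
  unsigned long long want = 0xffffffff00000000ULL | low_bits;
  unsigned long long high = (~low_bits) & 0xffffffffULL;
  unsigned long long sethi_part = high & ~0x3ffULL;  /* gen_safe_HIGH64 */
  unsigned long long xor_mask = 0xfffffffffffffc00ULL | (low_bits & 0x3ff);
  return (sethi_part ^ xor_mask) == want;            /* always true */
}
#endif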
2469
2470 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2471 unsigned HOST_WIDE_INT, int);
2472
2473 static void
2474 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2475 unsigned HOST_WIDE_INT high_bits,
2476 unsigned HOST_WIDE_INT low_immediate,
2477 int shift_count)
2478 {
2479 rtx temp2 = op0;
2480
2481 if ((high_bits & 0xfffffc00) != 0)
2482 {
2483 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2484 if ((high_bits & ~0xfffffc00) != 0)
2485 emit_insn (gen_rtx_SET (op0,
2486 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2487 else
2488 temp2 = temp;
2489 }
2490 else
2491 {
2492 emit_insn (gen_safe_SET64 (temp, high_bits));
2493 temp2 = temp;
2494 }
2495
2496 /* Now shift it up into place. */
2497 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2498 GEN_INT (shift_count))));
2499
2500 /* If there is a low immediate part piece, finish up by
2501 putting that in as well. */
2502 if (low_immediate != 0)
2503 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2504 }
2505
2506 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2507 unsigned HOST_WIDE_INT);
2508
2509 /* Full 64-bit constant decomposition. Even though this is the
2510 'worst' case, we still optimize a few things away. */
2511 static void
2512 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2513 unsigned HOST_WIDE_INT high_bits,
2514 unsigned HOST_WIDE_INT low_bits)
2515 {
2516 rtx sub_temp = op0;
2517
2518 if (can_create_pseudo_p ())
2519 sub_temp = gen_reg_rtx (DImode);
2520
2521 if ((high_bits & 0xfffffc00) != 0)
2522 {
2523 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2524 if ((high_bits & ~0xfffffc00) != 0)
2525 emit_insn (gen_rtx_SET (sub_temp,
2526 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2527 else
2528 sub_temp = temp;
2529 }
2530 else
2531 {
2532 emit_insn (gen_safe_SET64 (temp, high_bits));
2533 sub_temp = temp;
2534 }
2535
2536 if (can_create_pseudo_p ())
2537 {
2538 rtx temp2 = gen_reg_rtx (DImode);
2539 rtx temp3 = gen_reg_rtx (DImode);
2540 rtx temp4 = gen_reg_rtx (DImode);
2541
2542 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2543 GEN_INT (32))));
2544
2545 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2546 if ((low_bits & ~0xfffffc00) != 0)
2547 {
2548 emit_insn (gen_rtx_SET (temp3,
2549 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2550 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2551 }
2552 else
2553 {
2554 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2555 }
2556 }
2557 else
2558 {
2559 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2560 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2561 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2562 int to_shift = 12;
2563
2564 /* We are in the middle of reload, so this is really
2565 painful. However we do still make an attempt to
2566 avoid emitting truly stupid code. */
2567 if (low1 != const0_rtx)
2568 {
2569 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2570 GEN_INT (to_shift))));
2571 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2572 sub_temp = op0;
2573 to_shift = 12;
2574 }
2575 else
2576 {
2577 to_shift += 12;
2578 }
2579 if (low2 != const0_rtx)
2580 {
2581 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2582 GEN_INT (to_shift))));
2583 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2584 sub_temp = op0;
2585 to_shift = 8;
2586 }
2587 else
2588 {
2589 to_shift += 8;
2590 }
2591 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2592 GEN_INT (to_shift))));
2593 if (low3 != const0_rtx)
2594 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2595 /* phew... */
2596 }
2597 }
2598
2599 /* Analyze a 64-bit constant for certain properties. */
2600 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2601 unsigned HOST_WIDE_INT,
2602 int *, int *, int *);
2603
2604 static void
2605 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2606 unsigned HOST_WIDE_INT low_bits,
2607 int *hbsp, int *lbsp, int *abbasp)
2608 {
2609 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2610 int i;
2611
2612 lowest_bit_set = highest_bit_set = -1;
2613 i = 0;
2614 do
2615 {
2616 if ((lowest_bit_set == -1)
2617 && ((low_bits >> i) & 1))
2618 lowest_bit_set = i;
2619 if ((highest_bit_set == -1)
2620 && ((high_bits >> (32 - i - 1)) & 1))
2621 highest_bit_set = (64 - i - 1);
2622 }
2623 while (++i < 32
2624 && ((highest_bit_set == -1)
2625 || (lowest_bit_set == -1)));
2626 if (i == 32)
2627 {
2628 i = 0;
2629 do
2630 {
2631 if ((lowest_bit_set == -1)
2632 && ((high_bits >> i) & 1))
2633 lowest_bit_set = i + 32;
2634 if ((highest_bit_set == -1)
2635 && ((low_bits >> (32 - i - 1)) & 1))
2636 highest_bit_set = 32 - i - 1;
2637 }
2638 while (++i < 32
2639 && ((highest_bit_set == -1)
2640 || (lowest_bit_set == -1)));
2641 }
2642 /* If there are no bits set this should have gone out
2643 as one instruction! */
2644 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2645 all_bits_between_are_set = 1;
2646 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2647 {
2648 if (i < 32)
2649 {
2650 if ((low_bits & (1 << i)) != 0)
2651 continue;
2652 }
2653 else
2654 {
2655 if ((high_bits & (1 << (i - 32))) != 0)
2656 continue;
2657 }
2658 all_bits_between_are_set = 0;
2659 break;
2660 }
2661 *hbsp = highest_bit_set;
2662 *lbsp = lowest_bit_set;
2663 *abbasp = all_bits_between_are_set;
2664 }
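/* Illustrative sketch (added commentary, not part of the original GCC
   sources, and not compiled): on a host where unsigned long long is
   64 bits and the GCC count-leading/trailing-zero builtins are available,
   the three outputs of analyze_64bit_constant could be computed directly
   as below (val must be nonzero, as the real code asserts).  */
#if 0
static void
analyze_64bit_constant_sketch (unsigned long long val,
                               int *hbsp, int *lbsp, int *abbasp)
{
  int lo = __builtin_ctzll (val);        /* lowest set bit */
  int hi = 63 - __builtin_clzll (val);   /* highest set bit */
  /* Mask covering bits lo..hi inclusive.  */
  unsigned long long span = (hi == 63 ? ~0ULL : (1ULL << (hi + 1)) - 1)
                            & ~((1ULL << lo) - 1);
  *hbsp = hi;
  *lbsp = lo;
  *abbasp = (val == span);               /* all bits between are set */
}
#endif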
2665
2666 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2667
2668 static int
2669 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2670 unsigned HOST_WIDE_INT low_bits)
2671 {
2672 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2673
2674 if (high_bits == 0
2675 || high_bits == 0xffffffff)
2676 return 1;
2677
2678 analyze_64bit_constant (high_bits, low_bits,
2679 &highest_bit_set, &lowest_bit_set,
2680 &all_bits_between_are_set);
2681
2682 if ((highest_bit_set == 63
2683 || lowest_bit_set == 0)
2684 && all_bits_between_are_set != 0)
2685 return 1;
2686
2687 if ((highest_bit_set - lowest_bit_set) < 21)
2688 return 1;
2689
2690 return 0;
2691 }
2692
2693 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2694 unsigned HOST_WIDE_INT,
2695 int, int);
2696
2697 static unsigned HOST_WIDE_INT
2698 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2699 unsigned HOST_WIDE_INT low_bits,
2700 int lowest_bit_set, int shift)
2701 {
2702 HOST_WIDE_INT hi, lo;
2703
2704 if (lowest_bit_set < 32)
2705 {
2706 lo = (low_bits >> lowest_bit_set) << shift;
2707 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2708 }
2709 else
2710 {
2711 lo = 0;
2712 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2713 }
2714 gcc_assert (! (hi & lo));
2715 return (hi | lo);
2716 }
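/* Illustrative restatement (added commentary, not part of the original
   GCC sources, and not compiled): with the constant viewed as a single
   64-bit value, the helper above computes
   (value >> lowest_bit_set) << shift; the real code works on the separate
   high and low words it was handed.  */
#if 0
static unsigned long long
create_simple_focus_bits_sketch (unsigned long long value,
                                 int lowest_bit_set, int shift)
{
  return (value >> lowest_bit_set) << shift;
}
#endif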
2717
2718 /* Here we are sure to be arch64 and this is an integer constant
2719 being loaded into a register. Emit the most efficient
2720 insn sequence possible. Detection of all the 1-insn cases
2721 has been done already. */
2722 static void
2723 sparc_emit_set_const64 (rtx op0, rtx op1)
2724 {
2725 unsigned HOST_WIDE_INT high_bits, low_bits;
2726 int lowest_bit_set, highest_bit_set;
2727 int all_bits_between_are_set;
2728 rtx temp = 0;
2729
2730 /* Sanity check that we know what we are working with. */
2731 gcc_assert (TARGET_ARCH64
2732 && (GET_CODE (op0) == SUBREG
2733 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2734
2735 if (! can_create_pseudo_p ())
2736 temp = op0;
2737
2738 if (GET_CODE (op1) != CONST_INT)
2739 {
2740 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2741 return;
2742 }
2743
2744 if (! temp)
2745 temp = gen_reg_rtx (DImode);
2746
2747 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2748 low_bits = (INTVAL (op1) & 0xffffffff);
2749
2750 /* low_bits bits 0 --> 31
2751 high_bits bits 32 --> 63 */
2752
2753 analyze_64bit_constant (high_bits, low_bits,
2754 &highest_bit_set, &lowest_bit_set,
2755 &all_bits_between_are_set);
2756
2757 /* First try for a 2-insn sequence. */
2758
2759 /* These situations are preferred because the optimizer can
2760 * do more things with them:
2761 * 1) mov -1, %reg
2762 * sllx %reg, shift, %reg
2763 * 2) mov -1, %reg
2764 * srlx %reg, shift, %reg
2765 * 3) mov some_small_const, %reg
2766 * sllx %reg, shift, %reg
2767 */
2768 if (((highest_bit_set == 63
2769 || lowest_bit_set == 0)
2770 && all_bits_between_are_set != 0)
2771 || ((highest_bit_set - lowest_bit_set) < 12))
2772 {
2773 HOST_WIDE_INT the_const = -1;
2774 int shift = lowest_bit_set;
2775
2776 if ((highest_bit_set != 63
2777 && lowest_bit_set != 0)
2778 || all_bits_between_are_set == 0)
2779 {
2780 the_const =
2781 create_simple_focus_bits (high_bits, low_bits,
2782 lowest_bit_set, 0);
2783 }
2784 else if (lowest_bit_set == 0)
2785 shift = -(63 - highest_bit_set);
2786
2787 gcc_assert (SPARC_SIMM13_P (the_const));
2788 gcc_assert (shift != 0);
2789
2790 emit_insn (gen_safe_SET64 (temp, the_const));
2791 if (shift > 0)
2792 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2793 GEN_INT (shift))));
2794 else if (shift < 0)
2795 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2796 GEN_INT (-shift))));
2797 return;
2798 }
2799
2800 /* Now a range of 22 or fewer bits set somewhere.
2801 * 1) sethi %hi(focus_bits), %reg
2802 * sllx %reg, shift, %reg
2803 * 2) sethi %hi(focus_bits), %reg
2804 * srlx %reg, shift, %reg
2805 */
2806 if ((highest_bit_set - lowest_bit_set) < 21)
2807 {
2808 unsigned HOST_WIDE_INT focus_bits =
2809 create_simple_focus_bits (high_bits, low_bits,
2810 lowest_bit_set, 10);
2811
2812 gcc_assert (SPARC_SETHI_P (focus_bits));
2813 gcc_assert (lowest_bit_set != 10);
2814
2815 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2816
2817 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2818 if (lowest_bit_set < 10)
2819 emit_insn (gen_rtx_SET (op0,
2820 gen_rtx_LSHIFTRT (DImode, temp,
2821 GEN_INT (10 - lowest_bit_set))));
2822 else if (lowest_bit_set > 10)
2823 emit_insn (gen_rtx_SET (op0,
2824 gen_rtx_ASHIFT (DImode, temp,
2825 GEN_INT (lowest_bit_set - 10))));
2826 return;
2827 }
2828
2829 /* 1) sethi %hi(low_bits), %reg
2830 * or %reg, %lo(low_bits), %reg
2831 * 2) sethi %hi(~low_bits), %reg
2832 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2833 */
2834 if (high_bits == 0
2835 || high_bits == 0xffffffff)
2836 {
2837 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2838 (high_bits == 0xffffffff));
2839 return;
2840 }
2841
2842 /* Now, try 3-insn sequences. */
2843
2844 /* 1) sethi %hi(high_bits), %reg
2845 * or %reg, %lo(high_bits), %reg
2846 * sllx %reg, 32, %reg
2847 */
2848 if (low_bits == 0)
2849 {
2850 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2851 return;
2852 }
2853
2854 /* We may be able to do something quick
2855 when the constant is negated, so try that. */
2856 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2857 (~low_bits) & 0xfffffc00))
2858 {
2859 /* NOTE: The trailing bits get XOR'd so we need the
2860 non-negated bits, not the negated ones. */
2861 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2862
2863 if ((((~high_bits) & 0xffffffff) == 0
2864 && ((~low_bits) & 0x80000000) == 0)
2865 || (((~high_bits) & 0xffffffff) == 0xffffffff
2866 && ((~low_bits) & 0x80000000) != 0))
2867 {
2868 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2869
2870 if ((SPARC_SETHI_P (fast_int)
2871 && (~high_bits & 0xffffffff) == 0)
2872 || SPARC_SIMM13_P (fast_int))
2873 emit_insn (gen_safe_SET64 (temp, fast_int));
2874 else
2875 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2876 }
2877 else
2878 {
2879 rtx negated_const;
2880 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2881 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2882 sparc_emit_set_const64 (temp, negated_const);
2883 }
2884
2885 /* If we are XOR'ing with -1, then we should emit a one's complement
2886 instead. This way the combiner will notice logical operations
2887 such as ANDN later on and substitute. */
2888 if (trailing_bits == 0x3ff)
2889 {
2890 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2891 }
2892 else
2893 {
2894 emit_insn (gen_rtx_SET (op0,
2895 gen_safe_XOR64 (temp,
2896 (-0x400 | trailing_bits))));
2897 }
2898 return;
2899 }
2900
2901 /* 1) sethi %hi(xxx), %reg
2902 * or %reg, %lo(xxx), %reg
2903 * sllx %reg, yyy, %reg
2904 *
2905 * ??? This is just a generalized version of the low_bits==0
2906 * thing above, FIXME...
2907 */
2908 if ((highest_bit_set - lowest_bit_set) < 32)
2909 {
2910 unsigned HOST_WIDE_INT focus_bits =
2911 create_simple_focus_bits (high_bits, low_bits,
2912 lowest_bit_set, 0);
2913
2914 /* We can't get here in this state. */
2915 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2916
2917 /* So what we know is that the set bits straddle the
2918 middle of the 64-bit word. */
2919 sparc_emit_set_const64_quick2 (op0, temp,
2920 focus_bits, 0,
2921 lowest_bit_set);
2922 return;
2923 }
2924
2925 /* 1) sethi %hi(high_bits), %reg
2926 * or %reg, %lo(high_bits), %reg
2927 * sllx %reg, 32, %reg
2928 * or %reg, low_bits, %reg
2929 */
2930 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2931 {
2932 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2933 return;
2934 }
2935
2936 /* The easiest way, when all else fails, is full decomposition. */
2937 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2938 }
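/* Illustrative sketch (added commentary, not part of the original GCC
   sources, and not compiled): a simplified restatement of the strategy
   selection above, in the order the cases are tried.  The real routine
   also handles symbolic operands, the negated-constant retry and the
   straddling/simm13 three-insn cases, which are abbreviated here.  */
#if 0
static const char *
const64_strategy_sketch (int highest_bit_set, int lowest_bit_set,
                         int all_bits_between_are_set,
                         unsigned long long high_bits,
                         unsigned long long low_bits)
{
  if (((highest_bit_set == 63 || lowest_bit_set == 0)
       && all_bits_between_are_set)
      || highest_bit_set - lowest_bit_set < 12)
    return "mov + sllx/srlx (2 insns)";
  if (highest_bit_set - lowest_bit_set < 21)
    return "sethi + sllx/srlx (2 insns)";
  if (high_bits == 0 || high_bits == 0xffffffffULL)
    return "quick1: sethi + or/xor (2 insns)";
  if (low_bits == 0)
    return "quick2: sethi/or + sllx (3 insns)";
  /* ... remaining special cases omitted ... */
  return "longway: full decomposition";
}
#endif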
2939
2940 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2941
2942 static bool
2943 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2944 {
2945 *p1 = SPARC_ICC_REG;
2946 *p2 = SPARC_FCC_REG;
2947 return true;
2948 }
2949
2950 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2951
2952 static unsigned int
2953 sparc_min_arithmetic_precision (void)
2954 {
2955 return 32;
2956 }
2957
2958 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2959 return the mode to be used for the comparison. For floating-point,
2960 CCFP[E]mode is used. CCNZmode should be used when the first operand
2961 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2962 processing is needed. */
2963
2964 machine_mode
2965 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2966 {
2967 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2968 {
2969 switch (op)
2970 {
2971 case EQ:
2972 case NE:
2973 case UNORDERED:
2974 case ORDERED:
2975 case UNLT:
2976 case UNLE:
2977 case UNGT:
2978 case UNGE:
2979 case UNEQ:
2980 case LTGT:
2981 return CCFPmode;
2982
2983 case LT:
2984 case LE:
2985 case GT:
2986 case GE:
2987 return CCFPEmode;
2988
2989 default:
2990 gcc_unreachable ();
2991 }
2992 }
2993 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2994 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2995 && y == const0_rtx)
2996 {
2997 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2998 return CCXNZmode;
2999 else
3000 return CCNZmode;
3001 }
3002 else
3003 {
3004 /* This is for the cmp<mode>_sne pattern. */
3005 if (GET_CODE (x) == NOT && y == constm1_rtx)
3006 {
3007 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3008 return CCXCmode;
3009 else
3010 return CCCmode;
3011 }
3012
3013 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3014 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3015 {
3016 if (GET_CODE (y) == UNSPEC
3017 && (XINT (y, 1) == UNSPEC_ADDV
3018 || XINT (y, 1) == UNSPEC_SUBV
3019 || XINT (y, 1) == UNSPEC_NEGV))
3020 return CCVmode;
3021 else
3022 return CCCmode;
3023 }
3024
3025 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3026 return CCXmode;
3027 else
3028 return CCmode;
3029 }
3030 }
3031
3032 /* Emit the compare insn and return the CC reg for a CODE comparison
3033 with operands X and Y. */
3034
3035 static rtx
3036 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3037 {
3038 machine_mode mode;
3039 rtx cc_reg;
3040
3041 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3042 return x;
3043
3044 mode = SELECT_CC_MODE (code, x, y);
3045
3046 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3047 fcc regs (cse can't tell they're really call clobbered regs and will
3048 remove a duplicate comparison even if there is an intervening function
3049 call - it will then try to reload the cc reg via an int reg which is why
3050 we need the movcc patterns). It is possible to provide the movcc
3051 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3052 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3053 to tell cse that CCFPE mode registers (even pseudos) are call
3054 clobbered. */
3055
3056 /* ??? This is an experiment. Rather than making changes to cse which may
3057 or may not be easy/clean, we do our own cse. This is possible because
3058 we will generate hard registers. Cse knows they're call clobbered (it
3059 doesn't know the same thing about pseudos). If we guess wrong, no big
3060 deal, but if we win, great! */
3061
3062 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3063 #if 1 /* experiment */
3064 {
3065 int reg;
3066 /* We cycle through the registers to ensure they're all exercised. */
3067 static int next_fcc_reg = 0;
3068 /* Previous x,y for each fcc reg. */
3069 static rtx prev_args[4][2];
3070
3071 /* Scan prev_args for x,y. */
3072 for (reg = 0; reg < 4; reg++)
3073 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3074 break;
3075 if (reg == 4)
3076 {
3077 reg = next_fcc_reg;
3078 prev_args[reg][0] = x;
3079 prev_args[reg][1] = y;
3080 next_fcc_reg = (next_fcc_reg + 1) & 3;
3081 }
3082 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3083 }
3084 #else
3085 cc_reg = gen_reg_rtx (mode);
3086 #endif /* ! experiment */
3087 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3088 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3089 else
3090 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3091
3092 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3093 will only result in an unrecognizable insn so no point in asserting. */
3094 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3095
3096 return cc_reg;
3097 }
3098
3099
3100 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3101
3102 rtx
3103 gen_compare_reg (rtx cmp)
3104 {
3105 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3106 }
3107
3108 /* This function is used for v9 only.
3109 DEST is the target of the Scc insn.
3110 CODE is the code for an Scc's comparison.
3111 X and Y are the values we compare.
3112
3113 This function is needed to turn
3114
3115 (set (reg:SI 110)
3116 (gt (reg:CCX 100 %icc)
3117 (const_int 0)))
3118 into
3119 (set (reg:SI 110)
3120 (gt:DI (reg:CCX 100 %icc)
3121 (const_int 0)))
3122
3123 IE: The instruction recognizer needs to see the mode of the comparison to
3124 find the right instruction. We could use "gt:DI" right in the
3125 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3126
3127 static int
3128 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3129 {
3130 if (! TARGET_ARCH64
3131 && (GET_MODE (x) == DImode
3132 || GET_MODE (dest) == DImode))
3133 return 0;
3134
3135 /* Try to use the movrCC insns. */
3136 if (TARGET_ARCH64
3137 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3138 && y == const0_rtx
3139 && v9_regcmp_p (compare_code))
3140 {
3141 rtx op0 = x;
3142 rtx temp;
3143
3144 /* Special case for op0 != 0. This can be done with one instruction if
3145 dest == x. */
3146
3147 if (compare_code == NE
3148 && GET_MODE (dest) == DImode
3149 && rtx_equal_p (op0, dest))
3150 {
3151 emit_insn (gen_rtx_SET (dest,
3152 gen_rtx_IF_THEN_ELSE (DImode,
3153 gen_rtx_fmt_ee (compare_code, DImode,
3154 op0, const0_rtx),
3155 const1_rtx,
3156 dest)));
3157 return 1;
3158 }
3159
3160 if (reg_overlap_mentioned_p (dest, op0))
3161 {
3162 /* Handle the case where dest == x.
3163 We "early clobber" the result. */
3164 op0 = gen_reg_rtx (GET_MODE (x));
3165 emit_move_insn (op0, x);
3166 }
3167
3168 emit_insn (gen_rtx_SET (dest, const0_rtx));
3169 if (GET_MODE (op0) != DImode)
3170 {
3171 temp = gen_reg_rtx (DImode);
3172 convert_move (temp, op0, 0);
3173 }
3174 else
3175 temp = op0;
3176 emit_insn (gen_rtx_SET (dest,
3177 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3178 gen_rtx_fmt_ee (compare_code, DImode,
3179 temp, const0_rtx),
3180 const1_rtx,
3181 dest)));
3182 return 1;
3183 }
3184 else
3185 {
3186 x = gen_compare_reg_1 (compare_code, x, y);
3187 y = const0_rtx;
3188
3189 emit_insn (gen_rtx_SET (dest, const0_rtx));
3190 emit_insn (gen_rtx_SET (dest,
3191 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3192 gen_rtx_fmt_ee (compare_code,
3193 GET_MODE (x), x, y),
3194 const1_rtx, dest)));
3195 return 1;
3196 }
3197 }
3198
3199
3200 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3201 without jumps using the addx/subx instructions. */
3202
3203 bool
3204 emit_scc_insn (rtx operands[])
3205 {
3206 rtx tem, x, y;
3207 enum rtx_code code;
3208 machine_mode mode;
3209
3210 /* The quad-word fp compare library routines all return nonzero to indicate
3211 true, which is different from the equivalent libgcc routines, so we must
3212 handle them specially here. */
3213 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3214 {
3215 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3216 GET_CODE (operands[1]));
3217 operands[2] = XEXP (operands[1], 0);
3218 operands[3] = XEXP (operands[1], 1);
3219 }
3220
3221 code = GET_CODE (operands[1]);
3222 x = operands[2];
3223 y = operands[3];
3224 mode = GET_MODE (x);
3225
3226 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3227 more applications). The exception to this is "reg != 0" which can
3228 be done in one instruction on v9 (so we do it). */
3229 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3230 {
3231 if (y != const0_rtx)
3232 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3233
3234 rtx pat = gen_rtx_SET (operands[0],
3235 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3236 x, const0_rtx));
3237
3238 /* If we can use addx/subx or addxc, add a clobber for CC. */
3239 if (mode == SImode || (code == NE && TARGET_VIS3))
3240 {
3241 rtx clobber
3242 = gen_rtx_CLOBBER (VOIDmode,
3243 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3244 SPARC_ICC_REG));
3245 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3246 }
3247
3248 emit_insn (pat);
3249 return true;
3250 }
3251
3252 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3253 if (TARGET_ARCH64
3254 && mode == DImode
3255 && !((code == LTU || code == GTU) && TARGET_VIS3)
3256 && gen_v9_scc (operands[0], code, x, y))
3257 return true;
3258
3259 /* We can do LTU and GEU using the addx/subx instructions too. And
3260 for GTU/LEU, if both operands are registers, swap them and fall
3261 back to the easy case. */
3262 if (code == GTU || code == LEU)
3263 {
3264 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3265 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3266 {
3267 tem = x;
3268 x = y;
3269 y = tem;
3270 code = swap_condition (code);
3271 }
3272 }
3273
3274 if (code == LTU || code == GEU)
3275 {
3276 emit_insn (gen_rtx_SET (operands[0],
3277 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3278 gen_compare_reg_1 (code, x, y),
3279 const0_rtx)));
3280 return true;
3281 }
3282
3283 /* All the possibilities to use addx/subx-based sequences have been
3284 exhausted; try for a 3-instruction sequence using v9 conditional
3285 moves. */
3286 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3287 return true;
3288
3289 /* Nope, do branches. */
3290 return false;
3291 }
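/* Illustrative note (added commentary, not part of the original GCC
   sources, and not compiled): the EQ/NE path above relies on the identity
   (x == y) <=> ((x ^ y) == 0), which is why a nonzero Y is folded into X
   with an XOR before the set-on-condition pattern is emitted.  */
#if 0
static int
scc_eq_via_xor_sketch (unsigned long x, unsigned long y)
{
  return (x ^ y) == 0;  /* same truth value as x == y */
}
#endif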
3292
3293 /* Emit a conditional jump insn for the v9 architecture using comparison code
3294 CODE and jump target LABEL.
3295 This function exists to take advantage of the v9 brxx insns. */
3296
3297 static void
3298 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3299 {
3300 emit_jump_insn (gen_rtx_SET (pc_rtx,
3301 gen_rtx_IF_THEN_ELSE (VOIDmode,
3302 gen_rtx_fmt_ee (code, GET_MODE (op0),
3303 op0, const0_rtx),
3304 gen_rtx_LABEL_REF (VOIDmode, label),
3305 pc_rtx)));
3306 }
3307
3308 /* Emit a conditional jump insn for the UA2011 architecture using
3309 comparison code CODE and jump target LABEL. This function exists
3310 to take advantage of the UA2011 Compare and Branch insns. */
3311
3312 static void
3313 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3314 {
3315 rtx if_then_else;
3316
3317 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3318 gen_rtx_fmt_ee (code, GET_MODE (op0),
3319 op0, op1),
3320 gen_rtx_LABEL_REF (VOIDmode, label),
3321 pc_rtx);
3322
3323 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3324 }
3325
3326 void
3327 emit_conditional_branch_insn (rtx operands[])
3328 {
3329 /* The quad-word fp compare library routines all return nonzero to indicate
3330 true, which is different from the equivalent libgcc routines, so we must
3331 handle them specially here. */
3332 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3333 {
3334 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3335 GET_CODE (operands[0]));
3336 operands[1] = XEXP (operands[0], 0);
3337 operands[2] = XEXP (operands[0], 1);
3338 }
3339
3340 /* If we can tell early on that the comparison is against a constant
3341 that won't fit in the 5-bit signed immediate field of a cbcond,
3342 use one of the other v9 conditional branch sequences. */
3343 if (TARGET_CBCOND
3344 && GET_CODE (operands[1]) == REG
3345 && (GET_MODE (operands[1]) == SImode
3346 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3347 && (GET_CODE (operands[2]) != CONST_INT
3348 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3349 {
3350 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3351 return;
3352 }
3353
3354 if (TARGET_ARCH64 && operands[2] == const0_rtx
3355 && GET_CODE (operands[1]) == REG
3356 && GET_MODE (operands[1]) == DImode)
3357 {
3358 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3359 return;
3360 }
3361
3362 operands[1] = gen_compare_reg (operands[0]);
3363 operands[2] = const0_rtx;
3364 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3365 operands[1], operands[2]);
3366 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3367 operands[3]));
3368 }
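/* Illustrative sketch (added commentary, not part of the original GCC
   sources, and not compiled): the cbcond immediate is a 5-bit signed
   field, so SPARC_SIMM5_P is assumed to accept exactly the range checked
   below.  */
#if 0
static int
simm5_p_sketch (long long val)
{
  return val >= -16 && val <= 15;  /* fits in 5 signed bits */
}
#endif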
3369
3370
3371 /* Generate a DFmode part of a hard TFmode register.
3372 REG is the TFmode hard register, LOW is 1 for the
3373 low 64 bits of the register and 0 otherwise.
3374 */
3375 rtx
3376 gen_df_reg (rtx reg, int low)
3377 {
3378 int regno = REGNO (reg);
3379
3380 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3381 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3382 return gen_rtx_REG (DFmode, regno);
3383 }
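/* Illustrative sketch (added commentary, not part of the original GCC
   sources, and not compiled): assuming the big-endian word order used on
   SPARC (WORDS_BIG_ENDIAN == 1), the low DFmode half is the second word
   and therefore gets the higher register number: +2 for the 32-bit FP
   registers making up a TFmode value, +1 for 64-bit integer registers on
   TARGET_ARCH64.  */
#if 0
static int
df_half_regno_sketch (int tf_regno, int low, int is_int_reg_on_arch64)
{
  if (low)
    return tf_regno + (is_int_reg_on_arch64 ? 1 : 2);
  return tf_regno;
}
#endif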
3384 \f
3385 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3386 Unlike normal calls, TFmode operands are passed by reference. It is
3387 assumed that no more than 3 operands are required. */
3388
3389 static void
3390 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3391 {
3392 rtx ret_slot = NULL, arg[3], func_sym;
3393 int i;
3394
3395 /* We only expect to be called for conversions, unary, and binary ops. */
3396 gcc_assert (nargs == 2 || nargs == 3);
3397
3398 for (i = 0; i < nargs; ++i)
3399 {
3400 rtx this_arg = operands[i];
3401 rtx this_slot;
3402
3403 /* TFmode arguments and return values are passed by reference. */
3404 if (GET_MODE (this_arg) == TFmode)
3405 {
3406 int force_stack_temp;
3407
3408 force_stack_temp = 0;
3409 if (TARGET_BUGGY_QP_LIB && i == 0)
3410 force_stack_temp = 1;
3411
3412 if (GET_CODE (this_arg) == MEM
3413 && ! force_stack_temp)
3414 {
3415 tree expr = MEM_EXPR (this_arg);
3416 if (expr)
3417 mark_addressable (expr);
3418 this_arg = XEXP (this_arg, 0);
3419 }
3420 else if (CONSTANT_P (this_arg)
3421 && ! force_stack_temp)
3422 {
3423 this_slot = force_const_mem (TFmode, this_arg);
3424 this_arg = XEXP (this_slot, 0);
3425 }
3426 else
3427 {
3428 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3429
3430 /* Operand 0 is the return value. We'll copy it out later. */
3431 if (i > 0)
3432 emit_move_insn (this_slot, this_arg);
3433 else
3434 ret_slot = this_slot;
3435
3436 this_arg = XEXP (this_slot, 0);
3437 }
3438 }
3439
3440 arg[i] = this_arg;
3441 }
3442
3443 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3444
3445 if (GET_MODE (operands[0]) == TFmode)
3446 {
3447 if (nargs == 2)
3448 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3449 arg[0], GET_MODE (arg[0]),
3450 arg[1], GET_MODE (arg[1]));
3451 else
3452 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3453 arg[0], GET_MODE (arg[0]),
3454 arg[1], GET_MODE (arg[1]),
3455 arg[2], GET_MODE (arg[2]));
3456
3457 if (ret_slot)
3458 emit_move_insn (operands[0], ret_slot);
3459 }
3460 else
3461 {
3462 rtx ret;
3463
3464 gcc_assert (nargs == 2);
3465
3466 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3467 GET_MODE (operands[0]),
3468 arg[1], GET_MODE (arg[1]));
3469
3470 if (ret != operands[0])
3471 emit_move_insn (operands[0], ret);
3472 }
3473 }
3474
3475 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3476
3477 static void
3478 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3479 {
3480 const char *func;
3481
3482 switch (code)
3483 {
3484 case PLUS:
3485 func = "_Qp_add";
3486 break;
3487 case MINUS:
3488 func = "_Qp_sub";
3489 break;
3490 case MULT:
3491 func = "_Qp_mul";
3492 break;
3493 case DIV:
3494 func = "_Qp_div";
3495 break;
3496 default:
3497 gcc_unreachable ();
3498 }
3499
3500 emit_soft_tfmode_libcall (func, 3, operands);
3501 }
3502
3503 static void
3504 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3505 {
3506 const char *func;
3507
3508 gcc_assert (code == SQRT);
3509 func = "_Qp_sqrt";
3510
3511 emit_soft_tfmode_libcall (func, 2, operands);
3512 }
3513
3514 static void
3515 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3516 {
3517 const char *func;
3518
3519 switch (code)
3520 {
3521 case FLOAT_EXTEND:
3522 switch (GET_MODE (operands[1]))
3523 {
3524 case E_SFmode:
3525 func = "_Qp_stoq";
3526 break;
3527 case E_DFmode:
3528 func = "_Qp_dtoq";
3529 break;
3530 default:
3531 gcc_unreachable ();
3532 }
3533 break;
3534
3535 case FLOAT_TRUNCATE:
3536 switch (GET_MODE (operands[0]))
3537 {
3538 case E_SFmode:
3539 func = "_Qp_qtos";
3540 break;
3541 case E_DFmode:
3542 func = "_Qp_qtod";
3543 break;
3544 default:
3545 gcc_unreachable ();
3546 }
3547 break;
3548
3549 case FLOAT:
3550 switch (GET_MODE (operands[1]))
3551 {
3552 case E_SImode:
3553 func = "_Qp_itoq";
3554 if (TARGET_ARCH64)
3555 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3556 break;
3557 case E_DImode:
3558 func = "_Qp_xtoq";
3559 break;
3560 default:
3561 gcc_unreachable ();
3562 }
3563 break;
3564
3565 case UNSIGNED_FLOAT:
3566 switch (GET_MODE (operands[1]))
3567 {
3568 case E_SImode:
3569 func = "_Qp_uitoq";
3570 if (TARGET_ARCH64)
3571 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3572 break;
3573 case E_DImode:
3574 func = "_Qp_uxtoq";
3575 break;
3576 default:
3577 gcc_unreachable ();
3578 }
3579 break;
3580
3581 case FIX:
3582 switch (GET_MODE (operands[0]))
3583 {
3584 case E_SImode:
3585 func = "_Qp_qtoi";
3586 break;
3587 case E_DImode:
3588 func = "_Qp_qtox";
3589 break;
3590 default:
3591 gcc_unreachable ();
3592 }
3593 break;
3594
3595 case UNSIGNED_FIX:
3596 switch (GET_MODE (operands[0]))
3597 {
3598 case E_SImode:
3599 func = "_Qp_qtoui";
3600 break;
3601 case E_DImode:
3602 func = "_Qp_qtoux";
3603 break;
3604 default:
3605 gcc_unreachable ();
3606 }
3607 break;
3608
3609 default:
3610 gcc_unreachable ();
3611 }
3612
3613 emit_soft_tfmode_libcall (func, 2, operands);
3614 }
3615
3616 /* Expand a hard-float TFmode operation.  All arguments must be in
3617 registers. */
3618
3619 static void
3620 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3621 {
3622 rtx op, dest;
3623
3624 if (GET_RTX_CLASS (code) == RTX_UNARY)
3625 {
3626 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3627 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3628 }
3629 else
3630 {
3631 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3632 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3633 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3634 operands[1], operands[2]);
3635 }
3636
3637 if (register_operand (operands[0], VOIDmode))
3638 dest = operands[0];
3639 else
3640 dest = gen_reg_rtx (GET_MODE (operands[0]));
3641
3642 emit_insn (gen_rtx_SET (dest, op));
3643
3644 if (dest != operands[0])
3645 emit_move_insn (operands[0], dest);
3646 }
3647
3648 void
3649 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3650 {
3651 if (TARGET_HARD_QUAD)
3652 emit_hard_tfmode_operation (code, operands);
3653 else
3654 emit_soft_tfmode_binop (code, operands);
3655 }
3656
3657 void
3658 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3659 {
3660 if (TARGET_HARD_QUAD)
3661 emit_hard_tfmode_operation (code, operands);
3662 else
3663 emit_soft_tfmode_unop (code, operands);
3664 }
3665
3666 void
3667 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3668 {
3669 if (TARGET_HARD_QUAD)
3670 emit_hard_tfmode_operation (code, operands);
3671 else
3672 emit_soft_tfmode_cvt (code, operands);
3673 }
3674 \f
3675 /* Return nonzero if a branch/jump/call instruction will be emitting a
3676 nop into its delay slot. */
3677
3678 int
3679 empty_delay_slot (rtx_insn *insn)
3680 {
3681 rtx seq;
3682
3683 /* If no previous instruction (should not happen), return true. */
3684 if (PREV_INSN (insn) == NULL)
3685 return 1;
3686
3687 seq = NEXT_INSN (PREV_INSN (insn));
3688 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3689 return 0;
3690
3691 return 1;
3692 }
3693
3694 /* Return nonzero if we should emit a nop after a cbcond instruction.
3695 The cbcond instruction does not have a delay slot; however, there is
3696 a severe performance penalty if a control transfer appears right
3697 after a cbcond. Therefore we emit a nop when we detect this
3698 situation. */
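/* A rough illustration (the assembler mnemonic is an assumption here): after

     cwbne  %o0, %o1, .L1

   letting another branch or call follow immediately would incur the penalty
   described above, so a nop is emitted between the two.  */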
3699
3700 int
3701 emit_cbcond_nop (rtx_insn *insn)
3702 {
3703 rtx next = next_active_insn (insn);
3704
3705 if (!next)
3706 return 1;
3707
3708 if (NONJUMP_INSN_P (next)
3709 && GET_CODE (PATTERN (next)) == SEQUENCE)
3710 next = XVECEXP (PATTERN (next), 0, 0);
3711 else if (CALL_P (next)
3712 && GET_CODE (PATTERN (next)) == PARALLEL)
3713 {
3714 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3715
3716 if (GET_CODE (delay) == RETURN)
3717 {
3718 /* It's a sibling call. Do not emit the nop if we're going
3719 to emit something other than the jump itself as the first
3720 instruction of the sibcall sequence. */
3721 if (sparc_leaf_function_p || TARGET_FLAT)
3722 return 0;
3723 }
3724 }
3725
3726 if (NONJUMP_INSN_P (next))
3727 return 0;
3728
3729 return 1;
3730 }
3731
3732 /* Return nonzero if TRIAL can go into the call delay slot. */
3733
3734 int
3735 eligible_for_call_delay (rtx_insn *trial)
3736 {
3737 rtx pat;
3738
3739 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3740 return 0;
3741
3742 /* Binutils allows
3743 call __tls_get_addr, %tgd_call (foo)
3744 add %l7, %o0, %o0, %tgd_add (foo)
3745 while Sun as/ld does not. */
3746 if (TARGET_GNU_TLS || !TARGET_TLS)
3747 return 1;
3748
3749 pat = PATTERN (trial);
3750
3751 /* We must reject tgd_add{32|64}, i.e.
3752 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3753 and tldm_add{32|64}, i.e.
3754 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3755 for Sun as/ld. */
3756 if (GET_CODE (pat) == SET
3757 && GET_CODE (SET_SRC (pat)) == PLUS)
3758 {
3759 rtx unspec = XEXP (SET_SRC (pat), 1);
3760
3761 if (GET_CODE (unspec) == UNSPEC
3762 && (XINT (unspec, 1) == UNSPEC_TLSGD
3763 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3764 return 0;
3765 }
3766
3767 return 1;
3768 }
3769
3770 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3771 instruction. RETURN_P is true if the v9 variant 'return' is to be
3772 considered in the test too.
3773
3774 TRIAL must be a SET whose destination is a REG appropriate for the
3775 'restore' instruction or, if RETURN_P is true, for the 'return'
3776 instruction. */
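/* A hypothetical example: an insn such as

     (set (reg:SI %i0) (plus:SI (reg:SI %i1) (const_int 8)))

   in the return delay slot can be folded into the window restore as roughly
   'ret; restore %i1, 8, %o0', since the callee's %i0 is the caller's %o0
   and the destination of 'restore' is written in the caller's window.  */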
3777
3778 static int
3779 eligible_for_restore_insn (rtx trial, bool return_p)
3780 {
3781 rtx pat = PATTERN (trial);
3782 rtx src = SET_SRC (pat);
3783 bool src_is_freg = false;
3784 rtx src_reg;
3785
3786 /* Since we can now do moves between float and integer registers when
3787 VIS3 is enabled, we have to catch this case. We can allow such
3788 moves when doing a 'return', however. */
3789 src_reg = src;
3790 if (GET_CODE (src_reg) == SUBREG)
3791 src_reg = SUBREG_REG (src_reg);
3792 if (GET_CODE (src_reg) == REG
3793 && SPARC_FP_REG_P (REGNO (src_reg)))
3794 src_is_freg = true;
3795
3796 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3797 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3798 && arith_operand (src, GET_MODE (src))
3799 && ! src_is_freg)
3800 {
3801 if (TARGET_ARCH64)
3802 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3803 else
3804 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3805 }
3806
3807 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3808 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3809 && arith_double_operand (src, GET_MODE (src))
3810 && ! src_is_freg)
3811 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3812
3813 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3814 else if (! TARGET_FPU && register_operand (src, SFmode))
3815 return 1;
3816
3817 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3818 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3819 return 1;
3820
3821 /* If we have the 'return' instruction, anything that does not use
3822 local or output registers and can go into a delay slot wins. */
3823 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3824 return 1;
3825
3826 /* The 'restore src1,src2,dest' pattern for SImode. */
3827 else if (GET_CODE (src) == PLUS
3828 && register_operand (XEXP (src, 0), SImode)
3829 && arith_operand (XEXP (src, 1), SImode))
3830 return 1;
3831
3832 /* The 'restore src1,src2,dest' pattern for DImode. */
3833 else if (GET_CODE (src) == PLUS
3834 && register_operand (XEXP (src, 0), DImode)
3835 && arith_double_operand (XEXP (src, 1), DImode))
3836 return 1;
3837
3838 /* The 'restore src1,%lo(src2),dest' pattern. */
3839 else if (GET_CODE (src) == LO_SUM
3840 && ! TARGET_CM_MEDMID
3841 && ((register_operand (XEXP (src, 0), SImode)
3842 && immediate_operand (XEXP (src, 1), SImode))
3843 || (TARGET_ARCH64
3844 && register_operand (XEXP (src, 0), DImode)
3845 && immediate_operand (XEXP (src, 1), DImode))))
3846 return 1;
3847
3848 /* The 'restore src,src,dest' pattern. */
3849 else if (GET_CODE (src) == ASHIFT
3850 && (register_operand (XEXP (src, 0), SImode)
3851 || register_operand (XEXP (src, 0), DImode))
3852 && XEXP (src, 1) == const1_rtx)
3853 return 1;
3854
3855 return 0;
3856 }
3857
3858 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3859
3860 int
3861 eligible_for_return_delay (rtx_insn *trial)
3862 {
3863 int regno;
3864 rtx pat;
3865
3866 /* If the function uses __builtin_eh_return, the eh_return machinery
3867 occupies the delay slot. */
3868 if (crtl->calls_eh_return)
3869 return 0;
3870
3871 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3872 return 0;
3873
3874 /* In the case of a leaf or flat function, anything can go into the slot. */
3875 if (sparc_leaf_function_p || TARGET_FLAT)
3876 return 1;
3877
3878 if (!NONJUMP_INSN_P (trial))
3879 return 0;
3880
3881 pat = PATTERN (trial);
3882 if (GET_CODE (pat) == PARALLEL)
3883 {
3884 int i;
3885
3886 if (! TARGET_V9)
3887 return 0;
3888 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3889 {
3890 rtx expr = XVECEXP (pat, 0, i);
3891 if (GET_CODE (expr) != SET)
3892 return 0;
3893 if (GET_CODE (SET_DEST (expr)) != REG)
3894 return 0;
3895 regno = REGNO (SET_DEST (expr));
3896 if (regno >= 8 && regno < 24)
3897 return 0;
3898 }
3899 return !epilogue_renumber (&pat, 1);
3900 }
3901
3902 if (GET_CODE (pat) != SET)
3903 return 0;
3904
3905 if (GET_CODE (SET_DEST (pat)) != REG)
3906 return 0;
3907
3908 regno = REGNO (SET_DEST (pat));
3909
3910 /* Otherwise, only operations which can be done in tandem with
3911 a `restore' or `return' insn can go into the delay slot. */
3912 if (regno >= 8 && regno < 24)
3913 return 0;
3914
3915 /* If this instruction sets up a floating-point register and we have a return
3916 instruction, it can probably go in. But restore will not work
3917 with FP_REGS. */
3918 if (! SPARC_INT_REG_P (regno))
3919 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3920
3921 return eligible_for_restore_insn (trial, true);
3922 }
3923
3924 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3925
3926 int
3927 eligible_for_sibcall_delay (rtx_insn *trial)
3928 {
3929 rtx pat;
3930
3931 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3932 return 0;
3933
3934 if (!NONJUMP_INSN_P (trial))
3935 return 0;
3936
3937 pat = PATTERN (trial);
3938
3939 if (sparc_leaf_function_p || TARGET_FLAT)
3940 {
3941 /* If the tail call is done using the call instruction,
3942 we have to restore %o7 in the delay slot. */
3943 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3944 return 0;
3945
3946 /* %g1 is used to build the function address. */
3947 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3948 return 0;
3949
3950 return 1;
3951 }
3952
3953 if (GET_CODE (pat) != SET)
3954 return 0;
3955
3956 /* Otherwise, only operations which can be done in tandem with
3957 a `restore' insn can go into the delay slot. */
3958 if (GET_CODE (SET_DEST (pat)) != REG
3959 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3960 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3961 return 0;
3962
3963 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3964 in most cases. */
3965 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3966 return 0;
3967
3968 return eligible_for_restore_insn (trial, false);
3969 }
3970 \f
3971 /* Determine if it's legal to put X into the constant pool. This
3972 is not possible if X contains the address of a symbol that is
3973 not constant (TLS) or not known at final link time (PIC). */
3974
3975 static bool
3976 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3977 {
3978 switch (GET_CODE (x))
3979 {
3980 case CONST_INT:
3981 case CONST_WIDE_INT:
3982 case CONST_DOUBLE:
3983 case CONST_VECTOR:
3984 /* Accept all non-symbolic constants. */
3985 return false;
3986
3987 case LABEL_REF:
3988 /* Labels are OK iff we are non-PIC. */
3989 return flag_pic != 0;
3990
3991 case SYMBOL_REF:
3992 /* 'Naked' TLS symbol references are never OK;
3993 non-TLS symbols are OK iff we are non-PIC. */
3994 if (SYMBOL_REF_TLS_MODEL (x))
3995 return true;
3996 else
3997 return flag_pic != 0;
3998
3999 case CONST:
4000 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4001 case PLUS:
4002 case MINUS:
4003 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4004 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4005 case UNSPEC:
4006 return true;
4007 default:
4008 gcc_unreachable ();
4009 }
4010 }
4011 \f
4012 /* Global Offset Table support. */
4013 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4014 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4015
4016 /* Return the SYMBOL_REF for the Global Offset Table. */
4017
4018 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4019
4020 static rtx
4021 sparc_got (void)
4022 {
4023 if (!sparc_got_symbol)
4024 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4025
4026 return sparc_got_symbol;
4027 }
4028
4029 /* Ensure that we are not using patterns that are not OK with PIC. */
4030
4031 int
4032 check_pic (int i)
4033 {
4034 rtx op;
4035
4036 switch (flag_pic)
4037 {
4038 case 1:
4039 op = recog_data.operand[i];
4040 gcc_assert (GET_CODE (op) != SYMBOL_REF
4041 && (GET_CODE (op) != CONST
4042 || (GET_CODE (XEXP (op, 0)) == MINUS
4043 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4044 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4045 /* fallthrough */
4046 case 2:
4047 default:
4048 return 1;
4049 }
4050 }
4051
4052 /* Return true if X is an address which needs a temporary register when
4053 reloaded while generating PIC code. */
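/* For example (illustrative only), (const (plus (symbol_ref "x") (const_int 8192)))
   needs a scratch register because 8192 does not fit in the 13-bit signed
   immediate field tested by SMALL_INT, whereas an offset of 8 would not.  */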
4054
4055 int
4056 pic_address_needs_scratch (rtx x)
4057 {
4058 /* An address which is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
4059 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4061 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4062 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4063 return 1;
4064
4065 return 0;
4066 }
4067
4068 /* Determine if a given RTX is a valid constant. We already know this
4069 satisfies CONSTANT_P. */
4070
4071 static bool
4072 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4073 {
4074 switch (GET_CODE (x))
4075 {
4076 case CONST:
4077 case SYMBOL_REF:
4078 if (sparc_tls_referenced_p (x))
4079 return false;
4080 break;
4081
4082 case CONST_DOUBLE:
4083 /* Floating-point constants are generally not OK.
4084 The only exceptions are 0.0 and all-ones in VIS. */
4085 if (TARGET_VIS
4086 && SCALAR_FLOAT_MODE_P (mode)
4087 && (const_zero_operand (x, mode)
4088 || const_all_ones_operand (x, mode)))
4089 return true;
4090
4091 return false;
4092
4093 case CONST_VECTOR:
4094 /* Vector constants are generally not OK.
4095 The only exceptions are 0 and -1 in VIS. */
4096 if (TARGET_VIS
4097 && (const_zero_operand (x, mode)
4098 || const_all_ones_operand (x, mode)))
4099 return true;
4100
4101 return false;
4102
4103 default:
4104 break;
4105 }
4106
4107 return true;
4108 }
4109
4110 /* Determine if a given RTX is a valid constant address. */
4111
4112 bool
4113 constant_address_p (rtx x)
4114 {
4115 switch (GET_CODE (x))
4116 {
4117 case LABEL_REF:
4118 case CONST_INT:
4119 case HIGH:
4120 return true;
4121
4122 case CONST:
4123 if (flag_pic && pic_address_needs_scratch (x))
4124 return false;
4125 return sparc_legitimate_constant_p (Pmode, x);
4126
4127 case SYMBOL_REF:
4128 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4129
4130 default:
4131 return false;
4132 }
4133 }
4134
4135 /* Nonzero if the constant value X is a legitimate general operand
4136 when generating PIC code. It is given that flag_pic is on and
4137 that X satisfies CONSTANT_P. */
4138
4139 bool
4140 legitimate_pic_operand_p (rtx x)
4141 {
4142 if (pic_address_needs_scratch (x))
4143 return false;
4144 if (sparc_tls_referenced_p (x))
4145 return false;
4146 return true;
4147 }
4148
4149 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4150 (CONST_INT_P (X) \
4151 && INTVAL (X) >= -0x1000 \
4152 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4153
4154 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4155 (CONST_INT_P (X) \
4156 && INTVAL (X) >= -0x1000 \
4157 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
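/* Worked example (with the usual 13-bit signed immediate in mind): for an
   8-byte DFmode access, RTX_OK_FOR_OFFSET_P accepts offsets in [-4096, 4088]
   so that the whole access stays addressable, while RTX_OK_FOR_OLO10_P tops
   out at 0xc00 - 8 = 3064, presumably to leave room for the up-to-0x3ff
   contribution of the %lo part of an olo10 address.  */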
4158
4159 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4160
4161 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4162 ordinarily. This changes a bit when generating PIC. */
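/* Illustrative examples (register choices are arbitrary): [%o0 + %o1]
   (REG+REG) and [%fp + 64] (REG+SMALLINT) are legitimate; REG+REG is
   nevertheless rejected below for TFmode without hard quad support and for
   TImode, because such accesses must be splittable into offsettable
   halves.  */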
4163
4164 static bool
4165 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4166 {
4167 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4168
4169 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4170 rs1 = addr;
4171 else if (GET_CODE (addr) == PLUS)
4172 {
4173 rs1 = XEXP (addr, 0);
4174 rs2 = XEXP (addr, 1);
4175
4176 /* Canonicalize: REG comes first; if there are no regs,
4177 LO_SUM comes first. */
4178 if (!REG_P (rs1)
4179 && GET_CODE (rs1) != SUBREG
4180 && (REG_P (rs2)
4181 || GET_CODE (rs2) == SUBREG
4182 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4183 {
4184 rs1 = XEXP (addr, 1);
4185 rs2 = XEXP (addr, 0);
4186 }
4187
4188 if ((flag_pic == 1
4189 && rs1 == pic_offset_table_rtx
4190 && !REG_P (rs2)
4191 && GET_CODE (rs2) != SUBREG
4192 && GET_CODE (rs2) != LO_SUM
4193 && GET_CODE (rs2) != MEM
4194 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4195 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4196 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4197 || ((REG_P (rs1)
4198 || GET_CODE (rs1) == SUBREG)
4199 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4200 {
4201 imm1 = rs2;
4202 rs2 = NULL;
4203 }
4204 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4205 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4206 {
4207 /* We prohibit REG + REG for TFmode when there are no quad move insns
4208 and we consequently need to split. We do this because REG+REG
4209 is not an offsettable address. If we get the situation in reload
4210 where source and destination of a movtf pattern are both MEMs with
4211 REG+REG address, then only one of them gets converted to an
4212 offsettable address. */
4213 if (mode == TFmode
4214 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4215 return 0;
4216
4217 /* Likewise for TImode, but in all cases. */
4218 if (mode == TImode)
4219 return 0;
4220
4221 /* We prohibit REG + REG on ARCH32 if not optimizing for
4222 DFmode/DImode because then mem_min_alignment is likely to be zero
4223 after reload and the forced split would lack a matching splitter
4224 pattern. */
4225 if (TARGET_ARCH32 && !optimize
4226 && (mode == DFmode || mode == DImode))
4227 return 0;
4228 }
4229 else if (USE_AS_OFFSETABLE_LO10
4230 && GET_CODE (rs1) == LO_SUM
4231 && TARGET_ARCH64
4232 && ! TARGET_CM_MEDMID
4233 && RTX_OK_FOR_OLO10_P (rs2, mode))
4234 {
4235 rs2 = NULL;
4236 imm1 = XEXP (rs1, 1);
4237 rs1 = XEXP (rs1, 0);
4238 if (!CONSTANT_P (imm1)
4239 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4240 return 0;
4241 }
4242 }
4243 else if (GET_CODE (addr) == LO_SUM)
4244 {
4245 rs1 = XEXP (addr, 0);
4246 imm1 = XEXP (addr, 1);
4247
4248 if (!CONSTANT_P (imm1)
4249 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4250 return 0;
4251
4252 /* We can't allow TFmode in 32-bit mode, because an offset greater
4253 than the alignment (8) may cause the LO_SUM to overflow. */
4254 if (mode == TFmode && TARGET_ARCH32)
4255 return 0;
4256
4257 /* During reload, accept the HIGH+LO_SUM construct generated by
4258 sparc_legitimize_reload_address. */
4259 if (reload_in_progress
4260 && GET_CODE (rs1) == HIGH
4261 && XEXP (rs1, 0) == imm1)
4262 return 1;
4263 }
4264 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4265 return 1;
4266 else
4267 return 0;
4268
4269 if (GET_CODE (rs1) == SUBREG)
4270 rs1 = SUBREG_REG (rs1);
4271 if (!REG_P (rs1))
4272 return 0;
4273
4274 if (rs2)
4275 {
4276 if (GET_CODE (rs2) == SUBREG)
4277 rs2 = SUBREG_REG (rs2);
4278 if (!REG_P (rs2))
4279 return 0;
4280 }
4281
4282 if (strict)
4283 {
4284 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4285 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4286 return 0;
4287 }
4288 else
4289 {
4290 if ((! SPARC_INT_REG_P (REGNO (rs1))
4291 && REGNO (rs1) != FRAME_POINTER_REGNUM
4292 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4293 || (rs2
4294 && (! SPARC_INT_REG_P (REGNO (rs2))
4295 && REGNO (rs2) != FRAME_POINTER_REGNUM
4296 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4297 return 0;
4298 }
4299 return 1;
4300 }
4301
4302 /* Return the SYMBOL_REF for the tls_get_addr function. */
4303
4304 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4305
4306 static rtx
4307 sparc_tls_get_addr (void)
4308 {
4309 if (!sparc_tls_symbol)
4310 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4311
4312 return sparc_tls_symbol;
4313 }
4314
4315 /* Return the Global Offset Table to be used in TLS mode. */
4316
4317 static rtx
4318 sparc_tls_got (void)
4319 {
4320 /* In PIC mode, this is just the PIC offset table. */
4321 if (flag_pic)
4322 {
4323 crtl->uses_pic_offset_table = 1;
4324 return pic_offset_table_rtx;
4325 }
4326
4327 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4328 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4329 if (TARGET_SUN_TLS && TARGET_ARCH32)
4330 {
4331 load_got_register ();
4332 return global_offset_table_rtx;
4333 }
4334
4335 /* In all other cases, we load a new pseudo with the GOT symbol. */
4336 return copy_to_reg (sparc_got ());
4337 }
4338
4339 /* Return true if X contains a thread-local symbol. */
4340
4341 static bool
4342 sparc_tls_referenced_p (rtx x)
4343 {
4344 if (!TARGET_HAVE_TLS)
4345 return false;
4346
4347 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4348 x = XEXP (XEXP (x, 0), 0);
4349
4350 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4351 return true;
4352
4353 /* That's all we handle in sparc_legitimize_tls_address for now. */
4354 return false;
4355 }
4356
4357 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4358 this (thread-local) address. */
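/* As a rough sketch (assembler syntax and register choices are assumptions,
   not defined here), the global-dynamic case below expands to something like

     sethi  %tgd_hi22(sym), %o1
     add    %o1, %tgd_lo10(sym), %o1
     add    %l7, %o1, %o0, %tgd_add(sym)
     call   __tls_get_addr, %tgd_call(sym)
      nop

   with the result returned in %o0.  */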
4359
4360 static rtx
4361 sparc_legitimize_tls_address (rtx addr)
4362 {
4363 rtx temp1, temp2, temp3, ret, o0, got;
4364 rtx_insn *insn;
4365
4366 gcc_assert (can_create_pseudo_p ());
4367
4368 if (GET_CODE (addr) == SYMBOL_REF)
4369 switch (SYMBOL_REF_TLS_MODEL (addr))
4370 {
4371 case TLS_MODEL_GLOBAL_DYNAMIC:
4372 start_sequence ();
4373 temp1 = gen_reg_rtx (SImode);
4374 temp2 = gen_reg_rtx (SImode);
4375 ret = gen_reg_rtx (Pmode);
4376 o0 = gen_rtx_REG (Pmode, 8);
4377 got = sparc_tls_got ();
4378 emit_insn (gen_tgd_hi22 (temp1, addr));
4379 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4380 if (TARGET_ARCH32)
4381 {
4382 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4383 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4384 addr, const1_rtx));
4385 }
4386 else
4387 {
4388 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4389 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4390 addr, const1_rtx));
4391 }
4392 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4393 insn = get_insns ();
4394 end_sequence ();
4395 emit_libcall_block (insn, ret, o0, addr);
4396 break;
4397
4398 case TLS_MODEL_LOCAL_DYNAMIC:
4399 start_sequence ();
4400 temp1 = gen_reg_rtx (SImode);
4401 temp2 = gen_reg_rtx (SImode);
4402 temp3 = gen_reg_rtx (Pmode);
4403 ret = gen_reg_rtx (Pmode);
4404 o0 = gen_rtx_REG (Pmode, 8);
4405 got = sparc_tls_got ();
4406 emit_insn (gen_tldm_hi22 (temp1));
4407 emit_insn (gen_tldm_lo10 (temp2, temp1));
4408 if (TARGET_ARCH32)
4409 {
4410 emit_insn (gen_tldm_add32 (o0, got, temp2));
4411 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4412 const1_rtx));
4413 }
4414 else
4415 {
4416 emit_insn (gen_tldm_add64 (o0, got, temp2));
4417 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4418 const1_rtx));
4419 }
4420 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4421 insn = get_insns ();
4422 end_sequence ();
4423 emit_libcall_block (insn, temp3, o0,
4424 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4425 UNSPEC_TLSLD_BASE));
4426 temp1 = gen_reg_rtx (SImode);
4427 temp2 = gen_reg_rtx (SImode);
4428 emit_insn (gen_tldo_hix22 (temp1, addr));
4429 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4430 if (TARGET_ARCH32)
4431 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4432 else
4433 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4434 break;
4435
4436 case TLS_MODEL_INITIAL_EXEC:
4437 temp1 = gen_reg_rtx (SImode);
4438 temp2 = gen_reg_rtx (SImode);
4439 temp3 = gen_reg_rtx (Pmode);
4440 got = sparc_tls_got ();
4441 emit_insn (gen_tie_hi22 (temp1, addr));
4442 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4443 if (TARGET_ARCH32)
4444 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4445 else
4446 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4447 if (TARGET_SUN_TLS)
4448 {
4449 ret = gen_reg_rtx (Pmode);
4450 if (TARGET_ARCH32)
4451 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4452 temp3, addr));
4453 else
4454 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4455 temp3, addr));
4456 }
4457 else
4458 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4459 break;
4460
4461 case TLS_MODEL_LOCAL_EXEC:
4462 temp1 = gen_reg_rtx (Pmode);
4463 temp2 = gen_reg_rtx (Pmode);
4464 if (TARGET_ARCH32)
4465 {
4466 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4467 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4468 }
4469 else
4470 {
4471 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4472 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4473 }
4474 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4475 break;
4476
4477 default:
4478 gcc_unreachable ();
4479 }
4480
4481 else if (GET_CODE (addr) == CONST)
4482 {
4483 rtx base, offset;
4484
4485 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4486
4487 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4488 offset = XEXP (XEXP (addr, 0), 1);
4489
4490 base = force_operand (base, NULL_RTX);
4491 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4492 offset = force_reg (Pmode, offset);
4493 ret = gen_rtx_PLUS (Pmode, base, offset);
4494 }
4495
4496 else
4497 gcc_unreachable (); /* for now ... */
4498
4499 return ret;
4500 }
4501
4502 /* Legitimize PIC addresses. If the address is already position-independent,
4503 we return ORIG. Newly generated position-independent addresses go into a
4504 reg. This is REG if nonzero, otherwise we allocate register(s) as
4505 necessary. */
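/* Informally: with -fpic (flag_pic == 1) the GOT offset of a symbol is
   assumed to fit in 13 bits and a single load from [%l7 + sym] suffices,
   whereas with -fPIC (flag_pic == 2) the sethi/lo_sum pair below builds the
   full 32-bit offset first; the gotdata_op patterns additionally let the
   linker relax the GOT load when possible.  */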
4506
4507 static rtx
4508 sparc_legitimize_pic_address (rtx orig, rtx reg)
4509 {
4510 bool gotdata_op = false;
4511
4512 if (GET_CODE (orig) == SYMBOL_REF
4513 /* See the comment in sparc_expand_move. */
4514 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4515 {
4516 rtx pic_ref, address;
4517 rtx_insn *insn;
4518
4519 if (reg == 0)
4520 {
4521 gcc_assert (can_create_pseudo_p ());
4522 reg = gen_reg_rtx (Pmode);
4523 }
4524
4525 if (flag_pic == 2)
4526 {
4527 /* If not during reload, allocate another temp reg here for loading
4528 in the address, so that these instructions can be optimized
4529 properly. */
4530 rtx temp_reg = (! can_create_pseudo_p ()
4531 ? reg : gen_reg_rtx (Pmode));
4532
4533 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4534 won't get confused into thinking that these two instructions
4535 are loading in the true address of the symbol. If in the
4536 future a PIC rtx exists, that should be used instead. */
4537 if (TARGET_ARCH64)
4538 {
4539 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4540 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4541 }
4542 else
4543 {
4544 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4545 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4546 }
4547 address = temp_reg;
4548 gotdata_op = true;
4549 }
4550 else
4551 address = orig;
4552
4553 crtl->uses_pic_offset_table = 1;
4554 if (gotdata_op)
4555 {
4556 if (TARGET_ARCH64)
4557 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4558 pic_offset_table_rtx,
4559 address, orig));
4560 else
4561 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4562 pic_offset_table_rtx,
4563 address, orig));
4564 }
4565 else
4566 {
4567 pic_ref
4568 = gen_const_mem (Pmode,
4569 gen_rtx_PLUS (Pmode,
4570 pic_offset_table_rtx, address));
4571 insn = emit_move_insn (reg, pic_ref);
4572 }
4573
4574 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4575 by the loop optimizer. */
4576 set_unique_reg_note (insn, REG_EQUAL, orig);
4577 return reg;
4578 }
4579 else if (GET_CODE (orig) == CONST)
4580 {
4581 rtx base, offset;
4582
4583 if (GET_CODE (XEXP (orig, 0)) == PLUS
4584 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4585 return orig;
4586
4587 if (reg == 0)
4588 {
4589 gcc_assert (can_create_pseudo_p ());
4590 reg = gen_reg_rtx (Pmode);
4591 }
4592
4593 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4594 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4595 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4596 base == reg ? NULL_RTX : reg);
4597
4598 if (GET_CODE (offset) == CONST_INT)
4599 {
4600 if (SMALL_INT (offset))
4601 return plus_constant (Pmode, base, INTVAL (offset));
4602 else if (can_create_pseudo_p ())
4603 offset = force_reg (Pmode, offset);
4604 else
4605 /* If we reach here, then something is seriously wrong. */
4606 gcc_unreachable ();
4607 }
4608 return gen_rtx_PLUS (Pmode, base, offset);
4609 }
4610 else if (GET_CODE (orig) == LABEL_REF)
4611 /* ??? We ought to be checking that the register is live instead, in case
4612 it is eliminated. */
4613 crtl->uses_pic_offset_table = 1;
4614
4615 return orig;
4616 }
4617
4618 /* Try machine-dependent ways of modifying an illegitimate address X
4619 to be legitimate. If we find one, return the new, valid address.
4620
4621 OLDX is the address as it was before break_out_memory_refs was called.
4622 In some cases it is useful to look at this to decide what needs to be done.
4623
4624 MODE is the mode of the operand pointed to by X.
4625
4626 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4627
4628 static rtx
4629 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4630 machine_mode mode)
4631 {
4632 rtx orig_x = x;
4633
4634 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4635 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4636 force_operand (XEXP (x, 0), NULL_RTX));
4637 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4638 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4639 force_operand (XEXP (x, 1), NULL_RTX));
4640 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4641 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4642 XEXP (x, 1));
4643 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4644 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4645 force_operand (XEXP (x, 1), NULL_RTX));
4646
4647 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4648 return x;
4649
4650 if (sparc_tls_referenced_p (x))
4651 x = sparc_legitimize_tls_address (x);
4652 else if (flag_pic)
4653 x = sparc_legitimize_pic_address (x, NULL_RTX);
4654 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4655 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4656 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4657 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4658 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4659 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4660 else if (GET_CODE (x) == SYMBOL_REF
4661 || GET_CODE (x) == CONST
4662 || GET_CODE (x) == LABEL_REF)
4663 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4664
4665 return x;
4666 }
4667
4668 /* Delegitimize an address that was legitimized by the above function. */
4669
4670 static rtx
4671 sparc_delegitimize_address (rtx x)
4672 {
4673 x = delegitimize_mem_from_attrs (x);
4674
4675 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4676 switch (XINT (XEXP (x, 1), 1))
4677 {
4678 case UNSPEC_MOVE_PIC:
4679 case UNSPEC_TLSLE:
4680 x = XVECEXP (XEXP (x, 1), 0, 0);
4681 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4682 break;
4683 default:
4684 break;
4685 }
4686
4687 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4688 if (GET_CODE (x) == MINUS
4689 && REG_P (XEXP (x, 0))
4690 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4691 && GET_CODE (XEXP (x, 1)) == LO_SUM
4692 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4693 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4694 {
4695 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4696 gcc_assert (GET_CODE (x) == LABEL_REF);
4697 }
4698
4699 return x;
4700 }
4701
4702 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4703 replace the input X, or the original X if no replacement is called for.
4704 The output parameter *WIN is 1 if the calling macro should goto WIN,
4705 0 if it should not.
4706
4707 For SPARC, we wish to handle addresses by splitting them into
4708 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4709 This cuts the number of extra insns by one.
4710
4711 Do nothing when generating PIC code and the address is a symbolic
4712 operand or requires a scratch register. */
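/* For instance (a sketch, not an exhaustive description): reloading the
   SImode address (symbol_ref "x") under -mcmodel=medlow yields
   (lo_sum (high (symbol_ref "x")) (symbol_ref "x")); the HIGH part is
   reloaded into a base register, so the final code is roughly
   'sethi %hi(x), %g1; ld [%g1 + %lo(x)], ...'.  */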
4713
4714 rtx
4715 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4716 int opnum, int type,
4717 int ind_levels ATTRIBUTE_UNUSED, int *win)
4718 {
4719 /* Decompose SImode constants into HIGH+LO_SUM. */
4720 if (CONSTANT_P (x)
4721 && (mode != TFmode || TARGET_ARCH64)
4722 && GET_MODE (x) == SImode
4723 && GET_CODE (x) != LO_SUM
4724 && GET_CODE (x) != HIGH
4725 && sparc_cmodel <= CM_MEDLOW
4726 && !(flag_pic
4727 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4728 {
4729 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4730 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4731 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4732 opnum, (enum reload_type)type);
4733 *win = 1;
4734 return x;
4735 }
4736
4737 /* We have to recognize what we have already generated above. */
4738 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4739 {
4740 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4741 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4742 opnum, (enum reload_type)type);
4743 *win = 1;
4744 return x;
4745 }
4746
4747 *win = 0;
4748 return x;
4749 }
4750
4751 /* Return true if ADDR (a legitimate address expression)
4752 has an effect that depends on the machine mode it is used for.
4753
4754 In PIC mode,
4755
4756 (mem:HI [%l7+a])
4757
4758 is not equivalent to
4759
4760 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4761
4762 because [%l7+a+1] is interpreted as the address of (a+1). */
4763
4764
4765 static bool
4766 sparc_mode_dependent_address_p (const_rtx addr,
4767 addr_space_t as ATTRIBUTE_UNUSED)
4768 {
4769 if (flag_pic && GET_CODE (addr) == PLUS)
4770 {
4771 rtx op0 = XEXP (addr, 0);
4772 rtx op1 = XEXP (addr, 1);
4773 if (op0 == pic_offset_table_rtx
4774 && symbolic_operand (op1, VOIDmode))
4775 return true;
4776 }
4777
4778 return false;
4779 }
4780
4781 #ifdef HAVE_GAS_HIDDEN
4782 # define USE_HIDDEN_LINKONCE 1
4783 #else
4784 # define USE_HIDDEN_LINKONCE 0
4785 #endif
4786
4787 static void
4788 get_pc_thunk_name (char name[32], unsigned int regno)
4789 {
4790 const char *reg_name = reg_names[regno];
4791
4792 /* Skip the leading '%' as that cannot be used in a
4793 symbol name. */
4794 reg_name += 1;
4795
4796 if (USE_HIDDEN_LINKONCE)
4797 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4798 else
4799 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4800 }
4801
4802 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4803
4804 static rtx
4805 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4806 {
4807 int orig_flag_pic = flag_pic;
4808 rtx insn;
4809
4810 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4811 flag_pic = 0;
4812 if (TARGET_ARCH64)
4813 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4814 else
4815 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4816 flag_pic = orig_flag_pic;
4817
4818 return insn;
4819 }
4820
4821 /* Emit code to load the GOT register. */
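/* The classic sequence emitted here is roughly (exact offsets and the thunk
   body are assumptions for illustration):

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk simply adds %o7 (the PC of the call) into %l7.  */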
4822
4823 void
4824 load_got_register (void)
4825 {
4826 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4827 if (!global_offset_table_rtx)
4828 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4829
4830 if (TARGET_VXWORKS_RTP)
4831 emit_insn (gen_vxworks_load_got ());
4832 else
4833 {
4834 /* The GOT symbol is subject to a PC-relative relocation so we need a
4835 helper function to add the PC value and thus get the final value. */
4836 if (!got_helper_rtx)
4837 {
4838 char name[32];
4839 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4840 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4841 }
4842
4843 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4844 got_helper_rtx,
4845 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4846 }
4847
4848 /* Need to emit this whether or not we obey regdecls,
4849 since setjmp/longjmp can cause life info to screw up.
4850 ??? In the case where we don't obey regdecls, this is not sufficient
4851 since we may not fall out the bottom. */
4852 emit_use (global_offset_table_rtx);
4853 }
4854
4855 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4856 address of the call target. */
4857
4858 void
4859 sparc_emit_call_insn (rtx pat, rtx addr)
4860 {
4861 rtx_insn *insn;
4862
4863 insn = emit_call_insn (pat);
4864
4865 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4866 if (TARGET_VXWORKS_RTP
4867 && flag_pic
4868 && GET_CODE (addr) == SYMBOL_REF
4869 && (SYMBOL_REF_DECL (addr)
4870 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4871 : !SYMBOL_REF_LOCAL_P (addr)))
4872 {
4873 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4874 crtl->uses_pic_offset_table = 1;
4875 }
4876 }
4877 \f
4878 /* Return 1 if RTX is a MEM which is known to be aligned to at
4879 least a DESIRED byte boundary. */
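/* Worked example (64-bit mode, where SPARC_STACK_BIAS is 2047): a MEM at
   [%sp + 2175] with DESIRED == 8 is accepted because
   (2175 - 2047) & 7 == 0, i.e. the unbiased offset 128 is 8-byte aligned.  */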
4880
4881 int
4882 mem_min_alignment (rtx mem, int desired)
4883 {
4884 rtx addr, base, offset;
4885
4886 /* If it's not a MEM we can't accept it. */
4887 if (GET_CODE (mem) != MEM)
4888 return 0;
4889
4890 /* Obviously... */
4891 if (!TARGET_UNALIGNED_DOUBLES
4892 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4893 return 1;
4894
4895 /* ??? The rest of the function predates MEM_ALIGN so
4896 there is probably a bit of redundancy. */
4897 addr = XEXP (mem, 0);
4898 base = offset = NULL_RTX;
4899 if (GET_CODE (addr) == PLUS)
4900 {
4901 if (GET_CODE (XEXP (addr, 0)) == REG)
4902 {
4903 base = XEXP (addr, 0);
4904
4905 /* What we are saying here is that if the base
4906 REG is aligned properly, the compiler will make
4907 sure any REG-based index upon it will be so
4908 as well. */
4909 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4910 offset = XEXP (addr, 1);
4911 else
4912 offset = const0_rtx;
4913 }
4914 }
4915 else if (GET_CODE (addr) == REG)
4916 {
4917 base = addr;
4918 offset = const0_rtx;
4919 }
4920
4921 if (base != NULL_RTX)
4922 {
4923 int regno = REGNO (base);
4924
4925 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4926 {
4927 /* Check if the compiler has recorded some information
4928 about the alignment of the base REG. If reload has
4929 completed, we already matched with proper alignments.
4930 If not running global_alloc, reload might give us
4931 an unaligned pointer to the local stack, though. */
4932 if (((cfun != 0
4933 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4934 || (optimize && reload_completed))
4935 && (INTVAL (offset) & (desired - 1)) == 0)
4936 return 1;
4937 }
4938 else
4939 {
4940 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4941 return 1;
4942 }
4943 }
4944 else if (! TARGET_UNALIGNED_DOUBLES
4945 || CONSTANT_P (addr)
4946 || GET_CODE (addr) == LO_SUM)
4947 {
4948 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4949 is true, in which case we can only assume that an access is aligned if
4950 it is to a constant address, or the address involves a LO_SUM. */
4951 return 1;
4952 }
4953
4954 /* An obviously unaligned address. */
4955 return 0;
4956 }
4957
4958 \f
4959 /* Vectors to keep interesting information about registers where it can easily
4960 be got. We used to use the actual mode value as the bit number, but there
4961 are more than 32 modes now. Instead we use two tables: one indexed by
4962 hard register number, and one indexed by mode. */
4963
4964 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4965 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4966 mapped into one sparc_mode_class mode. */
4967
4968 enum sparc_mode_class {
4969 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4970 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4971 CC_MODE, CCFP_MODE
4972 };
4973
4974 /* Modes for single-word and smaller quantities. */
4975 #define S_MODES \
4976 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4977
4978 /* Modes for double-word and smaller quantities. */
4979 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4980
4981 /* Modes for quad-word and smaller quantities. */
4982 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4983
4984 /* Modes for 8-word and smaller quantities. */
4985 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4986
4987 /* Modes for single-float quantities. */
4988 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4989
4990 /* Modes for double-float and smaller quantities. */
4991 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4992
4993 /* Modes for quad-float and smaller quantities. */
4994 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4995
4996 /* Modes for quad-float pairs and smaller quantities. */
4997 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4998
4999 /* Modes for double-float only quantities. */
5000 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5001
5002 /* Modes for quad-float and double-float only quantities. */
5003 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5004
5005 /* Modes for quad-float pairs and double-float only quantities. */
5006 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5007
5008 /* Modes for condition codes. */
5009 #define CC_MODES (1 << (int) CC_MODE)
5010 #define CCFP_MODES (1 << (int) CCFP_MODE)
5011
5012 /* Value is 1 if the register/mode pair is acceptable on SPARC.
5013
5014 The funny mixture of D and T modes is because integer operations
5015 do not specially operate on tetra quantities, so non-quad-aligned
5016 registers can hold quadword quantities (except %o4 and %i4 because
5017 they cross fixed registers).
5018
5019 ??? Note that, despite the settings, non-double-aligned parameter
5020 registers can hold double-word quantities in 32-bit mode. */
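/* Reading the tables below: a DImode value (class D_MODE) is allowed in %g2
   (entry T_MODES in the 32-bit table) but not in %g1 (entry S_MODES),
   reflecting the requirement that double-word values live in even/odd
   register pairs in 32-bit mode.  */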
5021
5022 /* This points to either the 32-bit or the 64-bit version. */
5023 static const int *hard_regno_mode_classes;
5024
5025 static const int hard_32bit_mode_classes[] = {
5026 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5027 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5028 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5029 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5030
5031 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5032 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5033 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5034 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5035
5036 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5037 and none can hold SFmode/SImode values. */
5038 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5039 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5040 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5041 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5042
5043 /* %fcc[0123] */
5044 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5045
5046 /* %icc, %sfp, %gsr */
5047 CC_MODES, 0, D_MODES
5048 };
5049
5050 static const int hard_64bit_mode_classes[] = {
5051 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5052 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5053 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5054 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5055
5056 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5057 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5058 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5059 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5060
5061 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5062 and none can hold SFmode/SImode values. */
5063 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5064 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5065 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5066 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5067
5068 /* %fcc[0123] */
5069 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5070
5071 /* %icc, %sfp, %gsr */
5072 CC_MODES, 0, D_MODES
5073 };
5074
5075 static int sparc_mode_class [NUM_MACHINE_MODES];
5076
5077 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5078
5079 static void
5080 sparc_init_modes (void)
5081 {
5082 int i;
5083
5084 for (i = 0; i < NUM_MACHINE_MODES; i++)
5085 {
5086 machine_mode m = (machine_mode) i;
5087 unsigned int size = GET_MODE_SIZE (m);
5088
5089 switch (GET_MODE_CLASS (m))
5090 {
5091 case MODE_INT:
5092 case MODE_PARTIAL_INT:
5093 case MODE_COMPLEX_INT:
5094 if (size < 4)
5095 sparc_mode_class[i] = 1 << (int) H_MODE;
5096 else if (size == 4)
5097 sparc_mode_class[i] = 1 << (int) S_MODE;
5098 else if (size == 8)
5099 sparc_mode_class[i] = 1 << (int) D_MODE;
5100 else if (size == 16)
5101 sparc_mode_class[i] = 1 << (int) T_MODE;
5102 else if (size == 32)
5103 sparc_mode_class[i] = 1 << (int) O_MODE;
5104 else
5105 sparc_mode_class[i] = 0;
5106 break;
5107 case MODE_VECTOR_INT:
5108 if (size == 4)
5109 sparc_mode_class[i] = 1 << (int) SF_MODE;
5110 else if (size == 8)
5111 sparc_mode_class[i] = 1 << (int) DF_MODE;
5112 else
5113 sparc_mode_class[i] = 0;
5114 break;
5115 case MODE_FLOAT:
5116 case MODE_COMPLEX_FLOAT:
5117 if (size == 4)
5118 sparc_mode_class[i] = 1 << (int) SF_MODE;
5119 else if (size == 8)
5120 sparc_mode_class[i] = 1 << (int) DF_MODE;
5121 else if (size == 16)
5122 sparc_mode_class[i] = 1 << (int) TF_MODE;
5123 else if (size == 32)
5124 sparc_mode_class[i] = 1 << (int) OF_MODE;
5125 else
5126 sparc_mode_class[i] = 0;
5127 break;
5128 case MODE_CC:
5129 if (m == CCFPmode || m == CCFPEmode)
5130 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5131 else
5132 sparc_mode_class[i] = 1 << (int) CC_MODE;
5133 break;
5134 default:
5135 sparc_mode_class[i] = 0;
5136 break;
5137 }
5138 }
5139
5140 if (TARGET_ARCH64)
5141 hard_regno_mode_classes = hard_64bit_mode_classes;
5142 else
5143 hard_regno_mode_classes = hard_32bit_mode_classes;
5144
5145 /* Initialize the array used by REGNO_REG_CLASS. */
5146 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5147 {
5148 if (i < 16 && TARGET_V8PLUS)
5149 sparc_regno_reg_class[i] = I64_REGS;
5150 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5151 sparc_regno_reg_class[i] = GENERAL_REGS;
5152 else if (i < 64)
5153 sparc_regno_reg_class[i] = FP_REGS;
5154 else if (i < 96)
5155 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5156 else if (i < 100)
5157 sparc_regno_reg_class[i] = FPCC_REGS;
5158 else
5159 sparc_regno_reg_class[i] = NO_REGS;
5160 }
5161 }
5162 \f
5163 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5164
5165 static inline bool
5166 save_global_or_fp_reg_p (unsigned int regno,
5167 int leaf_function ATTRIBUTE_UNUSED)
5168 {
5169 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5170 }
5171
5172 /* Return whether the return address register (%i7) is needed. */
5173
5174 static inline bool
5175 return_addr_reg_needed_p (int leaf_function)
5176 {
5177 /* If it is live, for example because of __builtin_return_address (0). */
5178 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5179 return true;
5180
5181 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5182 if (!leaf_function
5183 /* Loading the GOT register clobbers %o7. */
5184 || crtl->uses_pic_offset_table
5185 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5186 return true;
5187
5188 return false;
5189 }
5190
5191 /* Return whether REGNO, a local or in register, must be saved/restored. */
5192
5193 static bool
5194 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5195 {
5196 /* General case: call-saved registers live at some point. */
5197 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5198 return true;
5199
5200 /* Frame pointer register (%fp) if needed. */
5201 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5202 return true;
5203
5204 /* Return address register (%i7) if needed. */
5205 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5206 return true;
5207
5208 /* GOT register (%l7) if needed. */
5209 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5210 return true;
5211
5212 /* If the function accesses prior frames, the frame pointer and the return
5213 address of the previous frame must be saved on the stack. */
5214 if (crtl->accesses_prior_frames
5215 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5216 return true;
5217
5218 return false;
5219 }
5220
5221 /* Compute the frame size required by the function. This function is called
5222 during the reload pass and also by sparc_expand_prologue. */
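/* A sketch of the computation below (the concrete constants depend on the
   ABI and are only assumed here): with size == 40, a starting frame offset
   of 0, two saved global/FP register pairs (n_global_fp_regs == 4) and
   args_size == 0, apparent_frame_size is ROUND_UP (40, 8) + 4 * 4 == 56;
   the full frame size additionally gets the rounded outgoing argument area
   and FIRST_PARM_OFFSET for the register window save area, and is then
   rounded up by SPARC_STACK_ALIGN.  */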
5223
5224 HOST_WIDE_INT
5225 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5226 {
5227 HOST_WIDE_INT frame_size, apparent_frame_size;
5228 int args_size, n_global_fp_regs = 0;
5229 bool save_local_in_regs_p = false;
5230 unsigned int i;
5231
5232 /* If the function allocates dynamic stack space, the dynamic offset is
5233 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5234 if (leaf_function && !cfun->calls_alloca)
5235 args_size = 0;
5236 else
5237 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5238
5239 /* Calculate space needed for global registers. */
5240 if (TARGET_ARCH64)
5241 {
5242 for (i = 0; i < 8; i++)
5243 if (save_global_or_fp_reg_p (i, 0))
5244 n_global_fp_regs += 2;
5245 }
5246 else
5247 {
5248 for (i = 0; i < 8; i += 2)
5249 if (save_global_or_fp_reg_p (i, 0)
5250 || save_global_or_fp_reg_p (i + 1, 0))
5251 n_global_fp_regs += 2;
5252 }
5253
5254 /* In the flat window model, find out which local and in registers need to
5255 be saved. We don't reserve space in the current frame for them as they
5256 will be spilled into the register window save area of the caller's frame.
5257 However, as soon as we use this register window save area, we must create
5258 that of the current frame to make it the live one. */
5259 if (TARGET_FLAT)
5260 for (i = 16; i < 32; i++)
5261 if (save_local_or_in_reg_p (i, leaf_function))
5262 {
5263 save_local_in_regs_p = true;
5264 break;
5265 }
5266
5267 /* Calculate space needed for FP registers. */
5268 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5269 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5270 n_global_fp_regs += 2;
5271
5272 if (size == 0
5273 && n_global_fp_regs == 0
5274 && args_size == 0
5275 && !save_local_in_regs_p)
5276 frame_size = apparent_frame_size = 0;
5277 else
5278 {
5279 /* We subtract TARGET_STARTING_FRAME_OFFSET; remember it's negative. */
5280 apparent_frame_size
5281 = ROUND_UP (size - targetm.starting_frame_offset (), 8);
5282 apparent_frame_size += n_global_fp_regs * 4;
5283
5284 /* We need to add the size of the outgoing argument area. */
5285 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5286
5287 /* And that of the register window save area. */
5288 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5289
5290 /* Finally, bump to the appropriate alignment. */
5291 frame_size = SPARC_STACK_ALIGN (frame_size);
5292 }
5293
5294 /* Set up values for use in prologue and epilogue. */
5295 sparc_frame_size = frame_size;
5296 sparc_apparent_frame_size = apparent_frame_size;
5297 sparc_n_global_fp_regs = n_global_fp_regs;
5298 sparc_save_local_in_regs_p = save_local_in_regs_p;
5299
5300 return frame_size;
5301 }
5302
5303 /* Implement the macro INITIAL_ELIMINATION_OFFSET; return the OFFSET. */
5304
5305 int
5306 sparc_initial_elimination_offset (int to)
5307 {
5308 int offset;
5309
5310 if (to == STACK_POINTER_REGNUM)
5311 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5312 else
5313 offset = 0;
5314
5315 offset += SPARC_STACK_BIAS;
5316 return offset;
5317 }
5318
5319 /* Output any necessary .register pseudo-ops. */
5320
5321 void
5322 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5323 {
5324 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5325 int i;
5326
5327 if (TARGET_ARCH32)
5328 return;
5329
5330 /* Check if %g[2367] were used without
5331 .register being printed for them already. */
5332 for (i = 2; i < 8; i++)
5333 {
5334 if (df_regs_ever_live_p (i)
5335 && ! sparc_hard_reg_printed [i])
5336 {
5337 sparc_hard_reg_printed [i] = 1;
5338 /* %g7 is used as the TLS base register; use #ignore
5339 for it instead of #scratch. */
5340 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5341 i == 7 ? "ignore" : "scratch");
5342 }
5343 if (i == 3) i = 5;
5344 }
5345 #endif
5346 }
5347
5348 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5349
5350 #if PROBE_INTERVAL > 4096
5351 #error Cannot use indexed addressing mode for stack probing
5352 #endif
5353
5354 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5355 inclusive. These are offsets from the current stack pointer.
5356
5357 Note that we don't use the REG+REG addressing mode for the probes because
5358 of the stack bias in 64-bit mode. It doesn't really buy us anything anyway,
5359 so the advantage of having a single code path for both modes wins here. */
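/* Worked example, assuming the usual PROBE_INTERVAL of 4096: for FIRST == 0
   and SIZE == 10000 the second branch below is taken and probes are emitted
   at sp - 4096, sp - 8192 and finally sp - 10000.  */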
5360
5361 static void
5362 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5363 {
5364 rtx g1 = gen_rtx_REG (Pmode, 1);
5365
5366 /* See if we have a constant small number of probes to generate. If so,
5367 that's the easy case. */
5368 if (size <= PROBE_INTERVAL)
5369 {
5370 emit_move_insn (g1, GEN_INT (first));
5371 emit_insn (gen_rtx_SET (g1,
5372 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5373 emit_stack_probe (plus_constant (Pmode, g1, -size));
5374 }
5375
5376 /* The run-time loop is made up of 9 insns in the generic case while the
5377 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5378 else if (size <= 4 * PROBE_INTERVAL)
5379 {
5380 HOST_WIDE_INT i;
5381
5382 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5383 emit_insn (gen_rtx_SET (g1,
5384 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5385 emit_stack_probe (g1);
5386
5387 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5388 it exceeds SIZE. If only two probes are needed, this will not
5389 generate any code. Then probe at FIRST + SIZE. */
5390 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5391 {
5392 emit_insn (gen_rtx_SET (g1,
5393 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5394 emit_stack_probe (g1);
5395 }
5396
5397 emit_stack_probe (plus_constant (Pmode, g1,
5398 (i - PROBE_INTERVAL) - size));
5399 }
5400
5401 /* Otherwise, do the same as above, but in a loop. Note that we must be
5402 extra careful with variables wrapping around because we might be at
5403 the very top (or the very bottom) of the address space and we have
5404 to be able to handle this case properly; in particular, we use an
5405 equality test for the loop condition. */
5406 else
5407 {
5408 HOST_WIDE_INT rounded_size;
5409 rtx g4 = gen_rtx_REG (Pmode, 4);
5410
5411 emit_move_insn (g1, GEN_INT (first));
5412
5413
5414 /* Step 1: round SIZE to the previous multiple of the interval. */
5415
5416 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5417 emit_move_insn (g4, GEN_INT (rounded_size));
5418
5419
5420 /* Step 2: compute initial and final value of the loop counter. */
5421
5422 /* TEST_ADDR = SP + FIRST. */
5423 emit_insn (gen_rtx_SET (g1,
5424 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5425
5426 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5427 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5428
5429
5430 /* Step 3: the loop
5431
5432 while (TEST_ADDR != LAST_ADDR)
5433 {
5434 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5435 probe at TEST_ADDR
5436 }
5437
5438 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5439 until it is equal to ROUNDED_SIZE. */
5440
5441 if (TARGET_ARCH64)
5442 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5443 else
5444 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5445
5446
5447 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5448 that SIZE is equal to ROUNDED_SIZE. */
5449
5450 if (size != rounded_size)
5451 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5452 }
5453
5454 /* Make sure nothing is scheduled before we are done. */
5455 emit_insn (gen_blockage ());
5456 }
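/* As a rough illustration of the schedule implemented above (an informal
   sketch, not part of the original code): probes land at FIRST + N *
   PROBE_INTERVAL for every N such that N * PROBE_INTERVAL < SIZE, plus a
   final probe at FIRST + SIZE.  The host-side helper below just prints
   that schedule for a given range.  */
#if 0
#include <stdio.h>

static void
print_probe_schedule (long first, long size, long probe_interval)
{
  long off;

  /* One probe per interval strictly inside the range...  */
  for (off = probe_interval; off < size; off += probe_interval)
    printf ("probe at SP - %ld\n", first + off);

  /* ...and always a trailing probe at FIRST + SIZE.  */
  printf ("probe at SP - %ld\n", first + size);
}
#endif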
5457
5458 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5459 absolute addresses. */
5460
5461 const char *
5462 output_probe_stack_range (rtx reg1, rtx reg2)
5463 {
5464 static int labelno = 0;
5465 char loop_lab[32];
5466 rtx xops[2];
5467
5468 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5469
5470 /* Loop. */
5471 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5472
5473 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5474 xops[0] = reg1;
5475 xops[1] = GEN_INT (-PROBE_INTERVAL);
5476 output_asm_insn ("add\t%0, %1, %0", xops);
5477
5478 /* Test if TEST_ADDR == LAST_ADDR. */
5479 xops[1] = reg2;
5480 output_asm_insn ("cmp\t%0, %1", xops);
5481
5482 /* Probe at TEST_ADDR and branch. */
5483 if (TARGET_ARCH64)
5484 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5485 else
5486 fputs ("\tbne\t", asm_out_file);
5487 assemble_name_raw (asm_out_file, loop_lab);
5488 fputc ('\n', asm_out_file);
5489 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5490 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5491
5492 return "";
5493 }
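/* As an informal illustration (assuming PROBE_INTERVAL is 4096 and the
   64-bit stack bias of 2047), the loop emitted here for the %g1/%g4 pair
   set up by sparc_emit_probe_stack_range looks roughly like:

	.LPSRL0:
		add	%g1, -4096, %g1
		cmp	%g1, %g4
		bne,pt	%xcc, .LPSRL0
		 st	%g0, [%g1+2047]
   */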
5494
5495 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5496 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5497 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5498 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5499 the action to be performed if it returns false. Return the new offset. */
5500
5501 typedef bool (*sorr_pred_t) (unsigned int, int);
5502 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5503
5504 static int
5505 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5506 int offset, int leaf_function, sorr_pred_t save_p,
5507 sorr_act_t action_true, sorr_act_t action_false)
5508 {
5509 unsigned int i;
5510 rtx mem;
5511 rtx_insn *insn;
5512
5513 if (TARGET_ARCH64 && high <= 32)
5514 {
5515 int fp_offset = -1;
5516
5517 for (i = low; i < high; i++)
5518 {
5519 if (save_p (i, leaf_function))
5520 {
5521 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5522 base, offset));
5523 if (action_true == SORR_SAVE)
5524 {
5525 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5526 RTX_FRAME_RELATED_P (insn) = 1;
5527 }
5528 else /* action_true == SORR_RESTORE */
5529 {
5530 /* The frame pointer must be restored last since its old
5531 value may be used as base address for the frame. This
5532 is problematic in 64-bit mode only because of the lack
5533 of a double-word load instruction. */
5534 if (i == HARD_FRAME_POINTER_REGNUM)
5535 fp_offset = offset;
5536 else
5537 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5538 }
5539 offset += 8;
5540 }
5541 else if (action_false == SORR_ADVANCE)
5542 offset += 8;
5543 }
5544
5545 if (fp_offset >= 0)
5546 {
5547 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5548 emit_move_insn (hard_frame_pointer_rtx, mem);
5549 }
5550 }
5551 else
5552 {
5553 for (i = low; i < high; i += 2)
5554 {
5555 bool reg0 = save_p (i, leaf_function);
5556 bool reg1 = save_p (i + 1, leaf_function);
5557 machine_mode mode;
5558 int regno;
5559
5560 if (reg0 && reg1)
5561 {
5562 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5563 regno = i;
5564 }
5565 else if (reg0)
5566 {
5567 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5568 regno = i;
5569 }
5570 else if (reg1)
5571 {
5572 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5573 regno = i + 1;
5574 offset += 4;
5575 }
5576 else
5577 {
5578 if (action_false == SORR_ADVANCE)
5579 offset += 8;
5580 continue;
5581 }
5582
5583 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5584 if (action_true == SORR_SAVE)
5585 {
5586 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5587 RTX_FRAME_RELATED_P (insn) = 1;
5588 if (mode == DImode)
5589 {
5590 rtx set1, set2;
5591 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5592 offset));
5593 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5594 RTX_FRAME_RELATED_P (set1) = 1;
5595 mem
5596 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5597 offset + 4));
5598 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5599 RTX_FRAME_RELATED_P (set2) = 1;
5600 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5601 gen_rtx_PARALLEL (VOIDmode,
5602 gen_rtvec (2, set1, set2)));
5603 }
5604 }
5605 else /* action_true == SORR_RESTORE */
5606 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5607
5608 /* Bump and round down to double word
5609 in case we already bumped by 4. */
5610 offset = ROUND_DOWN (offset + 8, 8);
5611 }
5612 }
5613
5614 return offset;
5615 }
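/* A condensed sketch of the pairing scheme used by the second branch above
   (informal, not part of the original code): registers are visited two at a
   time and coalesced into one double-word access whenever both members of
   the pair need saving.  SAVE_P stands for an arbitrary predicate and the
   SORR_NONE behaviour is assumed for unsaved pairs.  */
#if 0
static int
sketch_pair_layout (unsigned int low, unsigned int high, int offset,
		    bool (*save_p) (unsigned int))
{
  unsigned int i;

  for (i = low; i < high; i += 2)
    {
      bool reg0 = save_p (i), reg1 = save_p (i + 1);

      if (reg0 && reg1)
	printf ("regs %u/%u, double word at %d\n", i, i + 1, offset);
      else if (reg0)
	printf ("reg %u, single word at %d\n", i, offset);
      else if (reg1)
	printf ("reg %u, single word at %d\n", i + 1, offset + 4);
      else
	continue; /* With SORR_ADVANCE the offset would be bumped by 8 here. */

      /* Bump and round down to a double word, as in the code above.  */
      offset = ROUND_DOWN (offset + 8, 8);
    }

  return offset;
}
#endif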
5616
5617 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5618
5619 static rtx
5620 emit_adjust_base_to_offset (rtx base, int offset)
5621 {
5622 /* ??? This might be optimized a little as %g1 might already have a
5623 value close enough that a single add insn will do. */
5624 /* ??? Although, all of this is probably only a temporary fix because
5625 if %g1 can hold a function result, then sparc_expand_epilogue will
5626 lose (the result will be clobbered). */
5627 rtx new_base = gen_rtx_REG (Pmode, 1);
5628 emit_move_insn (new_base, GEN_INT (offset));
5629 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5630 return new_base;
5631 }
5632
5633 /* Emit code to save/restore call-saved global and FP registers. */
5634
5635 static void
5636 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5637 {
5638 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5639 {
5640 base = emit_adjust_base_to_offset (base, offset);
5641 offset = 0;
5642 }
5643
5644 offset
5645 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5646 save_global_or_fp_reg_p, action, SORR_NONE);
5647 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5648 save_global_or_fp_reg_p, action, SORR_NONE);
5649 }
5650
5651 /* Emit code to save/restore call-saved local and in registers. */
5652
5653 static void
5654 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5655 {
5656 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5657 {
5658 base = emit_adjust_base_to_offset (base, offset);
5659 offset = 0;
5660 }
5661
5662 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5663 save_local_or_in_reg_p, action, SORR_ADVANCE);
5664 }
5665
5666 /* Emit a window_save insn. */
5667
5668 static rtx_insn *
5669 emit_window_save (rtx increment)
5670 {
5671 rtx_insn *insn = emit_insn (gen_window_save (increment));
5672 RTX_FRAME_RELATED_P (insn) = 1;
5673
5674 /* The incoming return address (%o7) is saved in %i7. */
5675 add_reg_note (insn, REG_CFA_REGISTER,
5676 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5677 gen_rtx_REG (Pmode,
5678 INCOMING_RETURN_ADDR_REGNUM)));
5679
5680 /* The window save event. */
5681 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5682
5683 /* The CFA is %fp, the hard frame pointer. */
5684 add_reg_note (insn, REG_CFA_DEF_CFA,
5685 plus_constant (Pmode, hard_frame_pointer_rtx,
5686 INCOMING_FRAME_SP_OFFSET));
5687
5688 return insn;
5689 }
5690
5691 /* Generate an increment for the stack pointer. */
5692
5693 static rtx
5694 gen_stack_pointer_inc (rtx increment)
5695 {
5696 return gen_rtx_SET (stack_pointer_rtx,
5697 gen_rtx_PLUS (Pmode,
5698 stack_pointer_rtx,
5699 increment));
5700 }
5701
5702 /* Expand the function prologue. The prologue is responsible for reserving
5703 storage for the frame, saving the call-saved registers and loading the
5704 GOT register if needed. */
5705
5706 void
5707 sparc_expand_prologue (void)
5708 {
5709 HOST_WIDE_INT size;
5710 rtx_insn *insn;
5711
5712 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5713 on the final value of the flag means deferring the prologue/epilogue
5714 expansion until just before the second scheduling pass, which is too
5715 late to emit multiple epilogues or return insns.
5716
5717 Of course we are making the assumption that the value of the flag
5718 will not change between now and its final value. Of the three parts
5719 of the formula, only the last one can reasonably vary. Let's take a
5720 closer look, after assuming that the first two are set to true
5721 (otherwise the last value is effectively silenced).
5722
5723 If only_leaf_regs_used returns false, the global predicate will also
5724 be false so the actual frame size calculated below will be positive.
5725 As a consequence, the save_register_window insn will be emitted in
5726 the instruction stream; now this insn explicitly references %fp
5727 which is not a leaf register so only_leaf_regs_used will always
5728 return false subsequently.
5729
5730 If only_leaf_regs_used returns true, we hope that the subsequent
5731 optimization passes won't cause non-leaf registers to pop up. For
5732 example, the regrename pass has special provisions to not rename to
5733 non-leaf registers in a leaf function. */
5734 sparc_leaf_function_p
5735 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5736
5737 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5738
5739 if (flag_stack_usage_info)
5740 current_function_static_stack_size = size;
5741
5742 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5743 || flag_stack_clash_protection)
5744 {
5745 if (crtl->is_leaf && !cfun->calls_alloca)
5746 {
5747 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5748 sparc_emit_probe_stack_range (get_stack_check_protect (),
5749 size - get_stack_check_protect ());
5750 }
5751 else if (size > 0)
5752 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5753 }
5754
5755 if (size == 0)
5756 ; /* do nothing. */
5757 else if (sparc_leaf_function_p)
5758 {
5759 rtx size_int_rtx = GEN_INT (-size);
5760
5761 if (size <= 4096)
5762 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5763 else if (size <= 8192)
5764 {
5765 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5766 RTX_FRAME_RELATED_P (insn) = 1;
5767
5768 /* %sp is still the CFA register. */
5769 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5770 }
5771 else
5772 {
5773 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5774 emit_move_insn (size_rtx, size_int_rtx);
5775 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5776 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5777 gen_stack_pointer_inc (size_int_rtx));
5778 }
5779
5780 RTX_FRAME_RELATED_P (insn) = 1;
5781 }
5782 else
5783 {
5784 rtx size_int_rtx = GEN_INT (-size);
5785
5786 if (size <= 4096)
5787 emit_window_save (size_int_rtx);
5788 else if (size <= 8192)
5789 {
5790 emit_window_save (GEN_INT (-4096));
5791
5792 /* %sp is not the CFA register anymore. */
5793 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5794
5795 /* Make sure no %fp-based store is issued until after the frame is
5796 established. The offset between the frame pointer and the stack
5797 pointer is calculated relative to the value of the stack pointer
5798 at the end of the function prologue, and moving instructions that
5799 access the stack via the frame pointer between the instructions
5800 that decrement the stack pointer could result in accessing the
5801 register window save area, which is volatile. */
5802 emit_insn (gen_frame_blockage ());
5803 }
5804 else
5805 {
5806 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5807 emit_move_insn (size_rtx, size_int_rtx);
5808 emit_window_save (size_rtx);
5809 }
5810 }
5811
5812 if (sparc_leaf_function_p)
5813 {
5814 sparc_frame_base_reg = stack_pointer_rtx;
5815 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5816 }
5817 else
5818 {
5819 sparc_frame_base_reg = hard_frame_pointer_rtx;
5820 sparc_frame_base_offset = SPARC_STACK_BIAS;
5821 }
5822
5823 if (sparc_n_global_fp_regs > 0)
5824 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5825 sparc_frame_base_offset
5826 - sparc_apparent_frame_size,
5827 SORR_SAVE);
5828
5829 /* Load the GOT register if needed. */
5830 if (crtl->uses_pic_offset_table)
5831 load_got_register ();
5832
5833 /* Advertise that the data calculated just above are now valid. */
5834 sparc_prologue_data_valid_p = true;
5835 }
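/* As an informal illustration of the three size cases handled above for a
   leaf function (assuming the usual 13-bit signed immediate range of the
   SPARC add instruction, i.e. -4096 .. 4095):

     size = 3000:   add  %sp, -3000, %sp
     size = 6000:   add  %sp, -4096, %sp
                    add  %sp, -1904, %sp
     size = 20000:  move the constant -20000 into %g1
                    add  %sp, %g1, %sp

   The non-leaf cases are analogous, except that a window save replaces the
   first stack pointer adjustment.  */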
5836
5837 /* Expand the function prologue for the flat register window model. It is
5838 responsible for reserving storage for the frame, saving the call-saved
5839 registers and loading the GOT register if needed. */
5840
5841 void
5842 sparc_flat_expand_prologue (void)
5843 {
5844 HOST_WIDE_INT size;
5845 rtx_insn *insn;
5846
5847 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5848
5849 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5850
5851 if (flag_stack_usage_info)
5852 current_function_static_stack_size = size;
5853
5854 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5855 || flag_stack_clash_protection)
5856 {
5857 if (crtl->is_leaf && !cfun->calls_alloca)
5858 {
5859 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5860 sparc_emit_probe_stack_range (get_stack_check_protect (),
5861 size - get_stack_check_protect ());
5862 }
5863 else if (size > 0)
5864 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5865 }
5866
5867 if (sparc_save_local_in_regs_p)
5868 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5869 SORR_SAVE);
5870
5871 if (size == 0)
5872 ; /* do nothing. */
5873 else
5874 {
5875 rtx size_int_rtx, size_rtx;
5876
5877 size_rtx = size_int_rtx = GEN_INT (-size);
5878
5879 /* We establish the frame (i.e. decrement the stack pointer) first, even
5880 if we use a frame pointer, because we cannot clobber any call-saved
5881 registers, including the frame pointer, if we haven't created a new
5882 register save area, for the sake of compatibility with the ABI. */
5883 if (size <= 4096)
5884 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5885 else if (size <= 8192 && !frame_pointer_needed)
5886 {
5887 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5888 RTX_FRAME_RELATED_P (insn) = 1;
5889 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5890 }
5891 else
5892 {
5893 size_rtx = gen_rtx_REG (Pmode, 1);
5894 emit_move_insn (size_rtx, size_int_rtx);
5895 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5896 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5897 gen_stack_pointer_inc (size_int_rtx));
5898 }
5899 RTX_FRAME_RELATED_P (insn) = 1;
5900
5901 /* Ensure nothing is scheduled until after the frame is established. */
5902 emit_insn (gen_blockage ());
5903
5904 if (frame_pointer_needed)
5905 {
5906 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5907 gen_rtx_MINUS (Pmode,
5908 stack_pointer_rtx,
5909 size_rtx)));
5910 RTX_FRAME_RELATED_P (insn) = 1;
5911
5912 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5913 gen_rtx_SET (hard_frame_pointer_rtx,
5914 plus_constant (Pmode, stack_pointer_rtx,
5915 size)));
5916 }
5917
5918 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5919 {
5920 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5921 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5922
5923 insn = emit_move_insn (i7, o7);
5924 RTX_FRAME_RELATED_P (insn) = 1;
5925
5926 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5927
5928 /* Prevent this instruction from ever being considered dead,
5929 even if this function has no epilogue. */
5930 emit_use (i7);
5931 }
5932 }
5933
5934 if (frame_pointer_needed)
5935 {
5936 sparc_frame_base_reg = hard_frame_pointer_rtx;
5937 sparc_frame_base_offset = SPARC_STACK_BIAS;
5938 }
5939 else
5940 {
5941 sparc_frame_base_reg = stack_pointer_rtx;
5942 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5943 }
5944
5945 if (sparc_n_global_fp_regs > 0)
5946 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5947 sparc_frame_base_offset
5948 - sparc_apparent_frame_size,
5949 SORR_SAVE);
5950
5951 /* Load the GOT register if needed. */
5952 if (crtl->uses_pic_offset_table)
5953 load_got_register ();
5954
5955 /* Advertise that the data calculated just above are now valid. */
5956 sparc_prologue_data_valid_p = true;
5957 }
5958
5959 /* This function generates the assembly code for function entry, which boils
5960 down to emitting the necessary .register directives. */
5961
5962 static void
5963 sparc_asm_function_prologue (FILE *file)
5964 {
5965 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5966 if (!TARGET_FLAT)
5967 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5968
5969 sparc_output_scratch_registers (file);
5970 }
5971
5972 /* Expand the function epilogue, either normal or part of a sibcall.
5973 We emit all the instructions except the return or the call. */
5974
5975 void
5976 sparc_expand_epilogue (bool for_eh)
5977 {
5978 HOST_WIDE_INT size = sparc_frame_size;
5979
5980 if (cfun->calls_alloca)
5981 emit_insn (gen_frame_blockage ());
5982
5983 if (sparc_n_global_fp_regs > 0)
5984 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5985 sparc_frame_base_offset
5986 - sparc_apparent_frame_size,
5987 SORR_RESTORE);
5988
5989 if (size == 0 || for_eh)
5990 ; /* do nothing. */
5991 else if (sparc_leaf_function_p)
5992 {
5993 if (size <= 4096)
5994 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5995 else if (size <= 8192)
5996 {
5997 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5998 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5999 }
6000 else
6001 {
6002 rtx reg = gen_rtx_REG (Pmode, 1);
6003 emit_move_insn (reg, GEN_INT (size));
6004 emit_insn (gen_stack_pointer_inc (reg));
6005 }
6006 }
6007 }
6008
6009 /* Expand the function epilogue for the flat register window model, either
6010 normal or part of a sibcall; we emit everything except the return or the call. */
6011
6012 void
6013 sparc_flat_expand_epilogue (bool for_eh)
6014 {
6015 HOST_WIDE_INT size = sparc_frame_size;
6016
6017 if (sparc_n_global_fp_regs > 0)
6018 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6019 sparc_frame_base_offset
6020 - sparc_apparent_frame_size,
6021 SORR_RESTORE);
6022
6023 /* If we have a frame pointer, we'll need both to restore it before the
6024 frame is destroyed and to use its current value in destroying the frame.
6025 Since we don't have an atomic way to do that in the flat window model,
6026 we save the current value into a temporary register (%g1). */
6027 if (frame_pointer_needed && !for_eh)
6028 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6029
6030 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6031 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6032 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6033
6034 if (sparc_save_local_in_regs_p)
6035 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6036 sparc_frame_base_offset,
6037 SORR_RESTORE);
6038
6039 if (size == 0 || for_eh)
6040 ; /* do nothing. */
6041 else if (frame_pointer_needed)
6042 {
6043 /* Make sure the frame is destroyed after everything else is done. */
6044 emit_insn (gen_blockage ());
6045
6046 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6047 }
6048 else
6049 {
6050 /* Likewise. */
6051 emit_insn (gen_blockage ());
6052
6053 if (size <= 4096)
6054 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6055 else if (size <= 8192)
6056 {
6057 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6058 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6059 }
6060 else
6061 {
6062 rtx reg = gen_rtx_REG (Pmode, 1);
6063 emit_move_insn (reg, GEN_INT (size));
6064 emit_insn (gen_stack_pointer_inc (reg));
6065 }
6066 }
6067 }
6068
6069 /* Return true if it is appropriate to emit `return' instructions in the
6070 body of a function. */
6071
6072 bool
6073 sparc_can_use_return_insn_p (void)
6074 {
6075 return sparc_prologue_data_valid_p
6076 && sparc_n_global_fp_regs == 0
6077 && TARGET_FLAT
6078 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6079 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6080 }
6081
6082 /* This function generates the assembly code for function exit. */
6083
6084 static void
6085 sparc_asm_function_epilogue (FILE *file)
6086 {
6087 /* If the last two instructions of a function are "call foo; dslot;"
6088 the return address might point to the first instruction in the next
6089 function and we have to output a dummy nop for the sake of sane
6090 backtraces in such cases. This is pointless for sibling calls since
6091 the return address is explicitly adjusted. */
6092
6093 rtx_insn *insn = get_last_insn ();
6094
6095 rtx last_real_insn = prev_real_insn (insn);
6096 if (last_real_insn
6097 && NONJUMP_INSN_P (last_real_insn)
6098 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6099 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6100
6101 if (last_real_insn
6102 && CALL_P (last_real_insn)
6103 && !SIBLING_CALL_P (last_real_insn))
6104 fputs("\tnop\n", file);
6105
6106 sparc_output_deferred_case_vectors ();
6107 }
6108
6109 /* Output a 'restore' instruction. */
6110
6111 static void
6112 output_restore (rtx pat)
6113 {
6114 rtx operands[3];
6115
6116 if (! pat)
6117 {
6118 fputs ("\t restore\n", asm_out_file);
6119 return;
6120 }
6121
6122 gcc_assert (GET_CODE (pat) == SET);
6123
6124 operands[0] = SET_DEST (pat);
6125 pat = SET_SRC (pat);
6126
6127 switch (GET_CODE (pat))
6128 {
6129 case PLUS:
6130 operands[1] = XEXP (pat, 0);
6131 operands[2] = XEXP (pat, 1);
6132 output_asm_insn (" restore %r1, %2, %Y0", operands);
6133 break;
6134 case LO_SUM:
6135 operands[1] = XEXP (pat, 0);
6136 operands[2] = XEXP (pat, 1);
6137 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6138 break;
6139 case ASHIFT:
6140 operands[1] = XEXP (pat, 0);
6141 gcc_assert (XEXP (pat, 1) == const1_rtx);
6142 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6143 break;
6144 default:
6145 operands[1] = pat;
6146 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6147 break;
6148 }
6149 }
6150
6151 /* Output a return. */
6152
6153 const char *
6154 output_return (rtx_insn *insn)
6155 {
6156 if (crtl->calls_eh_return)
6157 {
6158 /* If the function uses __builtin_eh_return, the eh_return
6159 machinery occupies the delay slot. */
6160 gcc_assert (!final_sequence);
6161
6162 if (flag_delayed_branch)
6163 {
6164 if (!TARGET_FLAT && TARGET_V9)
6165 fputs ("\treturn\t%i7+8\n", asm_out_file);
6166 else
6167 {
6168 if (!TARGET_FLAT)
6169 fputs ("\trestore\n", asm_out_file);
6170
6171 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6172 }
6173
6174 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6175 }
6176 else
6177 {
6178 if (!TARGET_FLAT)
6179 fputs ("\trestore\n", asm_out_file);
6180
6181 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6182 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6183 }
6184 }
6185 else if (sparc_leaf_function_p || TARGET_FLAT)
6186 {
6187 /* This is a leaf or flat function so we don't have to bother restoring
6188 the register window, which frees us from dealing with the convoluted
6189 semantics of restore/return. We simply output the jump to the
6190 return address and the insn in the delay slot (if any). */
6191
6192 return "jmp\t%%o7+%)%#";
6193 }
6194 else
6195 {
6196 /* This is a regular function so we have to restore the register window.
6197 We may have a pending insn for the delay slot, which will be either
6198 combined with the 'restore' instruction or put in the delay slot of
6199 the 'return' instruction. */
6200
6201 if (final_sequence)
6202 {
6203 rtx_insn *delay;
6204 rtx pat;
6205 int seen;
6206
6207 delay = NEXT_INSN (insn);
6208 gcc_assert (delay);
6209
6210 pat = PATTERN (delay);
6211
6212 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6213 {
6214 epilogue_renumber (&pat, 0);
6215 return "return\t%%i7+%)%#";
6216 }
6217 else
6218 {
6219 output_asm_insn ("jmp\t%%i7+%)", NULL);
6220
6221 /* We're going to output the insn in the delay slot manually.
6222 Make sure to output its source location first. */
6223 PATTERN (delay) = gen_blockage ();
6224 INSN_CODE (delay) = -1;
6225 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6226 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6227
6228 output_restore (pat);
6229 }
6230 }
6231 else
6232 {
6233 /* The delay slot is empty. */
6234 if (TARGET_V9)
6235 return "return\t%%i7+%)\n\t nop";
6236 else if (flag_delayed_branch)
6237 return "jmp\t%%i7+%)\n\t restore";
6238 else
6239 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6240 }
6241 }
6242
6243 return "";
6244 }
6245
6246 /* Output a sibling call. */
6247
6248 const char *
6249 output_sibcall (rtx_insn *insn, rtx call_operand)
6250 {
6251 rtx operands[1];
6252
6253 gcc_assert (flag_delayed_branch);
6254
6255 operands[0] = call_operand;
6256
6257 if (sparc_leaf_function_p || TARGET_FLAT)
6258 {
6259 /* This is a leaf or flat function so we don't have to bother restoring
6260 the register window. We simply output the jump to the function and
6261 the insn in the delay slot (if any). */
6262
6263 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6264
6265 if (final_sequence)
6266 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6267 operands);
6268 else
6269 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6270 it into a branch if possible. */
6271 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6272 operands);
6273 }
6274 else
6275 {
6276 /* This is a regular function so we have to restore the register window.
6277 We may have a pending insn for the delay slot, which will be combined
6278 with the 'restore' instruction. */
6279
6280 output_asm_insn ("call\t%a0, 0", operands);
6281
6282 if (final_sequence)
6283 {
6284 rtx_insn *delay;
6285 rtx pat;
6286 int seen;
6287
6288 delay = NEXT_INSN (insn);
6289 gcc_assert (delay);
6290
6291 pat = PATTERN (delay);
6292
6293 /* We're going to output the insn in the delay slot manually.
6294 Make sure to output its source location first. */
6295 PATTERN (delay) = gen_blockage ();
6296 INSN_CODE (delay) = -1;
6297 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6298 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6299
6300 output_restore (pat);
6301 }
6302 else
6303 output_restore (NULL_RTX);
6304 }
6305
6306 return "";
6307 }
6308 \f
6309 /* Functions for handling argument passing.
6310
6311 For 32-bit, the first 6 args are normally in registers and the rest are
6312 pushed. Any arg that starts within the first 6 words is at least
6313 partially passed in a register unless its data type forbids.
6314
6315 For 64-bit, the argument registers are laid out as an array of 16 elements
6316 and arguments are added sequentially. The first 6 int args and up to the
6317 first 16 fp args (depending on size) are passed in regs.
6318
6319 Slot Stack Integral Float Float in structure Double Long Double
6320 ---- ----- -------- ----- ------------------ ------ -----------
6321 15 [SP+248] %f31 %f30,%f31 %d30
6322 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6323 13 [SP+232] %f27 %f26,%f27 %d26
6324 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6325 11 [SP+216] %f23 %f22,%f23 %d22
6326 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6327 9 [SP+200] %f19 %f18,%f19 %d18
6328 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6329 7 [SP+184] %f15 %f14,%f15 %d14
6330 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6331 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6332 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6333 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6334 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6335 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6336 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6337
6338 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6339
6340 Integral arguments are always passed as 64-bit quantities appropriately
6341 extended.
6342
6343 Passing of floating point values is handled as follows.
6344 If a prototype is in scope:
6345 If the value is a named argument (i.e. the function is not a stdarg
6346 function or the value is not part of the `...'), then it is passed in the appropriate
6347 fp reg.
6348 If the value is part of the `...' and is passed in one of the first 6
6349 slots then the value is passed in the appropriate int reg.
6350 If the value is part of the `...' and is not passed in one of the first 6
6351 slots then the value is passed in memory.
6352 If a prototype is not in scope:
6353 If the value is one of the first 6 arguments the value is passed in the
6354 appropriate integer reg and the appropriate fp reg.
6355 If the value is not one of the first 6 arguments the value is passed in
6356 the appropriate fp reg and in memory.
6357
6358
6359 Summary of the calling conventions implemented by GCC on the SPARC:
6360
6361 32-bit ABI:
6362 size argument return value
6363
6364 small integer <4 int. reg. int. reg.
6365 word 4 int. reg. int. reg.
6366 double word 8 int. reg. int. reg.
6367
6368 _Complex small integer <8 int. reg. int. reg.
6369 _Complex word 8 int. reg. int. reg.
6370 _Complex double word 16 memory int. reg.
6371
6372 vector integer <=8 int. reg. FP reg.
6373 vector integer >8 memory memory
6374
6375 float 4 int. reg. FP reg.
6376 double 8 int. reg. FP reg.
6377 long double 16 memory memory
6378
6379 _Complex float 8 memory FP reg.
6380 _Complex double 16 memory FP reg.
6381 _Complex long double 32 memory FP reg.
6382
6383 vector float any memory memory
6384
6385 aggregate any memory memory
6386
6387
6388
6389 64-bit ABI:
6390 size argument return value
6391
6392 small integer <8 int. reg. int. reg.
6393 word 8 int. reg. int. reg.
6394 double word 16 int. reg. int. reg.
6395
6396 _Complex small integer <16 int. reg. int. reg.
6397 _Complex word 16 int. reg. int. reg.
6398 _Complex double word 32 memory int. reg.
6399
6400 vector integer <=16 FP reg. FP reg.
6401 vector integer 16<s<=32 memory FP reg.
6402 vector integer >32 memory memory
6403
6404 float 4 FP reg. FP reg.
6405 double 8 FP reg. FP reg.
6406 long double 16 FP reg. FP reg.
6407
6408 _Complex float 8 FP reg. FP reg.
6409 _Complex double 16 FP reg. FP reg.
6410 _Complex long double 32 memory FP reg.
6411
6412 vector float <=16 FP reg. FP reg.
6413 vector float 16<s<=32 memory FP reg.
6414 vector float >32 memory memory
6415
6416 aggregate <=16 reg. reg.
6417 aggregate 16<s<=32 memory reg.
6418 aggregate >32 memory memory
6419
6420
6421
6422 Note #1: complex floating-point types follow the extended SPARC ABIs as
6423 implemented by the Sun compiler.
6424
6425 Note #2: integral vector types follow the scalar floating-point types
6426 conventions to match what is implemented by the Sun VIS SDK.
6427
6428 Note #3: floating-point vector types follow the aggregate types
6429 conventions. */
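/* As a worked example of the 64-bit layout above (informal, derived from
   the slot table), a prototyped call

     void f (int a, float b, double c, long double d);

   would assign its arguments as follows:

     a   slot 0   %o0
     b   slot 1   %f3   (a lone float is right-justified in the FP pair)
     c   slot 2   %d4
     d   slot 4   %q8   (slot 3 is skipped to honor 16-byte alignment)  */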
6430
6431
6432 /* Maximum number of int regs for args. */
6433 #define SPARC_INT_ARG_MAX 6
6434 /* Maximum number of fp regs for args. */
6435 #define SPARC_FP_ARG_MAX 16
6436 /* Number of words (partially) occupied for a given size in units. */
6437 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6438
6439 /* Handle the INIT_CUMULATIVE_ARGS macro.
6440 Initialize a variable CUM of type CUMULATIVE_ARGS
6441 for a call to a function whose data type is FNTYPE.
6442 For a library call, FNTYPE is 0. */
6443
6444 void
6445 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6446 {
6447 cum->words = 0;
6448 cum->prototype_p = fntype && prototype_p (fntype);
6449 cum->libcall_p = !fntype;
6450 }
6451
6452 /* Handle promotion of pointer and integer arguments. */
6453
6454 static machine_mode
6455 sparc_promote_function_mode (const_tree type, machine_mode mode,
6456 int *punsignedp, const_tree, int)
6457 {
6458 if (type && POINTER_TYPE_P (type))
6459 {
6460 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6461 return Pmode;
6462 }
6463
6464 /* Integral arguments are passed as full words, as per the ABI. */
6465 if (GET_MODE_CLASS (mode) == MODE_INT
6466 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6467 return word_mode;
6468
6469 return mode;
6470 }
6471
6472 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6473
6474 static bool
6475 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6476 {
6477 return TARGET_ARCH64 ? true : false;
6478 }
6479
6480 /* Traverse the record TYPE recursively and call FUNC on its fields.
6481 NAMED is true if this is for a named parameter. DATA is passed
6482 to FUNC for each field. OFFSET is the starting position and
6483 PACKED is true if we are inside a packed record. */
6484
6485 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6486 static void
6487 traverse_record_type (const_tree type, bool named, T *data,
6488 HOST_WIDE_INT offset = 0, bool packed = false)
6489 {
6490 /* The ABI obviously doesn't specify how packed structures are passed.
6491 These are passed in integer regs if possible, otherwise memory. */
6492 if (!packed)
6493 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6494 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6495 {
6496 packed = true;
6497 break;
6498 }
6499
6500 /* Walk the real fields, but skip those with no size or a zero size.
6501 ??? Fields with variable offset are handled as having zero offset. */
6502 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6503 if (TREE_CODE (field) == FIELD_DECL)
6504 {
6505 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6506 continue;
6507
6508 HOST_WIDE_INT bitpos = offset;
6509 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6510 bitpos += int_bit_position (field);
6511
6512 tree field_type = TREE_TYPE (field);
6513 if (TREE_CODE (field_type) == RECORD_TYPE)
6514 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6515 packed);
6516 else
6517 {
6518 const bool fp_type
6519 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6520 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6521 data);
6522 }
6523 }
6524 }
6525
6526 /* Handle recursive register classifying for structure layout. */
6527
6528 typedef struct
6529 {
6530 bool fp_regs; /* true if field eligible to FP registers. */
6531 bool fp_regs_in_first_word; /* true if such field in first word. */
6532 } classify_data_t;
6533
6534 /* A subroutine of function_arg_slotno. Classify the field. */
6535
6536 inline void
6537 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6538 classify_data_t *data)
6539 {
6540 if (fp)
6541 {
6542 data->fp_regs = true;
6543 if (bitpos < BITS_PER_WORD)
6544 data->fp_regs_in_first_word = true;
6545 }
6546 }
6547
6548 /* Compute the slot number to pass an argument in.
6549 Return the slot number or -1 if passing on the stack.
6550
6551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6552 the preceding args and about the function being called.
6553 MODE is the argument's machine mode.
6554 TYPE is the data type of the argument (as a tree).
6555 This is null for libcalls where that information may
6556 not be available.
6557 NAMED is nonzero if this argument is a named parameter
6558 (otherwise it is an extra parameter matching an ellipsis).
6559 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6560 *PREGNO records the register number to use if scalar type.
6561 *PPADDING records the amount of padding needed in words. */
6562
6563 static int
6564 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6565 const_tree type, bool named, bool incoming,
6566 int *pregno, int *ppadding)
6567 {
6568 int regbase = (incoming
6569 ? SPARC_INCOMING_INT_ARG_FIRST
6570 : SPARC_OUTGOING_INT_ARG_FIRST);
6571 int slotno = cum->words;
6572 enum mode_class mclass;
6573 int regno;
6574
6575 *ppadding = 0;
6576
6577 if (type && TREE_ADDRESSABLE (type))
6578 return -1;
6579
6580 if (TARGET_ARCH32
6581 && mode == BLKmode
6582 && type
6583 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6584 return -1;
6585
6586 /* For SPARC64, objects requiring 16-byte alignment get it. */
6587 if (TARGET_ARCH64
6588 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6589 && (slotno & 1) != 0)
6590 slotno++, *ppadding = 1;
6591
6592 mclass = GET_MODE_CLASS (mode);
6593 if (type && TREE_CODE (type) == VECTOR_TYPE)
6594 {
6595 /* Vector types deserve special treatment because they are
6596 polymorphic wrt their mode, depending upon whether VIS
6597 instructions are enabled. */
6598 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6599 {
6600 /* The SPARC port defines no floating-point vector modes. */
6601 gcc_assert (mode == BLKmode);
6602 }
6603 else
6604 {
6605 /* Integral vector types should either have a vector
6606 mode or an integral mode, because we are guaranteed
6607 by pass_by_reference that their size is not greater
6608 than 16 bytes and TImode is 16-byte wide. */
6609 gcc_assert (mode != BLKmode);
6610
6611 /* Vector integers are handled like floats according to
6612 the Sun VIS SDK. */
6613 mclass = MODE_FLOAT;
6614 }
6615 }
6616
6617 switch (mclass)
6618 {
6619 case MODE_FLOAT:
6620 case MODE_COMPLEX_FLOAT:
6621 case MODE_VECTOR_INT:
6622 if (TARGET_ARCH64 && TARGET_FPU && named)
6623 {
6624 /* If all arg slots are filled, then must pass on stack. */
6625 if (slotno >= SPARC_FP_ARG_MAX)
6626 return -1;
6627
6628 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6629 /* Arguments filling only one single FP register are
6630 right-justified in the outer double FP register. */
6631 if (GET_MODE_SIZE (mode) <= 4)
6632 regno++;
6633 break;
6634 }
6635 /* fallthrough */
6636
6637 case MODE_INT:
6638 case MODE_COMPLEX_INT:
6639 /* If all arg slots are filled, then must pass on stack. */
6640 if (slotno >= SPARC_INT_ARG_MAX)
6641 return -1;
6642
6643 regno = regbase + slotno;
6644 break;
6645
6646 case MODE_RANDOM:
6647 if (mode == VOIDmode)
6648 /* MODE is VOIDmode when generating the actual call. */
6649 return -1;
6650
6651 gcc_assert (mode == BLKmode);
6652
6653 if (TARGET_ARCH32
6654 || !type
6655 || (TREE_CODE (type) != RECORD_TYPE
6656 && TREE_CODE (type) != VECTOR_TYPE))
6657 {
6658 /* If all arg slots are filled, then must pass on stack. */
6659 if (slotno >= SPARC_INT_ARG_MAX)
6660 return -1;
6661
6662 regno = regbase + slotno;
6663 }
6664 else /* TARGET_ARCH64 && type */
6665 {
6666 /* If all arg slots are filled, then must pass on stack. */
6667 if (slotno >= SPARC_FP_ARG_MAX)
6668 return -1;
6669
6670 if (TREE_CODE (type) == RECORD_TYPE)
6671 {
6672 classify_data_t data = { false, false };
6673 traverse_record_type<classify_data_t, classify_registers>
6674 (type, named, &data);
6675
6676 if (data.fp_regs)
6677 {
6678 /* If all FP slots are filled except for the last one and
6679 there is no FP field in the first word, then must pass
6680 on stack. */
6681 if (slotno >= SPARC_FP_ARG_MAX - 1
6682 && !data.fp_regs_in_first_word)
6683 return -1;
6684 }
6685 else
6686 {
6687 /* If all int slots are filled, then must pass on stack. */
6688 if (slotno >= SPARC_INT_ARG_MAX)
6689 return -1;
6690 }
6691 }
6692
6693 /* PREGNO isn't set since both int and FP regs can be used. */
6694 return slotno;
6695 }
6696 break;
6697
6698 default :
6699 gcc_unreachable ();
6700 }
6701
6702 *pregno = regno;
6703 return slotno;
6704 }
6705
6706 /* Handle recursive register counting/assigning for structure layout. */
6707
6708 typedef struct
6709 {
6710 int slotno; /* slot number of the argument. */
6711 int regbase; /* regno of the base register. */
6712 int intoffset; /* offset of the first pending integer field. */
6713 int nregs; /* number of words passed in registers. */
6714 bool stack; /* true if part of the argument is on the stack. */
6715 rtx ret; /* return expression being built. */
6716 } assign_data_t;
6717
6718 /* A subroutine of function_arg_record_value. Compute the number of integer
6719 registers to be assigned between PARMS->intoffset and BITPOS. Return
6720 true if at least one integer register is assigned or false otherwise. */
6721
6722 static bool
6723 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6724 {
6725 if (data->intoffset < 0)
6726 return false;
6727
6728 const int intoffset = data->intoffset;
6729 data->intoffset = -1;
6730
6731 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6732 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6733 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6734 int nregs = (endbit - startbit) / BITS_PER_WORD;
6735
6736 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6737 {
6738 nregs = SPARC_INT_ARG_MAX - this_slotno;
6739
6740 /* We need to pass this field (partly) on the stack. */
6741 data->stack = 1;
6742 }
6743
6744 if (nregs <= 0)
6745 return false;
6746
6747 *pnregs = nregs;
6748 return true;
6749 }
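/* For instance (informal, 64-bit words): with data->intoffset == 32 and
   BITPOS == 128, startbit is 0 and endbit is 128, so nregs is 2, i.e. the
   pending integer fields lying between bits 32 and 127 occupy two integer
   registers.  */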
6750
6751 /* A subroutine of function_arg_record_value. Compute the number and the mode
6752 of the FP registers to be assigned for FIELD. Return true if at least one
6753 FP register is assigned or false otherwise. */
6754
6755 static bool
6756 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6757 assign_data_t *data,
6758 int *pnregs, machine_mode *pmode)
6759 {
6760 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6761 machine_mode mode = DECL_MODE (field);
6762 int nregs, nslots;
6763
6764 /* Slots are counted as words while regs are counted as having the size of
6765 the (inner) mode. */
6766 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6767 {
6768 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6769 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6770 }
6771 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6772 {
6773 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6774 nregs = 2;
6775 }
6776 else
6777 nregs = 1;
6778
6779 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6780
6781 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6782 {
6783 nslots = SPARC_FP_ARG_MAX - this_slotno;
6784 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6785
6786 /* We need to pass this field (partly) on the stack. */
6787 data->stack = 1;
6788
6789 if (nregs <= 0)
6790 return false;
6791 }
6792
6793 *pnregs = nregs;
6794 *pmode = mode;
6795 return true;
6796 }
6797
6798 /* A subroutine of function_arg_record_value. Count the number of registers
6799 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6800
6801 inline void
6802 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6803 assign_data_t *data)
6804 {
6805 if (fp)
6806 {
6807 int nregs;
6808 machine_mode mode;
6809
6810 if (compute_int_layout (bitpos, data, &nregs))
6811 data->nregs += nregs;
6812
6813 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6814 data->nregs += nregs;
6815 }
6816 else
6817 {
6818 if (data->intoffset < 0)
6819 data->intoffset = bitpos;
6820 }
6821 }
6822
6823 /* A subroutine of function_arg_record_value. Assign the bits of the
6824 structure between PARMS->intoffset and BITPOS to integer registers. */
6825
6826 static void
6827 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6828 {
6829 int intoffset = data->intoffset;
6830 machine_mode mode;
6831 int nregs;
6832
6833 if (!compute_int_layout (bitpos, data, &nregs))
6834 return;
6835
6836 /* If this is the trailing part of a word, only load that much into
6837 the register. Otherwise load the whole register. Note that in
6838 the latter case we may pick up unwanted bits. It's not a problem
6839 at the moment, but we may wish to revisit this. */
6840 if (intoffset % BITS_PER_WORD != 0)
6841 mode = smallest_int_mode_for_size (BITS_PER_WORD
6842 - intoffset % BITS_PER_WORD);
6843 else
6844 mode = word_mode;
6845
6846 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6847 unsigned int regno = data->regbase + this_slotno;
6848 intoffset /= BITS_PER_UNIT;
6849
6850 do
6851 {
6852 rtx reg = gen_rtx_REG (mode, regno);
6853 XVECEXP (data->ret, 0, data->stack + data->nregs)
6854 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6855 data->nregs += 1;
6856 mode = word_mode;
6857 regno += 1;
6858 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6859 }
6860 while (--nregs > 0);
6861 }
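/* For instance (informal, 64-bit): with intoffset == 32 bits, the first
   register is loaded in SImode only, intoffset becomes byte 4, the first
   piece is recorded at byte offset 4, and (4 | 7) + 1 == 8 realigns the
   next piece to byte 8, from which point full word_mode registers are
   used.  */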
6862
6863 /* A subroutine of function_arg_record_value. Assign FIELD at position
6864 BITPOS to FP registers. */
6865
6866 static void
6867 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6868 assign_data_t *data)
6869 {
6870 int nregs;
6871 machine_mode mode;
6872
6873 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6874 return;
6875
6876 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6877 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6878 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6879 regno++;
6880 int pos = bitpos / BITS_PER_UNIT;
6881
6882 do
6883 {
6884 rtx reg = gen_rtx_REG (mode, regno);
6885 XVECEXP (data->ret, 0, data->stack + data->nregs)
6886 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6887 data->nregs += 1;
6888 regno += GET_MODE_SIZE (mode) / 4;
6889 pos += GET_MODE_SIZE (mode);
6890 }
6891 while (--nregs > 0);
6892 }
6893
6894 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6895 the structure between PARMS->intoffset and BITPOS to registers. */
6896
6897 inline void
6898 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6899 assign_data_t *data)
6900 {
6901 if (fp)
6902 {
6903 assign_int_registers (bitpos, data);
6904
6905 assign_fp_registers (field, bitpos, data);
6906 }
6907 else
6908 {
6909 if (data->intoffset < 0)
6910 data->intoffset = bitpos;
6911 }
6912 }
6913
6914 /* Used by function_arg and sparc_function_value_1 to implement the complex
6915 conventions of the 64-bit ABI for passing and returning structures.
6916 Return an expression valid as a return value for the FUNCTION_ARG
6917 and TARGET_FUNCTION_VALUE.
6918
6919 TYPE is the data type of the argument (as a tree).
6920 This is null for libcalls where that information may
6921 not be available.
6922 MODE is the argument's machine mode.
6923 SLOTNO is the index number of the argument's slot in the parameter array.
6924 NAMED is true if this argument is a named parameter
6925 (otherwise it is an extra parameter matching an ellipsis).
6926 REGBASE is the regno of the base register for the parameter array. */
6927
6928 static rtx
6929 function_arg_record_value (const_tree type, machine_mode mode,
6930 int slotno, bool named, int regbase)
6931 {
6932 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6933 assign_data_t data;
6934 int nregs;
6935
6936 data.slotno = slotno;
6937 data.regbase = regbase;
6938
6939 /* Count how many registers we need. */
6940 data.nregs = 0;
6941 data.intoffset = 0;
6942 data.stack = false;
6943 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6944
6945 /* Take into account pending integer fields. */
6946 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6947 data.nregs += nregs;
6948
6949 /* Allocate the vector and handle some annoying special cases. */
6950 nregs = data.nregs;
6951
6952 if (nregs == 0)
6953 {
6954 /* ??? Empty structure has no value? Duh? */
6955 if (typesize <= 0)
6956 {
6957 /* Though there's nothing really to store, return a word register
6958 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6959 leads to breakage due to the fact that there are zero bytes to
6960 load. */
6961 return gen_rtx_REG (mode, regbase);
6962 }
6963
6964 /* ??? C++ has structures with no fields, and yet a size. Give up
6965 for now and pass everything back in integer registers. */
6966 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6967 if (nregs + slotno > SPARC_INT_ARG_MAX)
6968 nregs = SPARC_INT_ARG_MAX - slotno;
6969 }
6970
6971 gcc_assert (nregs > 0);
6972
6973 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6974
6975 /* If at least one field must be passed on the stack, generate
6976 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6977 also be passed on the stack. We can't do much better because the
6978 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6979 of structures for which the fields passed exclusively in registers
6980 are not at the beginning of the structure. */
6981 if (data.stack)
6982 XVECEXP (data.ret, 0, 0)
6983 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6984
6985 /* Assign the registers. */
6986 data.nregs = 0;
6987 data.intoffset = 0;
6988 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6989
6990 /* Assign pending integer fields. */
6991 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6992
6993 gcc_assert (data.nregs == nregs);
6994
6995 return data.ret;
6996 }
6997
6998 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6999 of the 64-bit ABI for passing and returning unions.
7000 Return an expression valid as a return value for the FUNCTION_ARG
7001 and TARGET_FUNCTION_VALUE.
7002
7003 SIZE is the size in bytes of the union.
7004 MODE is the argument's machine mode.
7005 REGNO is the hard register the union will be passed in. */
7006
7007 static rtx
7008 function_arg_union_value (int size, machine_mode mode, int slotno,
7009 int regno)
7010 {
7011 int nwords = CEIL_NWORDS (size), i;
7012 rtx regs;
7013
7014 /* See comment in previous function for empty structures. */
7015 if (nwords == 0)
7016 return gen_rtx_REG (mode, regno);
7017
7018 if (slotno == SPARC_INT_ARG_MAX - 1)
7019 nwords = 1;
7020
7021 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7022
7023 for (i = 0; i < nwords; i++)
7024 {
7025 /* Unions are passed left-justified. */
7026 XVECEXP (regs, 0, i)
7027 = gen_rtx_EXPR_LIST (VOIDmode,
7028 gen_rtx_REG (word_mode, regno),
7029 GEN_INT (UNITS_PER_WORD * i));
7030 regno++;
7031 }
7032
7033 return regs;
7034 }
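/* For instance (informal): a 12-byte union starting in slot 0 yields
   CEIL_NWORDS (12) == 2 pieces, i.e. %o0 at byte 0 and %o1 at byte 8;
   had the same union started in the last slot (slot 5), only its first
   word would go in a register.  */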
7035
7036 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7037 for passing and returning BLKmode vectors.
7038 Return an expression valid as a return value for the FUNCTION_ARG
7039 and TARGET_FUNCTION_VALUE.
7040
7041 SIZE is the size in bytes of the vector.
7042 REGNO is the FP hard register the vector will be passed in. */
7043
7044 static rtx
7045 function_arg_vector_value (int size, int regno)
7046 {
7047 const int nregs = MAX (1, size / 8);
7048 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7049
7050 if (size < 8)
7051 XVECEXP (regs, 0, 0)
7052 = gen_rtx_EXPR_LIST (VOIDmode,
7053 gen_rtx_REG (SImode, regno),
7054 const0_rtx);
7055 else
7056 for (int i = 0; i < nregs; i++)
7057 XVECEXP (regs, 0, i)
7058 = gen_rtx_EXPR_LIST (VOIDmode,
7059 gen_rtx_REG (DImode, regno + 2*i),
7060 GEN_INT (i*8));
7061
7062 return regs;
7063 }
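/* For instance (informal): a 16-byte BLKmode vector handed the FP register
   of slot 0 comes back as two DImode pieces, %f0 at byte 0 and %f2 at
   byte 8, while a 4-byte vector uses a single SImode piece at byte 0.  */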
7064
7065 /* Determine where to put an argument to a function.
7066 Value is zero to push the argument on the stack,
7067 or a hard register in which to store the argument.
7068
7069 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7070 the preceding args and about the function being called.
7071 MODE is the argument's machine mode.
7072 TYPE is the data type of the argument (as a tree).
7073 This is null for libcalls where that information may
7074 not be available.
7075 NAMED is true if this argument is a named parameter
7076 (otherwise it is an extra parameter matching an ellipsis).
7077 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7078 TARGET_FUNCTION_INCOMING_ARG. */
7079
7080 static rtx
7081 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7082 const_tree type, bool named, bool incoming)
7083 {
7084 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7085
7086 int regbase = (incoming
7087 ? SPARC_INCOMING_INT_ARG_FIRST
7088 : SPARC_OUTGOING_INT_ARG_FIRST);
7089 int slotno, regno, padding;
7090 enum mode_class mclass = GET_MODE_CLASS (mode);
7091
7092 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7093 &regno, &padding);
7094 if (slotno == -1)
7095 return 0;
7096
7097 /* Vector types deserve special treatment because they are polymorphic wrt
7098 their mode, depending upon whether VIS instructions are enabled. */
7099 if (type && TREE_CODE (type) == VECTOR_TYPE)
7100 {
7101 HOST_WIDE_INT size = int_size_in_bytes (type);
7102 gcc_assert ((TARGET_ARCH32 && size <= 8)
7103 || (TARGET_ARCH64 && size <= 16));
7104
7105 if (mode == BLKmode)
7106 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7107
7108 mclass = MODE_FLOAT;
7109 }
7110
7111 if (TARGET_ARCH32)
7112 return gen_rtx_REG (mode, regno);
7113
7114 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7115 and are promoted to registers if possible. */
7116 if (type && TREE_CODE (type) == RECORD_TYPE)
7117 {
7118 HOST_WIDE_INT size = int_size_in_bytes (type);
7119 gcc_assert (size <= 16);
7120
7121 return function_arg_record_value (type, mode, slotno, named, regbase);
7122 }
7123
7124 /* Unions up to 16 bytes in size are passed in integer registers. */
7125 else if (type && TREE_CODE (type) == UNION_TYPE)
7126 {
7127 HOST_WIDE_INT size = int_size_in_bytes (type);
7128 gcc_assert (size <= 16);
7129
7130 return function_arg_union_value (size, mode, slotno, regno);
7131 }
7132
7133 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7134 but also have the slot allocated for them.
7135 If no prototype is in scope fp values in register slots get passed
7136 in two places, either fp regs and int regs or fp regs and memory. */
7137 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7138 && SPARC_FP_REG_P (regno))
7139 {
7140 rtx reg = gen_rtx_REG (mode, regno);
7141 if (cum->prototype_p || cum->libcall_p)
7142 return reg;
7143 else
7144 {
7145 rtx v0, v1;
7146
7147 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7148 {
7149 int intreg;
7150
7151 /* On incoming, we don't need to know that the value
7152 is passed in %f0 and %i0, and it confuses other parts
7153 causing needless spillage even on the simplest cases. */
7154 if (incoming)
7155 return reg;
7156
7157 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7158 + (regno - SPARC_FP_ARG_FIRST) / 2);
7159
7160 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7161 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7162 const0_rtx);
7163 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7164 }
7165 else
7166 {
7167 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7168 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7169 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7170 }
7171 }
7172 }
7173
7174 /* All other aggregate types are passed in an integer register in a mode
7175 corresponding to the size of the type. */
7176 else if (type && AGGREGATE_TYPE_P (type))
7177 {
7178 HOST_WIDE_INT size = int_size_in_bytes (type);
7179 gcc_assert (size <= 16);
7180
7181 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7182 }
7183
7184 return gen_rtx_REG (mode, regno);
7185 }
7186
7187 /* Handle the TARGET_FUNCTION_ARG target hook. */
7188
7189 static rtx
7190 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7191 const_tree type, bool named)
7192 {
7193 return sparc_function_arg_1 (cum, mode, type, named, false);
7194 }
7195
7196 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7197
7198 static rtx
7199 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7200 const_tree type, bool named)
7201 {
7202 return sparc_function_arg_1 (cum, mode, type, named, true);
7203 }
7204
7205 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7206
7207 static unsigned int
7208 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7209 {
7210 return ((TARGET_ARCH64
7211 && (GET_MODE_ALIGNMENT (mode) == 128
7212 || (type && TYPE_ALIGN (type) == 128)))
7213 ? 128
7214 : PARM_BOUNDARY);
7215 }
7216
7217 /* For an arg passed partly in registers and partly in memory,
7218 this is the number of bytes of registers used.
7219 For args passed entirely in registers or entirely in memory, zero.
7220
7221 Any arg that starts in the first 6 regs but won't entirely fit in them
7222 needs partial registers on v8. On v9, structures with integer
7223 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7224 values that begin in the last fp reg [where "last fp reg" varies with the
7225 mode] will be split between that reg and memory. */
7226
7227 static int
7228 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7229 tree type, bool named)
7230 {
7231 int slotno, regno, padding;
7232
7233 /* We pass false for incoming here; it doesn't matter. */
7234 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7235 false, &regno, &padding);
7236
7237 if (slotno == -1)
7238 return 0;
7239
7240 if (TARGET_ARCH32)
7241 {
7242 if ((slotno + (mode == BLKmode
7243 ? CEIL_NWORDS (int_size_in_bytes (type))
7244 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7245 > SPARC_INT_ARG_MAX)
7246 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7247 }
7248 else
7249 {
7250 /* We are guaranteed by pass_by_reference that the size of the
7251 argument is not greater than 16 bytes, so we only need to return
7252 one word if the argument is partially passed in registers. */
7253
7254 if (type && AGGREGATE_TYPE_P (type))
7255 {
7256 int size = int_size_in_bytes (type);
7257
7258 if (size > UNITS_PER_WORD
7259 && (slotno == SPARC_INT_ARG_MAX - 1
7260 || slotno == SPARC_FP_ARG_MAX - 1))
7261 return UNITS_PER_WORD;
7262 }
7263 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7264 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7265 && ! (TARGET_FPU && named)))
7266 {
7267 /* The complex types are passed as packed types. */
7268 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7269 && slotno == SPARC_INT_ARG_MAX - 1)
7270 return UNITS_PER_WORD;
7271 }
7272 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7273 {
7274 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7275 > SPARC_FP_ARG_MAX)
7276 return UNITS_PER_WORD;
7277 }
7278 }
7279
7280 return 0;
7281 }
7282
7283 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7284 Specify whether to pass the argument by reference. */
7285
7286 static bool
7287 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7288 machine_mode mode, const_tree type,
7289 bool named ATTRIBUTE_UNUSED)
7290 {
7291 if (TARGET_ARCH32)
7292 /* Original SPARC 32-bit ABI says that structures and unions,
7293 and quad-precision floats are passed by reference. For Pascal,
7294 also pass arrays by reference. All other base types are passed
7295 in registers.
7296
7297 Extended ABI (as implemented by the Sun compiler) says that all
7298 complex floats are passed by reference. Pass complex integers
7299 in registers up to 8 bytes. More generally, enforce the 2-word
7300 cap for passing arguments in registers.
7301
7302 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7303 integers are passed like floats of the same size, that is in
7304 registers up to 8 bytes. Pass all vector floats by reference
7305 like structures and unions. */
7306 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7307 || mode == SCmode
7308 /* Catch CDImode, TFmode, DCmode and TCmode. */
7309 || GET_MODE_SIZE (mode) > 8
7310 || (type
7311 && TREE_CODE (type) == VECTOR_TYPE
7312 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7313 else
7314 /* Original SPARC 64-bit ABI says that structures and unions
7315 smaller than 16 bytes are passed in registers, as well as
7316 all other base types.
7317
7318 Extended ABI (as implemented by the Sun compiler) says that
7319 complex floats are passed in registers up to 16 bytes. Pass
7320 all complex integers in registers up to 16 bytes. More generally,
7321 enforce the 2-word cap for passing arguments in registers.
7322
7323 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7324 integers are passed like floats of the same size, that is in
7325 registers (up to 16 bytes). Pass all vector floats like structures
7326 and unions. */
7327 return ((type
7328 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7329 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7330 /* Catch CTImode and TCmode. */
7331 || GET_MODE_SIZE (mode) > 16);
7332 }
7333
7334 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7335 Update the data in CUM to advance over an argument
7336 of mode MODE and data type TYPE.
7337 TYPE is null for libcalls where that information may not be available. */
7338
7339 static void
7340 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7341 const_tree type, bool named)
7342 {
7343 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7344 int regno, padding;
7345
7346 /* We pass false for incoming here; it doesn't matter. */
7347 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7348
7349 /* If argument requires leading padding, add it. */
7350 cum->words += padding;
7351
7352 if (TARGET_ARCH32)
7353 cum->words += (mode == BLKmode
7354 ? CEIL_NWORDS (int_size_in_bytes (type))
7355 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7356 else
7357 {
7358 if (type && AGGREGATE_TYPE_P (type))
7359 {
7360 int size = int_size_in_bytes (type);
7361
7362 if (size <= 8)
7363 ++cum->words;
7364 else if (size <= 16)
7365 cum->words += 2;
7366 else /* passed by reference */
7367 ++cum->words;
7368 }
7369 else
7370 cum->words += (mode == BLKmode
7371 ? CEIL_NWORDS (int_size_in_bytes (type))
7372 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7373 }
7374 }
7375
7376 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7377 are always stored left-shifted in their argument slot. */
7378
7379 static pad_direction
7380 sparc_function_arg_padding (machine_mode mode, const_tree type)
7381 {
7382 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7383 return PAD_UPWARD;
7384
7385 /* Fall back to the default. */
7386 return default_function_arg_padding (mode, type);
7387 }
7388
7389 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7390 Specify whether to return the return value in memory. */
7391
7392 static bool
7393 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7394 {
7395 if (TARGET_ARCH32)
7396 /* Original SPARC 32-bit ABI says that structures and unions,
7397 and quad-precision floats are returned in memory. All other
7398 base types are returned in registers.
7399
7400 Extended ABI (as implemented by the Sun compiler) says that
7401 all complex floats are returned in registers (8 FP registers
7402 at most for '_Complex long double'). Return all complex integers
7403 in registers (4 at most for '_Complex long long').
7404
7405 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7406 integers are returned like floats of the same size, that is in
7407 registers up to 8 bytes and in memory otherwise. Return all
7408 vector floats in memory like structures and unions; note that
7409 they always have BLKmode like the latter. */
7410 return (TYPE_MODE (type) == BLKmode
7411 || TYPE_MODE (type) == TFmode
7412 || (TREE_CODE (type) == VECTOR_TYPE
7413 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7414 else
7415 /* Original SPARC 64-bit ABI says that structures and unions
7416 smaller than 32 bytes are returned in registers, as well as
7417 all other base types.
7418
7419 Extended ABI (as implemented by the Sun compiler) says that all
7420 complex floats are returned in registers (8 FP registers at most
7421 for '_Complex long double'). Return all complex integers in
7422 registers (4 at most for '_Complex TItype').
7423
7424 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7425 integers are returned like floats of the same size, that is in
7426 registers. Return all vector floats like structures and unions;
7427 note that they always have BLKmode like the latter. */
7428 return (TYPE_MODE (type) == BLKmode
7429 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7430 }
7431
7432 /* Handle the TARGET_STRUCT_VALUE target hook.
7433 Return where to find the structure return value address. */
7434
7435 static rtx
7436 sparc_struct_value_rtx (tree fndecl, int incoming)
7437 {
7438 if (TARGET_ARCH64)
7439 return 0;
7440 else
7441 {
7442 rtx mem;
7443
7444 if (incoming)
7445 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7446 STRUCT_VALUE_OFFSET));
7447 else
7448 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7449 STRUCT_VALUE_OFFSET));
7450
7451 /* Only follow the SPARC ABI for fixed-size structure returns.
7452 Variable-size structure returns are handled per the normal
7453 procedures in GCC. This is enabled by -mstd-struct-return. */
7454 if (incoming == 2
7455 && sparc_std_struct_return
7456 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7457 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7458 {
7459 /* We must check and adjust the return address, since the caller
7460 may or may not have actually provided the return object. */
7461 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7462 rtx scratch = gen_reg_rtx (SImode);
7463 rtx_code_label *endlab = gen_label_rtx ();
7464
7465 /* Calculate the return object size. */
7466 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7467 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7468 /* Construct a temporary return value. */
7469 rtx temp_val
7470 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7471
7472 /* Implement SPARC 32-bit psABI callee return struct checking:
7473
7474 Fetch the instruction we will return to and see if it is an
7475 unimp instruction (its most significant 10 bits will be
7476 zero). */
7477 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7478 plus_constant (Pmode,
7479 ret_reg, 8)));
7480 /* Assume the size is valid and pre-adjust. */
7481 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7482 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7483 0, endlab);
7484 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7485 /* Write the address of the memory pointed to by temp_val into
7486 the memory pointed to by mem. */
7487 emit_move_insn (mem, XEXP (temp_val, 0));
7488 emit_label (endlab);
7489 }
7490
7491 return mem;
7492 }
7493 }
7494
7495 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7496 For v9, function return values are subject to the same rules as arguments,
7497 except that up to 32 bytes may be returned in registers. */
7498
7499 static rtx
7500 sparc_function_value_1 (const_tree type, machine_mode mode,
7501 bool outgoing)
7502 {
7503 /* Beware that the two values are swapped here wrt function_arg. */
7504 int regbase = (outgoing
7505 ? SPARC_INCOMING_INT_ARG_FIRST
7506 : SPARC_OUTGOING_INT_ARG_FIRST);
7507 enum mode_class mclass = GET_MODE_CLASS (mode);
7508 int regno;
7509
7510 /* Vector types deserve special treatment because they are polymorphic wrt
7511 their mode, depending upon whether VIS instructions are enabled. */
7512 if (type && TREE_CODE (type) == VECTOR_TYPE)
7513 {
7514 HOST_WIDE_INT size = int_size_in_bytes (type);
7515 gcc_assert ((TARGET_ARCH32 && size <= 8)
7516 || (TARGET_ARCH64 && size <= 32));
7517
7518 if (mode == BLKmode)
7519 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7520
7521 mclass = MODE_FLOAT;
7522 }
7523
7524 if (TARGET_ARCH64 && type)
7525 {
7526 /* Structures up to 32 bytes in size are returned in registers. */
7527 if (TREE_CODE (type) == RECORD_TYPE)
7528 {
7529 HOST_WIDE_INT size = int_size_in_bytes (type);
7530 gcc_assert (size <= 32);
7531
7532 return function_arg_record_value (type, mode, 0, 1, regbase);
7533 }
7534
7535 /* Unions up to 32 bytes in size are returned in integer registers. */
7536 else if (TREE_CODE (type) == UNION_TYPE)
7537 {
7538 HOST_WIDE_INT size = int_size_in_bytes (type);
7539 gcc_assert (size <= 32);
7540
7541 return function_arg_union_value (size, mode, 0, regbase);
7542 }
7543
7544 /* Objects that require it are returned in FP registers. */
7545 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7546 ;
7547
7548 /* All other aggregate types are returned in an integer register in a
7549 mode corresponding to the size of the type. */
7550 else if (AGGREGATE_TYPE_P (type))
7551 {
7554 HOST_WIDE_INT size = int_size_in_bytes (type);
7555 gcc_assert (size <= 32);
7556
7557 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7558
7559 /* ??? We probably should have made the same ABI change in
7560 3.4.0 as the one we made for unions. The latter was
7561 required by the SCD though, while the former is not
7562 specified, so we favored compatibility and efficiency.
7563
7564 Now we're stuck for aggregates larger than 16 bytes,
7565 because OImode vanished in the meantime. Let's not
7566 try to be unduly clever, and simply follow the ABI
7567 for unions in that case. */
7568 if (mode == BLKmode)
7569 return function_arg_union_value (size, mode, 0, regbase);
7570 else
7571 mclass = MODE_INT;
7572 }
7573
7574 /* We should only have pointer and integer types at this point. This
7575 must match sparc_promote_function_mode. */
7576 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7577 mode = word_mode;
7578 }
7579
7580 /* We should only have pointer and integer types at this point, except with
7581 -freg-struct-return. This must match sparc_promote_function_mode. */
7582 else if (TARGET_ARCH32
7583 && !(type && AGGREGATE_TYPE_P (type))
7584 && mclass == MODE_INT
7585 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7586 mode = word_mode;
7587
7588 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7589 regno = SPARC_FP_ARG_FIRST;
7590 else
7591 regno = regbase;
7592
7593 return gen_rtx_REG (mode, regno);
7594 }
7595
7596 /* Handle TARGET_FUNCTION_VALUE.
7597 On the SPARC, the value is found in the first "output" register, but the
7598 called function leaves it in the first "input" register. */
7599
7600 static rtx
7601 sparc_function_value (const_tree valtype,
7602 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7603 bool outgoing)
7604 {
7605 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7606 }
7607
7608 /* Handle TARGET_LIBCALL_VALUE. */
7609
7610 static rtx
7611 sparc_libcall_value (machine_mode mode,
7612 const_rtx fun ATTRIBUTE_UNUSED)
7613 {
7614 return sparc_function_value_1 (NULL_TREE, mode, false);
7615 }
7616
7617 /* Handle FUNCTION_VALUE_REGNO_P.
7618 On the SPARC, the first "output" reg is used for integer values, and the
7619 first floating point register is used for floating point values. */
7620
7621 static bool
7622 sparc_function_value_regno_p (const unsigned int regno)
7623 {
7624 return (regno == 8 || (TARGET_FPU && regno == 32));
7625 }
7626
7627 /* Do what is necessary for `va_start'. We look at the current function
7628 to determine if stdarg or varargs is used and return the address of
7629 the first unnamed parameter. */
7630
7631 static rtx
7632 sparc_builtin_saveregs (void)
7633 {
7634 int first_reg = crtl->args.info.words;
7635 rtx address;
7636 int regno;
7637
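/* Save the remaining unnamed incoming integer argument registers into
   their reserved stack slots so that va_arg can read them from memory.  */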
7638 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7639 emit_move_insn (gen_rtx_MEM (word_mode,
7640 gen_rtx_PLUS (Pmode,
7641 frame_pointer_rtx,
7642 GEN_INT (FIRST_PARM_OFFSET (0)
7643 + (UNITS_PER_WORD
7644 * regno)))),
7645 gen_rtx_REG (word_mode,
7646 SPARC_INCOMING_INT_ARG_FIRST + regno));
7647
7648 address = gen_rtx_PLUS (Pmode,
7649 frame_pointer_rtx,
7650 GEN_INT (FIRST_PARM_OFFSET (0)
7651 + UNITS_PER_WORD * first_reg));
7652
7653 return address;
7654 }
7655
7656 /* Implement `va_start' for stdarg. */
7657
7658 static void
7659 sparc_va_start (tree valist, rtx nextarg)
7660 {
7661 nextarg = expand_builtin_saveregs ();
7662 std_expand_builtin_va_start (valist, nextarg);
7663 }
7664
7665 /* Implement `va_arg' for stdarg. */
7666
7667 static tree
7668 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7669 gimple_seq *post_p)
7670 {
7671 HOST_WIDE_INT size, rsize, align;
7672 tree addr, incr;
7673 bool indirect;
7674 tree ptrtype = build_pointer_type (type);
7675
7676 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7677 {
7678 indirect = true;
7679 size = rsize = UNITS_PER_WORD;
7680 align = 0;
7681 }
7682 else
7683 {
7684 indirect = false;
7685 size = int_size_in_bytes (type);
7686 rsize = ROUND_UP (size, UNITS_PER_WORD);
7687 align = 0;
7688
7689 if (TARGET_ARCH64)
7690 {
7691 /* For SPARC64, objects requiring 16-byte alignment get it. */
7692 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7693 align = 2 * UNITS_PER_WORD;
7694
7695 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7696 are left-justified in their slots. */
7697 if (AGGREGATE_TYPE_P (type))
7698 {
7699 if (size == 0)
7700 size = rsize = UNITS_PER_WORD;
7701 else
7702 size = rsize;
7703 }
7704 }
7705 }
7706
7707 incr = valist;
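/* If extra alignment is required, round the address up to it:
   (incr + align - 1) & -align.  */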
7708 if (align)
7709 {
7710 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7711 incr = fold_convert (sizetype, incr);
7712 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7713 size_int (-align));
7714 incr = fold_convert (ptr_type_node, incr);
7715 }
7716
7717 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7718 addr = incr;
7719
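/* On this big-endian target, a value smaller than its slot occupies the
   last SIZE bytes of it, so step past the leading padding.  */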
7720 if (BYTES_BIG_ENDIAN && size < rsize)
7721 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7722
7723 if (indirect)
7724 {
7725 addr = fold_convert (build_pointer_type (ptrtype), addr);
7726 addr = build_va_arg_indirect_ref (addr);
7727 }
7728
7729 /* If the address isn't aligned properly for the type, we need a temporary.
7730 FIXME: This is inefficient; usually we can do this in registers. */
7731 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7732 {
7733 tree tmp = create_tmp_var (type, "va_arg_tmp");
7734 tree dest_addr = build_fold_addr_expr (tmp);
7735 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7736 3, dest_addr, addr, size_int (rsize));
7737 TREE_ADDRESSABLE (tmp) = 1;
7738 gimplify_and_add (copy, pre_p);
7739 addr = dest_addr;
7740 }
7741
7742 else
7743 addr = fold_convert (ptrtype, addr);
7744
7745 incr = fold_build_pointer_plus_hwi (incr, rsize);
7746 gimplify_assign (valist, incr, post_p);
7747
7748 return build_va_arg_indirect_ref (addr);
7749 }
7750 \f
7751 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7752 Specify whether the vector mode is supported by the hardware. */
7753
7754 static bool
7755 sparc_vector_mode_supported_p (machine_mode mode)
7756 {
7757 return TARGET_VIS && VECTOR_MODE_P (mode);
7758 }
7759 \f
7760 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7761
7762 static machine_mode
7763 sparc_preferred_simd_mode (scalar_mode mode)
7764 {
7765 if (TARGET_VIS)
7766 switch (mode)
7767 {
7768 case E_SImode:
7769 return V2SImode;
7770 case E_HImode:
7771 return V4HImode;
7772 case E_QImode:
7773 return V8QImode;
7774
7775 default:;
7776 }
7777
7778 return word_mode;
7779 }
7780 \f
7781 /* Return the string to output an unconditional branch to LABEL, which is
7782 the operand number of the label.
7783
7784 DEST is the destination insn (i.e. the label), INSN is the source. */
7785
7786 const char *
7787 output_ubranch (rtx dest, rtx_insn *insn)
7788 {
7789 static char string[64];
7790 bool v9_form = false;
7791 int delta;
7792 char *p;
7793
7794 /* Even if we are trying to use cbcond for this, evaluate
7795 whether we can use V9 branches as our backup plan. */
7796
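/* Assume a very distant target if instruction addresses are not
   available yet.  */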
7797 delta = 5000000;
7798 if (INSN_ADDRESSES_SET_P ())
7799 delta = (INSN_ADDRESSES (INSN_UID (dest))
7800 - INSN_ADDRESSES (INSN_UID (insn)));
7801
7802 /* Leave some instructions for "slop". */
7803 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7804 v9_form = true;
7805
7806 if (TARGET_CBCOND)
7807 {
7808 bool emit_nop = emit_cbcond_nop (insn);
7809 bool far = false;
7810 const char *rval;
7811
7812 if (delta < -500 || delta > 500)
7813 far = true;
7814
7815 if (far)
7816 {
7817 if (v9_form)
7818 rval = "ba,a,pt\t%%xcc, %l0";
7819 else
7820 rval = "b,a\t%l0";
7821 }
7822 else
7823 {
7824 if (emit_nop)
7825 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7826 else
7827 rval = "cwbe\t%%g0, %%g0, %l0";
7828 }
7829 return rval;
7830 }
7831
7832 if (v9_form)
7833 strcpy (string, "ba%*,pt\t%%xcc, ");
7834 else
7835 strcpy (string, "b%*\t");
7836
7837 p = strchr (string, '\0');
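/* Append the "%l0" label operand followed by "%(", which supplies a nop
   when the delay slot is left unfilled.  */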
7838 *p++ = '%';
7839 *p++ = 'l';
7840 *p++ = '0';
7841 *p++ = '%';
7842 *p++ = '(';
7843 *p = '\0';
7844
7845 return string;
7846 }
7847
7848 /* Return the string to output a conditional branch to LABEL, which is
7849 the operand number of the label. OP is the conditional expression.
7850 XEXP (OP, 0) is assumed to be a condition code register (integer or
7851 floating point) and its mode specifies what kind of comparison we made.
7852
7853 DEST is the destination insn (i.e. the label), INSN is the source.
7854
7855 REVERSED is nonzero if we should reverse the sense of the comparison.
7856
7857 ANNUL is nonzero if we should generate an annulling branch. */
7858
7859 const char *
7860 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7861 rtx_insn *insn)
7862 {
7863 static char string[64];
7864 enum rtx_code code = GET_CODE (op);
7865 rtx cc_reg = XEXP (op, 0);
7866 machine_mode mode = GET_MODE (cc_reg);
7867 const char *labelno, *branch;
7868 int spaces = 8, far;
7869 char *p;
7870
7871 /* v9 branches are limited to +-1MB. If it is too far away,
7872 change
7873
7874 bne,pt %xcc, .LC30
7875
7876 to
7877
7878 be,pn %xcc, .+12
7879 nop
7880 ba .LC30
7881
7882 and
7883
7884 fbne,a,pn %fcc2, .LC29
7885
7886 to
7887
7888 fbe,pt %fcc2, .+16
7889 nop
7890 ba .LC29 */
7891
7892 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7893 if (reversed ^ far)
7894 {
7895 /* Reversal of FP compares needs care -- an ordered compare
7896 becomes an unordered compare and vice versa. */
7897 if (mode == CCFPmode || mode == CCFPEmode)
7898 code = reverse_condition_maybe_unordered (code);
7899 else
7900 code = reverse_condition (code);
7901 }
7902
7903 /* Start by writing the branch condition. */
7904 if (mode == CCFPmode || mode == CCFPEmode)
7905 {
7906 switch (code)
7907 {
7908 case NE:
7909 branch = "fbne";
7910 break;
7911 case EQ:
7912 branch = "fbe";
7913 break;
7914 case GE:
7915 branch = "fbge";
7916 break;
7917 case GT:
7918 branch = "fbg";
7919 break;
7920 case LE:
7921 branch = "fble";
7922 break;
7923 case LT:
7924 branch = "fbl";
7925 break;
7926 case UNORDERED:
7927 branch = "fbu";
7928 break;
7929 case ORDERED:
7930 branch = "fbo";
7931 break;
7932 case UNGT:
7933 branch = "fbug";
7934 break;
7935 case UNLT:
7936 branch = "fbul";
7937 break;
7938 case UNEQ:
7939 branch = "fbue";
7940 break;
7941 case UNGE:
7942 branch = "fbuge";
7943 break;
7944 case UNLE:
7945 branch = "fbule";
7946 break;
7947 case LTGT:
7948 branch = "fblg";
7949 break;
7950 default:
7951 gcc_unreachable ();
7952 }
7953
7954 /* ??? !v9: FP branches cannot be preceded by another floating point
7955 insn. Because there is currently no concept of pre-delay slots,
7956 we can fix this only by always emitting a nop before a floating
7957 point branch. */
7958
7959 string[0] = '\0';
7960 if (! TARGET_V9)
7961 strcpy (string, "nop\n\t");
7962 strcat (string, branch);
7963 }
7964 else
7965 {
7966 switch (code)
7967 {
7968 case NE:
7969 if (mode == CCVmode || mode == CCXVmode)
7970 branch = "bvs";
7971 else
7972 branch = "bne";
7973 break;
7974 case EQ:
7975 if (mode == CCVmode || mode == CCXVmode)
7976 branch = "bvc";
7977 else
7978 branch = "be";
7979 break;
7980 case GE:
7981 if (mode == CCNZmode || mode == CCXNZmode)
7982 branch = "bpos";
7983 else
7984 branch = "bge";
7985 break;
7986 case GT:
7987 branch = "bg";
7988 break;
7989 case LE:
7990 branch = "ble";
7991 break;
7992 case LT:
7993 if (mode == CCNZmode || mode == CCXNZmode)
7994 branch = "bneg";
7995 else
7996 branch = "bl";
7997 break;
7998 case GEU:
7999 branch = "bgeu";
8000 break;
8001 case GTU:
8002 branch = "bgu";
8003 break;
8004 case LEU:
8005 branch = "bleu";
8006 break;
8007 case LTU:
8008 branch = "blu";
8009 break;
8010 default:
8011 gcc_unreachable ();
8012 }
8013 strcpy (string, branch);
8014 }
8015 spaces -= strlen (branch);
8016 p = strchr (string, '\0');
8017
8018 /* Now add the annulling, the label, and a possible noop. */
8019 if (annul && ! far)
8020 {
8021 strcpy (p, ",a");
8022 p += 2;
8023 spaces -= 2;
8024 }
8025
8026 if (TARGET_V9)
8027 {
8028 rtx note;
8029 int v8 = 0;
8030
8031 if (! far && insn && INSN_ADDRESSES_SET_P ())
8032 {
8033 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8034 - INSN_ADDRESSES (INSN_UID (insn)));
8035 /* Leave some instructions for "slop". */
8036 if (delta < -260000 || delta >= 260000)
8037 v8 = 1;
8038 }
8039
8040 switch (mode)
8041 {
8042 case E_CCmode:
8043 case E_CCNZmode:
8044 case E_CCCmode:
8045 case E_CCVmode:
8046 labelno = "%%icc, ";
8047 if (v8)
8048 labelno = "";
8049 break;
8050 case E_CCXmode:
8051 case E_CCXNZmode:
8052 case E_CCXCmode:
8053 case E_CCXVmode:
8054 labelno = "%%xcc, ";
8055 gcc_assert (!v8);
8056 break;
8057 case E_CCFPmode:
8058 case E_CCFPEmode:
8059 {
8060 static char v9_fcc_labelno[] = "%%fccX, ";
8061 /* Set the char indicating the number of the fcc reg to use. */
8062 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8063 labelno = v9_fcc_labelno;
8064 if (v8)
8065 {
8066 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8067 labelno = "";
8068 }
8069 }
8070 break;
8071 default:
8072 gcc_unreachable ();
8073 }
8074
8075 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8076 {
8077 strcpy (p,
8078 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8079 >= profile_probability::even ()) ^ far)
8080 ? ",pt" : ",pn");
8081 p += 3;
8082 spaces -= 3;
8083 }
8084 }
8085 else
8086 labelno = "";
8087
8088 if (spaces > 0)
8089 *p++ = '\t';
8090 else
8091 *p++ = ' ';
8092 strcpy (p, labelno);
8093 p = strchr (p, '\0');
8094 if (far)
8095 {
8096 strcpy (p, ".+12\n\t nop\n\tb\t");
8097 /* Skip the next insn if requested or
8098 if we know that it will be a nop. */
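/* Overwriting the '2' turns ".+12" into ".+16", so the branch also steps
   over the unconditional branch's delay slot.  */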
8099 if (annul || ! final_sequence)
8100 p[3] = '6';
8101 p += 14;
8102 }
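/* Append the label operand ("%l" plus the operand number) and "%#",
   which emits a nop if the delay slot remains unfilled.  */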
8103 *p++ = '%';
8104 *p++ = 'l';
8105 *p++ = label + '0';
8106 *p++ = '%';
8107 *p++ = '#';
8108 *p = '\0';
8109
8110 return string;
8111 }
8112
8113 /* Emit a library call comparison between floating point X and Y.
8114 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8115 Return the new operator to be used in the comparison sequence.
8116
8117 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8118 values as arguments instead of the TFmode registers themselves;
8119 that's why we cannot call emit_float_lib_cmp. */
8120
8121 rtx
8122 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8123 {
8124 const char *qpfunc;
8125 rtx slot0, slot1, result, tem, tem2, libfunc;
8126 machine_mode mode;
8127 enum rtx_code new_comparison;
8128
8129 switch (comparison)
8130 {
8131 case EQ:
8132 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8133 break;
8134
8135 case NE:
8136 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8137 break;
8138
8139 case GT:
8140 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8141 break;
8142
8143 case GE:
8144 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8145 break;
8146
8147 case LT:
8148 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8149 break;
8150
8151 case LE:
8152 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8153 break;
8154
8155 case ORDERED:
8156 case UNORDERED:
8157 case UNGT:
8158 case UNLT:
8159 case UNEQ:
8160 case UNGE:
8161 case UNLE:
8162 case LTGT:
8163 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8164 break;
8165
8166 default:
8167 gcc_unreachable ();
8168 }
8169
8170 if (TARGET_ARCH64)
8171 {
8172 if (MEM_P (x))
8173 {
8174 tree expr = MEM_EXPR (x);
8175 if (expr)
8176 mark_addressable (expr);
8177 slot0 = x;
8178 }
8179 else
8180 {
8181 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8182 emit_move_insn (slot0, x);
8183 }
8184
8185 if (MEM_P (y))
8186 {
8187 tree expr = MEM_EXPR (y);
8188 if (expr)
8189 mark_addressable (expr);
8190 slot1 = y;
8191 }
8192 else
8193 {
8194 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8195 emit_move_insn (slot1, y);
8196 }
8197
8198 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8199 emit_library_call (libfunc, LCT_NORMAL,
8200 DImode,
8201 XEXP (slot0, 0), Pmode,
8202 XEXP (slot1, 0), Pmode);
8203 mode = DImode;
8204 }
8205 else
8206 {
8207 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8208 emit_library_call (libfunc, LCT_NORMAL,
8209 SImode,
8210 x, TFmode, y, TFmode);
8211 mode = SImode;
8212 }
8213
8214
8215 /* Immediately move the result of the libcall into a pseudo
8216 register so reload doesn't clobber the value if it needs
8217 the return register for a spill reg. */
8218 result = gen_reg_rtx (mode);
8219 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8220
8221 switch (comparison)
8222 {
8223 default:
8224 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8225 case ORDERED:
8226 case UNORDERED:
8227 new_comparison = (comparison == UNORDERED ? EQ : NE);
8228 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8229 case UNGT:
8230 case UNGE:
8231 new_comparison = (comparison == UNGT ? GT : NE);
8232 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8233 case UNLE:
8234 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8235 case UNLT:
8236 tem = gen_reg_rtx (mode);
8237 if (TARGET_ARCH32)
8238 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8239 else
8240 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8241 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8242 case UNEQ:
8243 case LTGT:
8244 tem = gen_reg_rtx (mode);
8245 if (TARGET_ARCH32)
8246 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8247 else
8248 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8249 tem2 = gen_reg_rtx (mode);
8250 if (TARGET_ARCH32)
8251 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8252 else
8253 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8254 new_comparison = (comparison == UNEQ ? EQ : NE);
8255 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8256 }
8257
8258 gcc_unreachable ();
8259 }
8260
8261 /* Generate an unsigned DImode to FP conversion. This is the same code
8262 optabs would emit if we didn't have TFmode patterns. */
8263
8264 void
8265 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8266 {
8267 rtx i0, i1, f0, in, out;
8268
8269 out = operands[0];
8270 in = force_reg (DImode, operands[1]);
8271 rtx_code_label *neglab = gen_label_rtx ();
8272 rtx_code_label *donelab = gen_label_rtx ();
8273 i0 = gen_reg_rtx (DImode);
8274 i1 = gen_reg_rtx (DImode);
8275 f0 = gen_reg_rtx (mode);
8276
8277 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8278
8279 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8280 emit_jump_insn (gen_jump (donelab));
8281 emit_barrier ();
8282
8283 emit_label (neglab);
8284
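/* The value has its high bit set: compute (in >> 1) | (in & 1) so the
   halved value keeps a sticky bit for correct rounding, convert it,
   then double the result.  */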
8285 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8286 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8287 emit_insn (gen_iordi3 (i0, i0, i1));
8288 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8289 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8290
8291 emit_label (donelab);
8292 }
8293
8294 /* Generate an FP to unsigned DImode conversion. This is the same code
8295 optabs would emit if we didn't have TFmode patterns. */
8296
8297 void
8298 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8299 {
8300 rtx i0, i1, f0, in, out, limit;
8301
8302 out = operands[0];
8303 in = force_reg (mode, operands[1]);
8304 rtx_code_label *neglab = gen_label_rtx ();
8305 rtx_code_label *donelab = gen_label_rtx ();
8306 i0 = gen_reg_rtx (DImode);
8307 i1 = gen_reg_rtx (DImode);
8308 limit = gen_reg_rtx (mode);
8309 f0 = gen_reg_rtx (mode);
8310
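/* The limit is 2^63 as a floating-point constant: values below it
   convert directly, values at or above it take the adjusted path.  */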
8311 emit_move_insn (limit,
8312 const_double_from_real_value (
8313 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8314 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8315
8316 emit_insn (gen_rtx_SET (out,
8317 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8318 emit_jump_insn (gen_jump (donelab));
8319 emit_barrier ();
8320
8321 emit_label (neglab);
8322
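/* The value is >= 2^63: subtract 2^63 before converting, then set the
   high bit of the integer result by XORing with 1 << 63.  */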
8323 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8324 emit_insn (gen_rtx_SET (i0,
8325 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8326 emit_insn (gen_movdi (i1, const1_rtx));
8327 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8328 emit_insn (gen_xordi3 (out, i0, i1));
8329
8330 emit_label (donelab);
8331 }
8332
8333 /* Return the string to output a compare and branch instruction to DEST.
8334 DEST is the destination insn (i.e. the label), INSN is the source,
8335 and OP is the conditional expression. */
8336
8337 const char *
8338 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8339 {
8340 machine_mode mode = GET_MODE (XEXP (op, 0));
8341 enum rtx_code code = GET_CODE (op);
8342 const char *cond_str, *tmpl;
8343 int far, emit_nop, len;
8344 static char string[64];
8345 char size_char;
8346
8347 /* Compare and Branch is limited to +-2KB. If it is too far away,
8348 change
8349
8350 cxbne X, Y, .LC30
8351
8352 to
8353
8354 cxbe X, Y, .+16
8355 nop
8356 ba,pt xcc, .LC30
8357 nop */
8358
8359 len = get_attr_length (insn);
8360
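/* The length attribute is in instruction words: 2 means the cbcond needs
   a trailing nop, 4 means the target is out of cbcond range and the long
   form is required.  */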
8361 far = len == 4;
8362 emit_nop = len == 2;
8363
8364 if (far)
8365 code = reverse_condition (code);
8366
8367 size_char = ((mode == SImode) ? 'w' : 'x');
8368
8369 switch (code)
8370 {
8371 case NE:
8372 cond_str = "ne";
8373 break;
8374
8375 case EQ:
8376 cond_str = "e";
8377 break;
8378
8379 case GE:
8380 cond_str = "ge";
8381 break;
8382
8383 case GT:
8384 cond_str = "g";
8385 break;
8386
8387 case LE:
8388 cond_str = "le";
8389 break;
8390
8391 case LT:
8392 cond_str = "l";
8393 break;
8394
8395 case GEU:
8396 cond_str = "cc";
8397 break;
8398
8399 case GTU:
8400 cond_str = "gu";
8401 break;
8402
8403 case LEU:
8404 cond_str = "leu";
8405 break;
8406
8407 case LTU:
8408 cond_str = "cs";
8409 break;
8410
8411 default:
8412 gcc_unreachable ();
8413 }
8414
8415 if (far)
8416 {
8417 int veryfar = 1, delta;
8418
8419 if (INSN_ADDRESSES_SET_P ())
8420 {
8421 delta = (INSN_ADDRESSES (INSN_UID (dest))
8422 - INSN_ADDRESSES (INSN_UID (insn)));
8423 /* Leave some instructions for "slop". */
8424 if (delta >= -260000 && delta < 260000)
8425 veryfar = 0;
8426 }
8427
8428 if (veryfar)
8429 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8430 else
8431 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8432 }
8433 else
8434 {
8435 if (emit_nop)
8436 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8437 else
8438 tmpl = "c%cb%s\t%%1, %%2, %%3";
8439 }
8440
8441 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8442
8443 return string;
8444 }
8445
8446 /* Return the string to output a conditional branch to LABEL, testing
8447 register REG. LABEL is the operand number of the label; REG is the
8448 operand number of the reg. OP is the conditional expression. The mode
8449 of REG says what kind of comparison we made.
8450
8451 DEST is the destination insn (i.e. the label), INSN is the source.
8452
8453 REVERSED is nonzero if we should reverse the sense of the comparison.
8454
8455 ANNUL is nonzero if we should generate an annulling branch. */
8456
8457 const char *
8458 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8459 int annul, rtx_insn *insn)
8460 {
8461 static char string[64];
8462 enum rtx_code code = GET_CODE (op);
8463 machine_mode mode = GET_MODE (XEXP (op, 0));
8464 rtx note;
8465 int far;
8466 char *p;
8467
8468 /* Branch-on-register instructions are limited to +-128KB. If it is too far away,
8469 change
8470
8471 brnz,pt %g1, .LC30
8472
8473 to
8474
8475 brz,pn %g1, .+12
8476 nop
8477 ba,pt %xcc, .LC30
8478
8479 and
8480
8481 brgez,a,pn %o1, .LC29
8482
8483 to
8484
8485 brlz,pt %o1, .+16
8486 nop
8487 ba,pt %xcc, .LC29 */
8488
8489 far = get_attr_length (insn) >= 3;
8490
8491 /* These are integer-only register branches, so we can just reverse the code. */
8492 if (reversed ^ far)
8493 code = reverse_condition (code);
8494
8495 /* Only 64-bit versions of these instructions exist. */
8496 gcc_assert (mode == DImode);
8497
8498 /* Start by writing the branch condition. */
8499
8500 switch (code)
8501 {
8502 case NE:
8503 strcpy (string, "brnz");
8504 break;
8505
8506 case EQ:
8507 strcpy (string, "brz");
8508 break;
8509
8510 case GE:
8511 strcpy (string, "brgez");
8512 break;
8513
8514 case LT:
8515 strcpy (string, "brlz");
8516 break;
8517
8518 case LE:
8519 strcpy (string, "brlez");
8520 break;
8521
8522 case GT:
8523 strcpy (string, "brgz");
8524 break;
8525
8526 default:
8527 gcc_unreachable ();
8528 }
8529
8530 p = strchr (string, '\0');
8531
8532 /* Now add the annulling, reg, label, and nop. */
8533 if (annul && ! far)
8534 {
8535 strcpy (p, ",a");
8536 p += 2;
8537 }
8538
8539 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8540 {
8541 strcpy (p,
8542 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8543 >= profile_probability::even ()) ^ far)
8544 ? ",pt" : ",pn");
8545 p += 3;
8546 }
8547
8548 *p = p < string + 8 ? '\t' : ' ';
8549 p++;
8550 *p++ = '%';
8551 *p++ = '0' + reg;
8552 *p++ = ',';
8553 *p++ = ' ';
8554 if (far)
8555 {
8556 int veryfar = 1, delta;
8557
8558 if (INSN_ADDRESSES_SET_P ())
8559 {
8560 delta = (INSN_ADDRESSES (INSN_UID (dest))
8561 - INSN_ADDRESSES (INSN_UID (insn)));
8562 /* Leave some instructions for "slop". */
8563 if (delta >= -260000 && delta < 260000)
8564 veryfar = 0;
8565 }
8566
8567 strcpy (p, ".+12\n\t nop\n\t");
8568 /* Skip the next insn if requested or
8569 if we know that it will be a nop. */
8570 if (annul || ! final_sequence)
8571 p[3] = '6';
8572 p += 12;
8573 if (veryfar)
8574 {
8575 strcpy (p, "b\t");
8576 p += 2;
8577 }
8578 else
8579 {
8580 strcpy (p, "ba,pt\t%%xcc, ");
8581 p += 13;
8582 }
8583 }
8584 *p++ = '%';
8585 *p++ = 'l';
8586 *p++ = '0' + label;
8587 *p++ = '%';
8588 *p++ = '#';
8589 *p = '\0';
8590
8591 return string;
8592 }
8593
8594 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8595 Such instructions cannot be used in the delay slot of a return insn on v9.
8596 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8597 */
8598
8599 static int
8600 epilogue_renumber (register rtx *where, int test)
8601 {
8602 register const char *fmt;
8603 register int i;
8604 register enum rtx_code code;
8605
8606 if (*where == 0)
8607 return 0;
8608
8609 code = GET_CODE (*where);
8610
8611 switch (code)
8612 {
8613 case REG:
8614 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8615 return 1;
8616 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8617 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8618 /* fallthrough */
8619 case SCRATCH:
8620 case CC0:
8621 case PC:
8622 case CONST_INT:
8623 case CONST_WIDE_INT:
8624 case CONST_DOUBLE:
8625 return 0;
8626
8627 /* Do not replace the frame pointer with the stack pointer because
8628 it can cause the delayed instruction to load below the stack.
8629 This occurs when instructions like:
8630
8631 (set (reg/i:SI 24 %i0)
8632 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8633 (const_int -20 [0xffffffec])) 0))
8634
8635 are in the return delay slot. */
8636 case PLUS:
8637 if (GET_CODE (XEXP (*where, 0)) == REG
8638 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8639 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8640 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8641 return 1;
8642 break;
8643
8644 case MEM:
8645 if (SPARC_STACK_BIAS
8646 && GET_CODE (XEXP (*where, 0)) == REG
8647 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8648 return 1;
8649 break;
8650
8651 default:
8652 break;
8653 }
8654
8655 fmt = GET_RTX_FORMAT (code);
8656
8657 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8658 {
8659 if (fmt[i] == 'E')
8660 {
8661 register int j;
8662 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8663 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8664 return 1;
8665 }
8666 else if (fmt[i] == 'e'
8667 && epilogue_renumber (&(XEXP (*where, i)), test))
8668 return 1;
8669 }
8670 return 0;
8671 }
8672 \f
8673 /* Leaf functions and non-leaf functions have different needs. */
8674
8675 static const int
8676 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8677
8678 static const int
8679 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8680
8681 static const int *const reg_alloc_orders[] = {
8682 reg_leaf_alloc_order,
8683 reg_nonleaf_alloc_order};
8684
8685 void
8686 order_regs_for_local_alloc (void)
8687 {
8688 static int last_order_nonleaf = 1;
8689
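/* Register 15 is %o7, which every call clobbers, so its liveness is what
   distinguishes leaf from non-leaf functions here.  */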
8690 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8691 {
8692 last_order_nonleaf = !last_order_nonleaf;
8693 memcpy ((char *) reg_alloc_order,
8694 (const char *) reg_alloc_orders[last_order_nonleaf],
8695 FIRST_PSEUDO_REGISTER * sizeof (int));
8696 }
8697 }
8698 \f
8699 /* Return 1 if REG and MEM are legitimate enough to allow the various
8700 MEM<-->REG splits to be run. */
8701
8702 int
8703 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8704 {
8705 /* Punt if we are here by mistake. */
8706 gcc_assert (reload_completed);
8707
8708 /* We must have an offsettable memory reference. */
8709 if (!offsettable_memref_p (mem))
8710 return 0;
8711
8712 /* If we have legitimate args for ldd/std, we do not want
8713 the split to happen. */
8714 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8715 return 0;
8716
8717 /* Success. */
8718 return 1;
8719 }
8720
8721 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8722
8723 void
8724 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8725 {
8726 rtx high_part = gen_highpart (mode, dest);
8727 rtx low_part = gen_lowpart (mode, dest);
8728 rtx word0 = adjust_address (src, mode, 0);
8729 rtx word1 = adjust_address (src, mode, 4);
8730
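/* If the high-part register appears in the address of the second word,
   load that word first so the address is not clobbered.  */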
8731 if (reg_overlap_mentioned_p (high_part, word1))
8732 {
8733 emit_move_insn_1 (low_part, word1);
8734 emit_move_insn_1 (high_part, word0);
8735 }
8736 else
8737 {
8738 emit_move_insn_1 (high_part, word0);
8739 emit_move_insn_1 (low_part, word1);
8740 }
8741 }
8742
8743 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8744
8745 void
8746 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8747 {
8748 rtx word0 = adjust_address (dest, mode, 0);
8749 rtx word1 = adjust_address (dest, mode, 4);
8750 rtx high_part = gen_highpart (mode, src);
8751 rtx low_part = gen_lowpart (mode, src);
8752
8753 emit_move_insn_1 (word0, high_part);
8754 emit_move_insn_1 (word1, low_part);
8755 }
8756
8757 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8758
8759 int
8760 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8761 {
8762 /* Punt if we are here by mistake. */
8763 gcc_assert (reload_completed);
8764
8765 if (GET_CODE (reg1) == SUBREG)
8766 reg1 = SUBREG_REG (reg1);
8767 if (GET_CODE (reg1) != REG)
8768 return 0;
8769 const int regno1 = REGNO (reg1);
8770
8771 if (GET_CODE (reg2) == SUBREG)
8772 reg2 = SUBREG_REG (reg2);
8773 if (GET_CODE (reg2) != REG)
8774 return 0;
8775 const int regno2 = REGNO (reg2);
8776
8777 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8778 return 1;
8779
8780 if (TARGET_VIS3)
8781 {
8782 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8783 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8784 return 1;
8785 }
8786
8787 return 0;
8788 }
8789
8790 /* Split a REG <--> REG move into a pair of moves in MODE. */
8791
8792 void
8793 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8794 {
8795 rtx dest1 = gen_highpart (mode, dest);
8796 rtx dest2 = gen_lowpart (mode, dest);
8797 rtx src1 = gen_highpart (mode, src);
8798 rtx src2 = gen_lowpart (mode, src);
8799
8800 /* Now emit using the real source and destination we found, swapping
8801 the order if we detect overlap. */
8802 if (reg_overlap_mentioned_p (dest1, src2))
8803 {
8804 emit_move_insn_1 (dest2, src2);
8805 emit_move_insn_1 (dest1, src1);
8806 }
8807 else
8808 {
8809 emit_move_insn_1 (dest1, src1);
8810 emit_move_insn_1 (dest2, src2);
8811 }
8812 }
8813
8814 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8815 This makes them candidates for using ldd and std insns.
8816
8817 Note reg1 and reg2 *must* be hard registers. */
8818
8819 int
8820 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8821 {
8822 /* We might have been passed a SUBREG. */
8823 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8824 return 0;
8825
8826 if (REGNO (reg1) % 2 != 0)
8827 return 0;
8828
8829 /* Integer ldd is deprecated in SPARC V9. */
8830 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8831 return 0;
8832
8833 return (REGNO (reg1) == REGNO (reg2) - 1);
8834 }
8835
8836 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8837 an ldd or std insn.
8838
8839 This can only happen when addr1 and addr2, the addresses in mem1
8840 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8841 addr1 must also be aligned on a 64-bit boundary.
8842
8843 Also, if dependent_reg_rtx is not null, it should not be used to
8844 compute the address for mem1, i.e. we cannot optimize a sequence
8845 like:
8846 ld [%o0], %o0
8847 ld [%o0 + 4], %o1
8848 to
8849 ldd [%o0], %o0
8850 nor:
8851 ld [%g3 + 4], %g3
8852 ld [%g3], %g2
8853 to
8854 ldd [%g3], %g2
8855
8856 But, note that the transformation from:
8857 ld [%g2 + 4], %g3
8858 ld [%g2], %g2
8859 to
8860 ldd [%g2], %g2
8861 is perfectly fine. Thus, the peephole2 patterns always pass us
8862 the destination register of the first load, never the second one.
8863
8864 For stores we don't have a similar problem, so dependent_reg_rtx is
8865 NULL_RTX. */
8866
8867 int
8868 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8869 {
8870 rtx addr1, addr2;
8871 unsigned int reg1;
8872 HOST_WIDE_INT offset1;
8873
8874 /* The mems cannot be volatile. */
8875 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8876 return 0;
8877
8878 /* MEM1 should be aligned on a 64-bit boundary. */
8879 if (MEM_ALIGN (mem1) < 64)
8880 return 0;
8881
8882 addr1 = XEXP (mem1, 0);
8883 addr2 = XEXP (mem2, 0);
8884
8885 /* Extract a register number and offset (if used) from the first addr. */
8886 if (GET_CODE (addr1) == PLUS)
8887 {
8888 /* If not a REG, return zero. */
8889 if (GET_CODE (XEXP (addr1, 0)) != REG)
8890 return 0;
8891 else
8892 {
8893 reg1 = REGNO (XEXP (addr1, 0));
8894 /* The offset must be constant! */
8895 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8896 return 0;
8897 offset1 = INTVAL (XEXP (addr1, 1));
8898 }
8899 }
8900 else if (GET_CODE (addr1) != REG)
8901 return 0;
8902 else
8903 {
8904 reg1 = REGNO (addr1);
8905 /* This was a simple (mem (reg)) expression. Offset is 0. */
8906 offset1 = 0;
8907 }
8908
8909 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8910 if (GET_CODE (addr2) != PLUS)
8911 return 0;
8912
8913 if (GET_CODE (XEXP (addr2, 0)) != REG
8914 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8915 return 0;
8916
8917 if (reg1 != REGNO (XEXP (addr2, 0)))
8918 return 0;
8919
8920 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8921 return 0;
8922
8923 /* The first offset must be evenly divisible by 8 to ensure the
8924 address is 64-bit aligned. */
8925 if (offset1 % 8 != 0)
8926 return 0;
8927
8928 /* The offset for the second addr must be 4 more than the first addr. */
8929 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8930 return 0;
8931
8932 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8933 instructions. */
8934 return 1;
8935 }
8936
8937 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8938
8939 rtx
8940 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8941 {
8942 rtx x = widen_memory_access (mem1, mode, 0);
8943 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8944 return x;
8945 }
8946
8947 /* Return 1 if reg is a pseudo, or is the first register in
8948 a hard register pair. This makes it suitable for use in
8949 ldd and std insns. */
8950
8951 int
8952 register_ok_for_ldd (rtx reg)
8953 {
8954 /* We might have been passed a SUBREG. */
8955 if (!REG_P (reg))
8956 return 0;
8957
8958 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8959 return (REGNO (reg) % 2 == 0);
8960
8961 return 1;
8962 }
8963
8964 /* Return 1 if OP, a MEM, has an address which is known to be
8965 aligned to an 8-byte boundary. */
8966
8967 int
8968 memory_ok_for_ldd (rtx op)
8969 {
8970 /* In 64-bit mode, we assume that the address is word-aligned. */
8971 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8972 return 0;
8973
8974 if (! can_create_pseudo_p ()
8975 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8976 return 0;
8977
8978 return 1;
8979 }
8980 \f
8981 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8982
8983 static bool
8984 sparc_print_operand_punct_valid_p (unsigned char code)
8985 {
8986 if (code == '#'
8987 || code == '*'
8988 || code == '('
8989 || code == ')'
8990 || code == '_'
8991 || code == '&')
8992 return true;
8993
8994 return false;
8995 }
8996
8997 /* Implement TARGET_PRINT_OPERAND.
8998 Print operand X (an rtx) in assembler syntax to file FILE.
8999 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9000 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9001
9002 static void
9003 sparc_print_operand (FILE *file, rtx x, int code)
9004 {
9005 const char *s;
9006
9007 switch (code)
9008 {
9009 case '#':
9010 /* Output an insn in a delay slot. */
9011 if (final_sequence)
9012 sparc_indent_opcode = 1;
9013 else
9014 fputs ("\n\t nop", file);
9015 return;
9016 case '*':
9017 /* Output an annul flag if there's nothing for the delay slot and we
9018 are optimizing. This is always used with '(' below.
9019 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9020 this is a dbx bug. So, we only do this when optimizing.
9021 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9022 Always emit a nop in case the next instruction is a branch. */
9023 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9024 fputs (",a", file);
9025 return;
9026 case '(':
9027 /* Output a 'nop' if there's nothing for the delay slot and we are
9028 not optimizing. This is always used with '*' above. */
9029 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9030 fputs ("\n\t nop", file);
9031 else if (final_sequence)
9032 sparc_indent_opcode = 1;
9033 return;
9034 case ')':
9035 /* Output the right displacement from the saved PC on function return.
9036 The caller may have placed an "unimp" insn immediately after the call
9037 so we have to account for it. This insn is used in the 32-bit ABI
9038 when calling a function that returns a non-zero-sized structure. The
9039 64-bit ABI doesn't have it. Be careful to have this test be the same
9040 as that for the call. The exception is when sparc_std_struct_return
9041 is enabled, the psABI is followed exactly and the adjustment is made
9042 by the code in sparc_struct_value_rtx. The call emitted is the same
9043 when sparc_std_struct_return is enabled. */
9044 if (!TARGET_ARCH64
9045 && cfun->returns_struct
9046 && !sparc_std_struct_return
9047 && DECL_SIZE (DECL_RESULT (current_function_decl))
9048 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9049 == INTEGER_CST
9050 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9051 fputs ("12", file);
9052 else
9053 fputc ('8', file);
9054 return;
9055 case '_':
9056 /* Output the Embedded Medium/Anywhere code model base register. */
9057 fputs (EMBMEDANY_BASE_REG, file);
9058 return;
9059 case '&':
9060 /* Print some local dynamic TLS name. */
9061 if (const char *name = get_some_local_dynamic_name ())
9062 assemble_name (file, name);
9063 else
9064 output_operand_lossage ("'%%&' used without any "
9065 "local dynamic TLS references");
9066 return;
9067
9068 case 'Y':
9069 /* Adjust the operand to take into account a RESTORE operation. */
9070 if (GET_CODE (x) == CONST_INT)
9071 break;
9072 else if (GET_CODE (x) != REG)
9073 output_operand_lossage ("invalid %%Y operand");
9074 else if (REGNO (x) < 8)
9075 fputs (reg_names[REGNO (x)], file);
9076 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9077 fputs (reg_names[REGNO (x)-16], file);
9078 else
9079 output_operand_lossage ("invalid %%Y operand");
9080 return;
9081 case 'L':
9082 /* Print out the low order register name of a register pair. */
9083 if (WORDS_BIG_ENDIAN)
9084 fputs (reg_names[REGNO (x)+1], file);
9085 else
9086 fputs (reg_names[REGNO (x)], file);
9087 return;
9088 case 'H':
9089 /* Print out the high order register name of a register pair. */
9090 if (WORDS_BIG_ENDIAN)
9091 fputs (reg_names[REGNO (x)], file);
9092 else
9093 fputs (reg_names[REGNO (x)+1], file);
9094 return;
9095 case 'R':
9096 /* Print out the second register name of a register pair or quad.
9097 I.e., R (%o0) => %o1. */
9098 fputs (reg_names[REGNO (x)+1], file);
9099 return;
9100 case 'S':
9101 /* Print out the third register name of a register quad.
9102 I.e., S (%o0) => %o2. */
9103 fputs (reg_names[REGNO (x)+2], file);
9104 return;
9105 case 'T':
9106 /* Print out the fourth register name of a register quad.
9107 I.e., T (%o0) => %o3. */
9108 fputs (reg_names[REGNO (x)+3], file);
9109 return;
9110 case 'x':
9111 /* Print a condition code register. */
9112 if (REGNO (x) == SPARC_ICC_REG)
9113 {
9114 switch (GET_MODE (x))
9115 {
9116 case E_CCmode:
9117 case E_CCNZmode:
9118 case E_CCCmode:
9119 case E_CCVmode:
9120 s = "%icc";
9121 break;
9122 case E_CCXmode:
9123 case E_CCXNZmode:
9124 case E_CCXCmode:
9125 case E_CCXVmode:
9126 s = "%xcc";
9127 break;
9128 default:
9129 gcc_unreachable ();
9130 }
9131 fputs (s, file);
9132 }
9133 else
9134 /* %fccN register */
9135 fputs (reg_names[REGNO (x)], file);
9136 return;
9137 case 'm':
9138 /* Print the operand's address only. */
9139 output_address (GET_MODE (x), XEXP (x, 0));
9140 return;
9141 case 'r':
9142 /* In this case we need a register. Use %g0 if the
9143 operand is const0_rtx. */
9144 if (x == const0_rtx
9145 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9146 {
9147 fputs ("%g0", file);
9148 return;
9149 }
9150 else
9151 break;
9152
9153 case 'A':
9154 switch (GET_CODE (x))
9155 {
9156 case IOR:
9157 s = "or";
9158 break;
9159 case AND:
9160 s = "and";
9161 break;
9162 case XOR:
9163 s = "xor";
9164 break;
9165 default:
9166 output_operand_lossage ("invalid %%A operand");
9167 s = "";
9168 break;
9169 }
9170 fputs (s, file);
9171 return;
9172
9173 case 'B':
9174 switch (GET_CODE (x))
9175 {
9176 case IOR:
9177 s = "orn";
9178 break;
9179 case AND:
9180 s = "andn";
9181 break;
9182 case XOR:
9183 s = "xnor";
9184 break;
9185 default:
9186 output_operand_lossage ("invalid %%B operand");
9187 s = "";
9188 break;
9189 }
9190 fputs (s, file);
9191 return;
9192
9193 /* This is used by the conditional move instructions. */
9194 case 'C':
9195 {
9196 machine_mode mode = GET_MODE (XEXP (x, 0));
9197 switch (GET_CODE (x))
9198 {
9199 case NE:
9200 if (mode == CCVmode || mode == CCXVmode)
9201 s = "vs";
9202 else
9203 s = "ne";
9204 break;
9205 case EQ:
9206 if (mode == CCVmode || mode == CCXVmode)
9207 s = "vc";
9208 else
9209 s = "e";
9210 break;
9211 case GE:
9212 if (mode == CCNZmode || mode == CCXNZmode)
9213 s = "pos";
9214 else
9215 s = "ge";
9216 break;
9217 case GT:
9218 s = "g";
9219 break;
9220 case LE:
9221 s = "le";
9222 break;
9223 case LT:
9224 if (mode == CCNZmode || mode == CCXNZmode)
9225 s = "neg";
9226 else
9227 s = "l";
9228 break;
9229 case GEU:
9230 s = "geu";
9231 break;
9232 case GTU:
9233 s = "gu";
9234 break;
9235 case LEU:
9236 s = "leu";
9237 break;
9238 case LTU:
9239 s = "lu";
9240 break;
9241 case LTGT:
9242 s = "lg";
9243 break;
9244 case UNORDERED:
9245 s = "u";
9246 break;
9247 case ORDERED:
9248 s = "o";
9249 break;
9250 case UNLT:
9251 s = "ul";
9252 break;
9253 case UNLE:
9254 s = "ule";
9255 break;
9256 case UNGT:
9257 s = "ug";
9258 break;
9259 case UNGE:
9260 s = "uge";
9261 break;
9262 case UNEQ:
9263 s = "ue";
9264 break;
9265 default:
9266 output_operand_lossage ("invalid %%C operand");
9267 s = "";
9268 break;
9269 }
9270 fputs (s, file);
9271 return;
9272 }
9273
9274 /* These are used by the movr instruction pattern. */
9275 case 'D':
9276 {
9277 switch (GET_CODE (x))
9278 {
9279 case NE:
9280 s = "ne";
9281 break;
9282 case EQ:
9283 s = "e";
9284 break;
9285 case GE:
9286 s = "gez";
9287 break;
9288 case LT:
9289 s = "lz";
9290 break;
9291 case LE:
9292 s = "lez";
9293 break;
9294 case GT:
9295 s = "gz";
9296 break;
9297 default:
9298 output_operand_lossage ("invalid %%D operand");
9299 s = "";
9300 break;
9301 }
9302 fputs (s, file);
9303 return;
9304 }
9305
9306 case 'b':
9307 {
9308 /* Print a sign-extended character. */
9309 int i = trunc_int_for_mode (INTVAL (x), QImode);
9310 fprintf (file, "%d", i);
9311 return;
9312 }
9313
9314 case 'f':
9315 /* Operand must be a MEM; write its address. */
9316 if (GET_CODE (x) != MEM)
9317 output_operand_lossage ("invalid %%f operand");
9318 output_address (GET_MODE (x), XEXP (x, 0));
9319 return;
9320
9321 case 's':
9322 {
9323 /* Print a sign-extended 32-bit value. */
9324 HOST_WIDE_INT i;
9325 if (GET_CODE(x) == CONST_INT)
9326 i = INTVAL (x);
9327 else
9328 {
9329 output_operand_lossage ("invalid %%s operand");
9330 return;
9331 }
9332 i = trunc_int_for_mode (i, SImode);
9333 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9334 return;
9335 }
9336
9337 case 0:
9338 /* Do nothing special. */
9339 break;
9340
9341 default:
9342 /* Undocumented flag. */
9343 output_operand_lossage ("invalid operand output code");
9344 }
9345
9346 if (GET_CODE (x) == REG)
9347 fputs (reg_names[REGNO (x)], file);
9348 else if (GET_CODE (x) == MEM)
9349 {
9350 fputc ('[', file);
9351 /* Poor Sun assembler doesn't understand absolute addressing. */
9352 if (CONSTANT_P (XEXP (x, 0)))
9353 fputs ("%g0+", file);
9354 output_address (GET_MODE (x), XEXP (x, 0));
9355 fputc (']', file);
9356 }
9357 else if (GET_CODE (x) == HIGH)
9358 {
9359 fputs ("%hi(", file);
9360 output_addr_const (file, XEXP (x, 0));
9361 fputc (')', file);
9362 }
9363 else if (GET_CODE (x) == LO_SUM)
9364 {
9365 sparc_print_operand (file, XEXP (x, 0), 0);
9366 if (TARGET_CM_MEDMID)
9367 fputs ("+%l44(", file);
9368 else
9369 fputs ("+%lo(", file);
9370 output_addr_const (file, XEXP (x, 1));
9371 fputc (')', file);
9372 }
9373 else if (GET_CODE (x) == CONST_DOUBLE)
9374 output_operand_lossage ("floating-point constant not a valid immediate operand");
9375 else
9376 output_addr_const (file, x);
9377 }
9378
9379 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9380
9381 static void
9382 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9383 {
9384 register rtx base, index = 0;
9385 int offset = 0;
9386 register rtx addr = x;
9387
9388 if (REG_P (addr))
9389 fputs (reg_names[REGNO (addr)], file);
9390 else if (GET_CODE (addr) == PLUS)
9391 {
9392 if (CONST_INT_P (XEXP (addr, 0)))
9393 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9394 else if (CONST_INT_P (XEXP (addr, 1)))
9395 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9396 else
9397 base = XEXP (addr, 0), index = XEXP (addr, 1);
9398 if (GET_CODE (base) == LO_SUM)
9399 {
9400 gcc_assert (USE_AS_OFFSETABLE_LO10
9401 && TARGET_ARCH64
9402 && ! TARGET_CM_MEDMID);
9403 output_operand (XEXP (base, 0), 0);
9404 fputs ("+%lo(", file);
9405 output_address (VOIDmode, XEXP (base, 1));
9406 fprintf (file, ")+%d", offset);
9407 }
9408 else
9409 {
9410 fputs (reg_names[REGNO (base)], file);
9411 if (index == 0)
9412 fprintf (file, "%+d", offset);
9413 else if (REG_P (index))
9414 fprintf (file, "+%s", reg_names[REGNO (index)]);
9415 else if (GET_CODE (index) == SYMBOL_REF
9416 || GET_CODE (index) == LABEL_REF
9417 || GET_CODE (index) == CONST)
9418 fputc ('+', file), output_addr_const (file, index);
9419 else gcc_unreachable ();
9420 }
9421 }
9422 else if (GET_CODE (addr) == MINUS
9423 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9424 {
9425 output_addr_const (file, XEXP (addr, 0));
9426 fputs ("-(", file);
9427 output_addr_const (file, XEXP (addr, 1));
9428 fputs ("-.)", file);
9429 }
9430 else if (GET_CODE (addr) == LO_SUM)
9431 {
9432 output_operand (XEXP (addr, 0), 0);
9433 if (TARGET_CM_MEDMID)
9434 fputs ("+%l44(", file);
9435 else
9436 fputs ("+%lo(", file);
9437 output_address (VOIDmode, XEXP (addr, 1));
9438 fputc (')', file);
9439 }
9440 else if (flag_pic
9441 && GET_CODE (addr) == CONST
9442 && GET_CODE (XEXP (addr, 0)) == MINUS
9443 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9444 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9445 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9446 {
9447 addr = XEXP (addr, 0);
9448 output_addr_const (file, XEXP (addr, 0));
9449 /* Group the args of the second CONST in parentheses. */
9450 fputs ("-(", file);
9451 /* Skip past the second CONST--it does nothing for us. */
9452 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9453 /* Close the parenthesis. */
9454 fputc (')', file);
9455 }
9456 else
9457 {
9458 output_addr_const (file, addr);
9459 }
9460 }
9461 \f
9462 /* Target hook for assembling integer objects. The sparc version has
9463 special handling for aligned DI-mode objects. */
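/* E.g. an aligned 8-byte symbolic value is emitted as a single
   "\t.xword\tsym" on TARGET_V9, and otherwise falls back to a pair of
   aligned 4-byte words (a zero word followed by the value).  */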
9464
9465 static bool
9466 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9467 {
9468 /* ??? We only output .xword's for symbols and only then in environments
9469 where the assembler can handle them. */
9470 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9471 {
9472 if (TARGET_V9)
9473 {
9474 assemble_integer_with_op ("\t.xword\t", x);
9475 return true;
9476 }
9477 else
9478 {
9479 assemble_aligned_integer (4, const0_rtx);
9480 assemble_aligned_integer (4, x);
9481 return true;
9482 }
9483 }
9484 return default_assemble_integer (x, size, aligned_p);
9485 }
9486 \f
9487 /* Return the value of a code used in the .proc pseudo-op that says
9488 what kind of result this function returns. For non-C types, we pick
9489 the closest C type. */
9490
9491 #ifndef SHORT_TYPE_SIZE
9492 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9493 #endif
9494
9495 #ifndef INT_TYPE_SIZE
9496 #define INT_TYPE_SIZE BITS_PER_WORD
9497 #endif
9498
9499 #ifndef LONG_TYPE_SIZE
9500 #define LONG_TYPE_SIZE BITS_PER_WORD
9501 #endif
9502
9503 #ifndef LONG_LONG_TYPE_SIZE
9504 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9505 #endif
9506
9507 #ifndef FLOAT_TYPE_SIZE
9508 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9509 #endif
9510
9511 #ifndef DOUBLE_TYPE_SIZE
9512 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9513 #endif
9514
9515 #ifndef LONG_DOUBLE_TYPE_SIZE
9516 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9517 #endif
9518
9519 unsigned long
9520 sparc_type_code (register tree type)
9521 {
9522 register unsigned long qualifiers = 0;
9523 register unsigned shift;
9524
9525 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9526 setting more, since some assemblers will give an error for this. Also,
9527 we must be careful to avoid shifts of 32 bits or more to avoid getting
9528 unpredictable results. */
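/* A worked example (assuming the default type-size macros above): for
   a value of type "int **", the loop sees POINTER_TYPE at shift 6
   (0x40), POINTER_TYPE at shift 8 (0x100), then a signed INTEGER_TYPE
   no wider than INT_TYPE_SIZE, yielding 0x40 | 0x100 | 4 = 0x144.  */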
9529
9530 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9531 {
9532 switch (TREE_CODE (type))
9533 {
9534 case ERROR_MARK:
9535 return qualifiers;
9536
9537 case ARRAY_TYPE:
9538 qualifiers |= (3 << shift);
9539 break;
9540
9541 case FUNCTION_TYPE:
9542 case METHOD_TYPE:
9543 qualifiers |= (2 << shift);
9544 break;
9545
9546 case POINTER_TYPE:
9547 case REFERENCE_TYPE:
9548 case OFFSET_TYPE:
9549 qualifiers |= (1 << shift);
9550 break;
9551
9552 case RECORD_TYPE:
9553 return (qualifiers | 8);
9554
9555 case UNION_TYPE:
9556 case QUAL_UNION_TYPE:
9557 return (qualifiers | 9);
9558
9559 case ENUMERAL_TYPE:
9560 return (qualifiers | 10);
9561
9562 case VOID_TYPE:
9563 return (qualifiers | 16);
9564
9565 case INTEGER_TYPE:
9566 /* If this is a range type, consider it to be the underlying
9567 type. */
9568 if (TREE_TYPE (type) != 0)
9569 break;
9570
9571 /* Carefully distinguish all the standard types of C,
9572 without messing up if the language is not C. We do this by
9573 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9574 look at both the names and the above fields, but that's redundant.
9575 Any type whose size is between two C types will be considered
9576 to be the wider of the two types. Also, we do not have a
9577 special code to use for "long long", so anything wider than
9578 long is treated the same. Note that we can't distinguish
9579 between "int" and "long" in this code if they are the same
9580 size, but that's fine, since neither can the assembler. */
9581
9582 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9583 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9584
9585 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9586 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9587
9588 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9589 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9590
9591 else
9592 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9593
9594 case REAL_TYPE:
9595 /* If this is a range type, consider it to be the underlying
9596 type. */
9597 if (TREE_TYPE (type) != 0)
9598 break;
9599
9600 /* Carefully distinguish all the standard types of C,
9601 without messing up if the language is not C. */
9602
9603 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9604 return (qualifiers | 6);
9605
9606 else
9607 return (qualifiers | 7);
9608
9609 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9610 /* ??? We need to distinguish between double and float complex types,
9611 but I don't know how yet because I can't reach this code from
9612 existing front-ends. */
9613 return (qualifiers | 7); /* Who knows? */
9614
9615 case VECTOR_TYPE:
9616 case BOOLEAN_TYPE: /* Boolean truth value type. */
9617 case LANG_TYPE:
9618 case NULLPTR_TYPE:
9619 return qualifiers;
9620
9621 default:
9622 gcc_unreachable (); /* Not a type! */
9623 }
9624 }
9625
9626 return qualifiers;
9627 }
9628 \f
9629 /* Nested function support. */
9630
9631 /* Emit RTL insns to initialize the variable parts of a trampoline.
9632 FNADDR is an RTX for the address of the function's pure code.
9633 CXT is an RTX for the static chain value for the function.
9634
9635 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9636 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9637 (to store insns). This is a bit excessive. Perhaps a different
9638 mechanism would be better here.
9639
9640 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9641
9642 static void
9643 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9644 {
9645 /* SPARC 32-bit trampoline:
9646
9647 sethi %hi(fn), %g1
9648 sethi %hi(static), %g2
9649 jmp %g1+%lo(fn)
9650 or %g2, %lo(static), %g2
9651
9652 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9653 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9654 */
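/* For illustration (derived from the SETHI/JMPL layouts above), the
   opcode constants or'ed in below appear to decode as
     0x03000000  sethi %hi(0), %g1
     0x05000000  sethi %hi(0), %g2
     0x81c06000  jmpl  %g1+0, %g0	(i.e. jmp %g1+0)
     0x8410a000  or    %g2, 0, %g2
   with the high and low parts of FNADDR and CXT merged in by the
   moves that follow.  */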
9655
9656 emit_move_insn
9657 (adjust_address (m_tramp, SImode, 0),
9658 expand_binop (SImode, ior_optab,
9659 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9660 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9661 NULL_RTX, 1, OPTAB_DIRECT));
9662
9663 emit_move_insn
9664 (adjust_address (m_tramp, SImode, 4),
9665 expand_binop (SImode, ior_optab,
9666 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9667 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9668 NULL_RTX, 1, OPTAB_DIRECT));
9669
9670 emit_move_insn
9671 (adjust_address (m_tramp, SImode, 8),
9672 expand_binop (SImode, ior_optab,
9673 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9674 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9675 NULL_RTX, 1, OPTAB_DIRECT));
9676
9677 emit_move_insn
9678 (adjust_address (m_tramp, SImode, 12),
9679 expand_binop (SImode, ior_optab,
9680 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9681 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9682 NULL_RTX, 1, OPTAB_DIRECT));
9683
9684 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9685 aligned on a 16 byte boundary so one flush clears it all. */
9686 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9687 if (sparc_cpu != PROCESSOR_ULTRASPARC
9688 && sparc_cpu != PROCESSOR_ULTRASPARC3
9689 && sparc_cpu != PROCESSOR_NIAGARA
9690 && sparc_cpu != PROCESSOR_NIAGARA2
9691 && sparc_cpu != PROCESSOR_NIAGARA3
9692 && sparc_cpu != PROCESSOR_NIAGARA4
9693 && sparc_cpu != PROCESSOR_NIAGARA7
9694 && sparc_cpu != PROCESSOR_M8)
9695 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9696
9697 /* Call __enable_execute_stack after writing onto the stack to make sure
9698 the stack address is accessible. */
9699 #ifdef HAVE_ENABLE_EXECUTE_STACK
9700 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9701 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9702 #endif
9703
9704 }
9705
9706 /* The 64-bit version is simpler because it makes more sense to load the
9707 values as "immediate" data out of the trampoline. It's also easier since
9708 we can read the PC without clobbering a register. */
9709
9710 static void
9711 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9712 {
9713 /* SPARC 64-bit trampoline:
9714
9715 rd %pc, %g1
9716 ldx [%g1+24], %g5
9717 jmp %g5
9718 ldx [%g1+16], %g5
9719 +16 bytes data
9720 */
9721
9722 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9723 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9724 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9725 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9726 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9727 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9728 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9729 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9730 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9731 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9732 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9733
9734 if (sparc_cpu != PROCESSOR_ULTRASPARC
9735 && sparc_cpu != PROCESSOR_ULTRASPARC3
9736 && sparc_cpu != PROCESSOR_NIAGARA
9737 && sparc_cpu != PROCESSOR_NIAGARA2
9738 && sparc_cpu != PROCESSOR_NIAGARA3
9739 && sparc_cpu != PROCESSOR_NIAGARA4
9740 && sparc_cpu != PROCESSOR_NIAGARA7
9741 && sparc_cpu != PROCESSOR_M8)
9742 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9743
9744 /* Call __enable_execute_stack after writing onto the stack to make sure
9745 the stack address is accessible. */
9746 #ifdef HAVE_ENABLE_EXECUTE_STACK
9747 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9748 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9749 #endif
9750 }
9751
9752 /* Worker for TARGET_TRAMPOLINE_INIT. */
9753
9754 static void
9755 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9756 {
9757 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9758 cxt = force_reg (Pmode, cxt);
9759 if (TARGET_ARCH64)
9760 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9761 else
9762 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9763 }
9764 \f
9765 /* Adjust the cost of a scheduling dependency. Return the new cost of
9766 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9767
9768 static int
9769 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9770 int cost)
9771 {
9772 enum attr_type insn_type;
9773
9774 if (recog_memoized (insn) < 0)
9775 return cost;
9776
9777 insn_type = get_attr_type (insn);
9778
9779 if (dep_type == 0)
9780 {
9781 /* Data dependency; DEP_INSN writes a register that INSN reads some
9782 cycles later. */
9783
9784 /* If a load, then the dependence must be on the memory address;
9785 add an extra "cycle". Note that the cost could be two cycles
9786 if the reg was written late in an instruction group; we can't tell
9787 here. */
9788 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9789 return cost + 3;
9790
9791 /* Get the delay only if the address of the store is the dependence. */
9792 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9793 {
9794 rtx pat = PATTERN (insn);
9795 rtx dep_pat = PATTERN (dep_insn);
9796
9797 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9798 return cost; /* This should not happen! */
9799
9800 /* The dependency between the two instructions was on the data that
9801 is being stored. Assume that this implies that the address of the
9802 store is not dependent. */
9803 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9804 return cost;
9805
9806 return cost + 3; /* An approximation. */
9807 }
9808
9809 /* A shift instruction cannot receive its data from an instruction
9810 in the same cycle; add a one cycle penalty. */
9811 if (insn_type == TYPE_SHIFT)
9812 return cost + 3; /* Split before cascade into shift. */
9813 }
9814 else
9815 {
9816 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9817 INSN writes some cycles later. */
9818
9819 /* These are only significant for the fpu unit; writing a fp reg before
9820 the fpu has finished with it stalls the processor. */
9821
9822 /* Reusing an integer register causes no problems. */
9823 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9824 return 0;
9825 }
9826
9827 return cost;
9828 }
9829
9830 static int
9831 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9832 int cost)
9833 {
9834 enum attr_type insn_type, dep_type;
9835 rtx pat = PATTERN (insn);
9836 rtx dep_pat = PATTERN (dep_insn);
9837
9838 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9839 return cost;
9840
9841 insn_type = get_attr_type (insn);
9842 dep_type = get_attr_type (dep_insn);
9843
9844 switch (dtype)
9845 {
9846 case 0:
9847 /* Data dependency; DEP_INSN writes a register that INSN reads some
9848 cycles later. */
9849
9850 switch (insn_type)
9851 {
9852 case TYPE_STORE:
9853 case TYPE_FPSTORE:
9854 /* Get the delay iff the address of the store is the dependence. */
9855 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9856 return cost;
9857
9858 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9859 return cost;
9860 return cost + 3;
9861
9862 case TYPE_LOAD:
9863 case TYPE_SLOAD:
9864 case TYPE_FPLOAD:
9865 /* If a load, then the dependence must be on the memory address. If
9866 the addresses aren't equal, then it might be a false dependency. */
9867 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9868 {
9869 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9870 || GET_CODE (SET_DEST (dep_pat)) != MEM
9871 || GET_CODE (SET_SRC (pat)) != MEM
9872 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9873 XEXP (SET_SRC (pat), 0)))
9874 return cost + 2;
9875
9876 return cost + 8;
9877 }
9878 break;
9879
9880 case TYPE_BRANCH:
9881 /* Compare to branch latency is 0. There is no benefit from
9882 separating compare and branch. */
9883 if (dep_type == TYPE_COMPARE)
9884 return 0;
9885 /* Floating point compare to branch latency is less than
9886 compare to conditional move. */
9887 if (dep_type == TYPE_FPCMP)
9888 return cost - 1;
9889 break;
9890 default:
9891 break;
9892 }
9893 break;
9894
9895 case REG_DEP_ANTI:
9896 /* Anti-dependencies only penalize the fpu unit. */
9897 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9898 return 0;
9899 break;
9900
9901 default:
9902 break;
9903 }
9904
9905 return cost;
9906 }
9907
9908 static int
9909 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9910 unsigned int)
9911 {
9912 switch (sparc_cpu)
9913 {
9914 case PROCESSOR_SUPERSPARC:
9915 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9916 break;
9917 case PROCESSOR_HYPERSPARC:
9918 case PROCESSOR_SPARCLITE86X:
9919 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9920 break;
9921 default:
9922 break;
9923 }
9924 return cost;
9925 }
9926
9927 static void
9928 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9929 int sched_verbose ATTRIBUTE_UNUSED,
9930 int max_ready ATTRIBUTE_UNUSED)
9931 {}
9932
9933 static int
9934 sparc_use_sched_lookahead (void)
9935 {
9936 if (sparc_cpu == PROCESSOR_NIAGARA
9937 || sparc_cpu == PROCESSOR_NIAGARA2
9938 || sparc_cpu == PROCESSOR_NIAGARA3)
9939 return 0;
9940 if (sparc_cpu == PROCESSOR_NIAGARA4
9941 || sparc_cpu == PROCESSOR_NIAGARA7
9942 || sparc_cpu == PROCESSOR_M8)
9943 return 2;
9944 if (sparc_cpu == PROCESSOR_ULTRASPARC
9945 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9946 return 4;
9947 if ((1 << sparc_cpu) &
9948 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9949 (1 << PROCESSOR_SPARCLITE86X)))
9950 return 3;
9951 return 0;
9952 }
9953
9954 static int
9955 sparc_issue_rate (void)
9956 {
9957 switch (sparc_cpu)
9958 {
9959 case PROCESSOR_NIAGARA:
9960 case PROCESSOR_NIAGARA2:
9961 case PROCESSOR_NIAGARA3:
9962 default:
9963 return 1;
9964 case PROCESSOR_NIAGARA4:
9965 case PROCESSOR_NIAGARA7:
9966 case PROCESSOR_V9:
9967 /* Assume V9 processors are capable of at least dual-issue. */
9968 return 2;
9969 case PROCESSOR_SUPERSPARC:
9970 return 3;
9971 case PROCESSOR_HYPERSPARC:
9972 case PROCESSOR_SPARCLITE86X:
9973 return 2;
9974 case PROCESSOR_ULTRASPARC:
9975 case PROCESSOR_ULTRASPARC3:
9976 case PROCESSOR_M8:
9977 return 4;
9978 }
9979 }
9980
9981 static int
9982 set_extends (rtx_insn *insn)
9983 {
9984 register rtx pat = PATTERN (insn);
9985
9986 switch (GET_CODE (SET_SRC (pat)))
9987 {
9988 /* Load and some shift instructions zero extend. */
9989 case MEM:
9990 case ZERO_EXTEND:
9991 /* sethi clears the high bits */
9992 case HIGH:
9993 /* LO_SUM is used with sethi. sethi cleared the high
9994 bits and the values used with lo_sum are positive */
9995 case LO_SUM:
9996 /* Store flag stores 0 or 1 */
9997 case LT: case LTU:
9998 case GT: case GTU:
9999 case LE: case LEU:
10000 case GE: case GEU:
10001 case EQ:
10002 case NE:
10003 return 1;
10004 case AND:
10005 {
10006 rtx op0 = XEXP (SET_SRC (pat), 0);
10007 rtx op1 = XEXP (SET_SRC (pat), 1);
10008 if (GET_CODE (op1) == CONST_INT)
10009 return INTVAL (op1) >= 0;
10010 if (GET_CODE (op0) != REG)
10011 return 0;
10012 if (sparc_check_64 (op0, insn) == 1)
10013 return 1;
10014 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10015 }
10016 case IOR:
10017 case XOR:
10018 {
10019 rtx op0 = XEXP (SET_SRC (pat), 0);
10020 rtx op1 = XEXP (SET_SRC (pat), 1);
10021 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10022 return 0;
10023 if (GET_CODE (op1) == CONST_INT)
10024 return INTVAL (op1) >= 0;
10025 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10026 }
10027 case LSHIFTRT:
10028 return GET_MODE (SET_SRC (pat)) == SImode;
10029 /* Positive integers leave the high bits zero. */
10030 case CONST_INT:
10031 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10032 case ASHIFTRT:
10033 case SIGN_EXTEND:
10034 return - (GET_MODE (SET_SRC (pat)) == SImode);
10035 case REG:
10036 return sparc_check_64 (SET_SRC (pat), insn);
10037 default:
10038 return 0;
10039 }
10040 }
10041
10042 /* We _ought_ to have only one kind per function, but... */
10043 static GTY(()) rtx sparc_addr_diff_list;
10044 static GTY(()) rtx sparc_addr_list;
10045
10046 void
10047 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10048 {
10049 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10050 if (diff)
10051 sparc_addr_diff_list
10052 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10053 else
10054 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10055 }
10056
10057 static void
10058 sparc_output_addr_vec (rtx vec)
10059 {
10060 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10061 int idx, vlen = XVECLEN (body, 0);
10062
10063 #ifdef ASM_OUTPUT_ADDR_VEC_START
10064 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10065 #endif
10066
10067 #ifdef ASM_OUTPUT_CASE_LABEL
10068 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10069 NEXT_INSN (lab));
10070 #else
10071 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10072 #endif
10073
10074 for (idx = 0; idx < vlen; idx++)
10075 {
10076 ASM_OUTPUT_ADDR_VEC_ELT
10077 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10078 }
10079
10080 #ifdef ASM_OUTPUT_ADDR_VEC_END
10081 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10082 #endif
10083 }
10084
10085 static void
10086 sparc_output_addr_diff_vec (rtx vec)
10087 {
10088 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10089 rtx base = XEXP (XEXP (body, 0), 0);
10090 int idx, vlen = XVECLEN (body, 1);
10091
10092 #ifdef ASM_OUTPUT_ADDR_VEC_START
10093 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10094 #endif
10095
10096 #ifdef ASM_OUTPUT_CASE_LABEL
10097 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10098 NEXT_INSN (lab));
10099 #else
10100 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10101 #endif
10102
10103 for (idx = 0; idx < vlen; idx++)
10104 {
10105 ASM_OUTPUT_ADDR_DIFF_ELT
10106 (asm_out_file,
10107 body,
10108 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10109 CODE_LABEL_NUMBER (base));
10110 }
10111
10112 #ifdef ASM_OUTPUT_ADDR_VEC_END
10113 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10114 #endif
10115 }
10116
10117 static void
10118 sparc_output_deferred_case_vectors (void)
10119 {
10120 rtx t;
10121 int align;
10122
10123 if (sparc_addr_list == NULL_RTX
10124 && sparc_addr_diff_list == NULL_RTX)
10125 return;
10126
10127 /* Align to cache line in the function's code section. */
10128 switch_to_section (current_function_section ());
10129
10130 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10131 if (align > 0)
10132 ASM_OUTPUT_ALIGN (asm_out_file, align);
10133
10134 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10135 sparc_output_addr_vec (XEXP (t, 0));
10136 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10137 sparc_output_addr_diff_vec (XEXP (t, 0));
10138
10139 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10140 }
10141
10142 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10143 unknown. Return 1 if the high bits are zero, -1 if the register is
10144 sign extended. */
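/* For example, after a SImode load such as "ld [addr], %o0" the upper
   32 bits of %o0 are known to be zero (set_extends returns 1 for a MEM
   source), so a caller can skip an explicit "srl %o0, 0, %o0".  */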
10145 int
10146 sparc_check_64 (rtx x, rtx_insn *insn)
10147 {
10148 /* If a register is set only once it is safe to ignore insns this
10149 code does not know how to handle. The loop will either recognize
10150 the single set and return the correct value or fail to recognize
10151 it and return 0. */
10152 int set_once = 0;
10153 rtx y = x;
10154
10155 gcc_assert (GET_CODE (x) == REG);
10156
10157 if (GET_MODE (x) == DImode)
10158 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10159
10160 if (flag_expensive_optimizations
10161 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10162 set_once = 1;
10163
10164 if (insn == 0)
10165 {
10166 if (set_once)
10167 insn = get_last_insn_anywhere ();
10168 else
10169 return 0;
10170 }
10171
10172 while ((insn = PREV_INSN (insn)))
10173 {
10174 switch (GET_CODE (insn))
10175 {
10176 case JUMP_INSN:
10177 case NOTE:
10178 break;
10179 case CODE_LABEL:
10180 case CALL_INSN:
10181 default:
10182 if (! set_once)
10183 return 0;
10184 break;
10185 case INSN:
10186 {
10187 rtx pat = PATTERN (insn);
10188 if (GET_CODE (pat) != SET)
10189 return 0;
10190 if (rtx_equal_p (x, SET_DEST (pat)))
10191 return set_extends (insn);
10192 if (y && rtx_equal_p (y, SET_DEST (pat)))
10193 return set_extends (insn);
10194 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10195 return 0;
10196 }
10197 }
10198 }
10199 return 0;
10200 }
10201
10202 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10203 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
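/* A sketch of the generated sequence (not an exact template): with
   OPCODE "sllx", a non-constant source whose upper half is not known
   to be zero, and the first alternative, this emits roughly
	sllx	%H1, 32, %0
	srl	%L1, 0, %L1
	or	%L1, %0, %0
	sllx	%0, %2, %L0
	srlx	%L0, 32, %H0  */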
10204
10205 const char *
10206 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10207 {
10208 static char asm_code[60];
10209
10210 /* The scratch register is only required when the destination
10211 register is not a 64-bit global or out register. */
10212 if (which_alternative != 2)
10213 operands[3] = operands[0];
10214
10215 /* We can only shift by constants <= 63. */
10216 if (GET_CODE (operands[2]) == CONST_INT)
10217 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10218
10219 if (GET_CODE (operands[1]) == CONST_INT)
10220 {
10221 output_asm_insn ("mov\t%1, %3", operands);
10222 }
10223 else
10224 {
10225 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10226 if (sparc_check_64 (operands[1], insn) <= 0)
10227 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10228 output_asm_insn ("or\t%L1, %3, %3", operands);
10229 }
10230
10231 strcpy (asm_code, opcode);
10232
10233 if (which_alternative != 2)
10234 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10235 else
10236 return
10237 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10238 }
10239 \f
10240 /* Output rtl to increment the profiler label LABELNO
10241 for profiling a function entry. */
10242
10243 void
10244 sparc_profile_hook (int labelno)
10245 {
10246 char buf[32];
10247 rtx lab, fun;
10248
10249 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10250 if (NO_PROFILE_COUNTERS)
10251 {
10252 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10253 }
10254 else
10255 {
10256 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10257 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10258 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10259 }
10260 }
10261 \f
10262 #ifdef TARGET_SOLARIS
10263 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
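/* E.g. for a hypothetical writable data section named ".my_data" this
   prints something like
	.section	".my_data",#alloc,#write,#progbits
   using the Solaris #-style section flags.  */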
10264
10265 static void
10266 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10267 tree decl ATTRIBUTE_UNUSED)
10268 {
10269 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10270 {
10271 solaris_elf_asm_comdat_section (name, flags, decl);
10272 return;
10273 }
10274
10275 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10276
10277 if (!(flags & SECTION_DEBUG))
10278 fputs (",#alloc", asm_out_file);
10279 if (flags & SECTION_WRITE)
10280 fputs (",#write", asm_out_file);
10281 if (flags & SECTION_TLS)
10282 fputs (",#tls", asm_out_file);
10283 if (flags & SECTION_CODE)
10284 fputs (",#execinstr", asm_out_file);
10285
10286 if (flags & SECTION_NOTYPE)
10287 ;
10288 else if (flags & SECTION_BSS)
10289 fputs (",#nobits", asm_out_file);
10290 else
10291 fputs (",#progbits", asm_out_file);
10292
10293 fputc ('\n', asm_out_file);
10294 }
10295 #endif /* TARGET_SOLARIS */
10296
10297 /* We do not allow indirect calls to be optimized into sibling calls.
10298
10299 We cannot use sibling calls when delayed branches are disabled
10300 because they will likely require the call delay slot to be filled.
10301
10302 Also, on SPARC 32-bit we cannot emit a sibling call when the
10303 current function returns a structure. This is because the "unimp
10304 after call" convention would cause the callee to return to the
10305 wrong place. The generic code already disallows cases where the
10306 function being called returns a structure.
10307
10308 It may seem strange how this last case could occur. Usually there
10309 is code after the call which jumps to epilogue code which dumps the
10310 return value into the struct return area. That ought to invalidate
10311 the sibling call right? Well, in the C++ case we can end up passing
10312 the pointer to the struct return area to a constructor (which returns
10313 void) and then nothing else happens. Such a sibling call would look
10314 valid without the added check here.
10315
10316 VxWorks PIC PLT entries require the global pointer to be initialized
10317 on entry. We therefore can't emit sibling calls to them. */
10318 static bool
10319 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10320 {
10321 return (decl
10322 && flag_delayed_branch
10323 && (TARGET_ARCH64 || ! cfun->returns_struct)
10324 && !(TARGET_VXWORKS_RTP
10325 && flag_pic
10326 && !targetm.binds_local_p (decl)));
10327 }
10328 \f
10329 /* libfunc renaming. */
10330
10331 static void
10332 sparc_init_libfuncs (void)
10333 {
10334 if (TARGET_ARCH32)
10335 {
10336 /* Use the subroutines that Sun's library provides for integer
10337 multiply and divide. The `*' prevents an underscore from
10338 being prepended by the compiler. .umul is a little faster
10339 than .mul. */
10340 set_optab_libfunc (smul_optab, SImode, "*.umul");
10341 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10342 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10343 set_optab_libfunc (smod_optab, SImode, "*.rem");
10344 set_optab_libfunc (umod_optab, SImode, "*.urem");
10345
10346 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10347 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10348 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10349 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10350 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10351 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10352
10353 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10354 is because with soft-float, the SFmode and DFmode sqrt
10355 instructions will be absent, and the compiler will notice and
10356 try to use the TFmode sqrt instruction for calls to the
10357 builtin function sqrt, but this fails. */
10358 if (TARGET_FPU)
10359 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10360
10361 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10362 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10363 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10364 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10365 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10366 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10367
10368 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10369 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10370 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10371 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10372
10373 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10374 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10375 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10376 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10377
10378 if (DITF_CONVERSION_LIBFUNCS)
10379 {
10380 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10381 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10382 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10383 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10384 }
10385
10386 if (SUN_CONVERSION_LIBFUNCS)
10387 {
10388 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10389 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10390 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10391 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10392 }
10393 }
10394 if (TARGET_ARCH64)
10395 {
10396 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10397 do not exist in the library. Make sure the compiler does not
10398 emit calls to them by accident. (It should always use the
10399 hardware instructions.) */
10400 set_optab_libfunc (smul_optab, SImode, 0);
10401 set_optab_libfunc (sdiv_optab, SImode, 0);
10402 set_optab_libfunc (udiv_optab, SImode, 0);
10403 set_optab_libfunc (smod_optab, SImode, 0);
10404 set_optab_libfunc (umod_optab, SImode, 0);
10405
10406 if (SUN_INTEGER_MULTIPLY_64)
10407 {
10408 set_optab_libfunc (smul_optab, DImode, "__mul64");
10409 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10410 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10411 set_optab_libfunc (smod_optab, DImode, "__rem64");
10412 set_optab_libfunc (umod_optab, DImode, "__urem64");
10413 }
10414
10415 if (SUN_CONVERSION_LIBFUNCS)
10416 {
10417 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10418 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10419 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10420 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10421 }
10422 }
10423 }
10424 \f
10425 /* SPARC builtins. */
10426 enum sparc_builtins
10427 {
10428 /* FPU builtins. */
10429 SPARC_BUILTIN_LDFSR,
10430 SPARC_BUILTIN_STFSR,
10431
10432 /* VIS 1.0 builtins. */
10433 SPARC_BUILTIN_FPACK16,
10434 SPARC_BUILTIN_FPACK32,
10435 SPARC_BUILTIN_FPACKFIX,
10436 SPARC_BUILTIN_FEXPAND,
10437 SPARC_BUILTIN_FPMERGE,
10438 SPARC_BUILTIN_FMUL8X16,
10439 SPARC_BUILTIN_FMUL8X16AU,
10440 SPARC_BUILTIN_FMUL8X16AL,
10441 SPARC_BUILTIN_FMUL8SUX16,
10442 SPARC_BUILTIN_FMUL8ULX16,
10443 SPARC_BUILTIN_FMULD8SUX16,
10444 SPARC_BUILTIN_FMULD8ULX16,
10445 SPARC_BUILTIN_FALIGNDATAV4HI,
10446 SPARC_BUILTIN_FALIGNDATAV8QI,
10447 SPARC_BUILTIN_FALIGNDATAV2SI,
10448 SPARC_BUILTIN_FALIGNDATADI,
10449 SPARC_BUILTIN_WRGSR,
10450 SPARC_BUILTIN_RDGSR,
10451 SPARC_BUILTIN_ALIGNADDR,
10452 SPARC_BUILTIN_ALIGNADDRL,
10453 SPARC_BUILTIN_PDIST,
10454 SPARC_BUILTIN_EDGE8,
10455 SPARC_BUILTIN_EDGE8L,
10456 SPARC_BUILTIN_EDGE16,
10457 SPARC_BUILTIN_EDGE16L,
10458 SPARC_BUILTIN_EDGE32,
10459 SPARC_BUILTIN_EDGE32L,
10460 SPARC_BUILTIN_FCMPLE16,
10461 SPARC_BUILTIN_FCMPLE32,
10462 SPARC_BUILTIN_FCMPNE16,
10463 SPARC_BUILTIN_FCMPNE32,
10464 SPARC_BUILTIN_FCMPGT16,
10465 SPARC_BUILTIN_FCMPGT32,
10466 SPARC_BUILTIN_FCMPEQ16,
10467 SPARC_BUILTIN_FCMPEQ32,
10468 SPARC_BUILTIN_FPADD16,
10469 SPARC_BUILTIN_FPADD16S,
10470 SPARC_BUILTIN_FPADD32,
10471 SPARC_BUILTIN_FPADD32S,
10472 SPARC_BUILTIN_FPSUB16,
10473 SPARC_BUILTIN_FPSUB16S,
10474 SPARC_BUILTIN_FPSUB32,
10475 SPARC_BUILTIN_FPSUB32S,
10476 SPARC_BUILTIN_ARRAY8,
10477 SPARC_BUILTIN_ARRAY16,
10478 SPARC_BUILTIN_ARRAY32,
10479
10480 /* VIS 2.0 builtins. */
10481 SPARC_BUILTIN_EDGE8N,
10482 SPARC_BUILTIN_EDGE8LN,
10483 SPARC_BUILTIN_EDGE16N,
10484 SPARC_BUILTIN_EDGE16LN,
10485 SPARC_BUILTIN_EDGE32N,
10486 SPARC_BUILTIN_EDGE32LN,
10487 SPARC_BUILTIN_BMASK,
10488 SPARC_BUILTIN_BSHUFFLEV4HI,
10489 SPARC_BUILTIN_BSHUFFLEV8QI,
10490 SPARC_BUILTIN_BSHUFFLEV2SI,
10491 SPARC_BUILTIN_BSHUFFLEDI,
10492
10493 /* VIS 3.0 builtins. */
10494 SPARC_BUILTIN_CMASK8,
10495 SPARC_BUILTIN_CMASK16,
10496 SPARC_BUILTIN_CMASK32,
10497 SPARC_BUILTIN_FCHKSM16,
10498 SPARC_BUILTIN_FSLL16,
10499 SPARC_BUILTIN_FSLAS16,
10500 SPARC_BUILTIN_FSRL16,
10501 SPARC_BUILTIN_FSRA16,
10502 SPARC_BUILTIN_FSLL32,
10503 SPARC_BUILTIN_FSLAS32,
10504 SPARC_BUILTIN_FSRL32,
10505 SPARC_BUILTIN_FSRA32,
10506 SPARC_BUILTIN_PDISTN,
10507 SPARC_BUILTIN_FMEAN16,
10508 SPARC_BUILTIN_FPADD64,
10509 SPARC_BUILTIN_FPSUB64,
10510 SPARC_BUILTIN_FPADDS16,
10511 SPARC_BUILTIN_FPADDS16S,
10512 SPARC_BUILTIN_FPSUBS16,
10513 SPARC_BUILTIN_FPSUBS16S,
10514 SPARC_BUILTIN_FPADDS32,
10515 SPARC_BUILTIN_FPADDS32S,
10516 SPARC_BUILTIN_FPSUBS32,
10517 SPARC_BUILTIN_FPSUBS32S,
10518 SPARC_BUILTIN_FUCMPLE8,
10519 SPARC_BUILTIN_FUCMPNE8,
10520 SPARC_BUILTIN_FUCMPGT8,
10521 SPARC_BUILTIN_FUCMPEQ8,
10522 SPARC_BUILTIN_FHADDS,
10523 SPARC_BUILTIN_FHADDD,
10524 SPARC_BUILTIN_FHSUBS,
10525 SPARC_BUILTIN_FHSUBD,
10526 SPARC_BUILTIN_FNHADDS,
10527 SPARC_BUILTIN_FNHADDD,
10528 SPARC_BUILTIN_UMULXHI,
10529 SPARC_BUILTIN_XMULX,
10530 SPARC_BUILTIN_XMULXHI,
10531
10532 /* VIS 4.0 builtins. */
10533 SPARC_BUILTIN_FPADD8,
10534 SPARC_BUILTIN_FPADDS8,
10535 SPARC_BUILTIN_FPADDUS8,
10536 SPARC_BUILTIN_FPADDUS16,
10537 SPARC_BUILTIN_FPCMPLE8,
10538 SPARC_BUILTIN_FPCMPGT8,
10539 SPARC_BUILTIN_FPCMPULE16,
10540 SPARC_BUILTIN_FPCMPUGT16,
10541 SPARC_BUILTIN_FPCMPULE32,
10542 SPARC_BUILTIN_FPCMPUGT32,
10543 SPARC_BUILTIN_FPMAX8,
10544 SPARC_BUILTIN_FPMAX16,
10545 SPARC_BUILTIN_FPMAX32,
10546 SPARC_BUILTIN_FPMAXU8,
10547 SPARC_BUILTIN_FPMAXU16,
10548 SPARC_BUILTIN_FPMAXU32,
10549 SPARC_BUILTIN_FPMIN8,
10550 SPARC_BUILTIN_FPMIN16,
10551 SPARC_BUILTIN_FPMIN32,
10552 SPARC_BUILTIN_FPMINU8,
10553 SPARC_BUILTIN_FPMINU16,
10554 SPARC_BUILTIN_FPMINU32,
10555 SPARC_BUILTIN_FPSUB8,
10556 SPARC_BUILTIN_FPSUBS8,
10557 SPARC_BUILTIN_FPSUBUS8,
10558 SPARC_BUILTIN_FPSUBUS16,
10559
10560 /* VIS 4.0B builtins. */
10561
10562 /* Note that all the DICTUNPACK* entries should be kept
10563 contiguous. */
10564 SPARC_BUILTIN_FIRST_DICTUNPACK,
10565 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10566 SPARC_BUILTIN_DICTUNPACK16,
10567 SPARC_BUILTIN_DICTUNPACK32,
10568 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10569
10570 /* Note that all the FPCMP*SHL entries should be kept
10571 contiguous. */
10572 SPARC_BUILTIN_FIRST_FPCMPSHL,
10573 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10574 SPARC_BUILTIN_FPCMPGT8SHL,
10575 SPARC_BUILTIN_FPCMPEQ8SHL,
10576 SPARC_BUILTIN_FPCMPNE8SHL,
10577 SPARC_BUILTIN_FPCMPLE16SHL,
10578 SPARC_BUILTIN_FPCMPGT16SHL,
10579 SPARC_BUILTIN_FPCMPEQ16SHL,
10580 SPARC_BUILTIN_FPCMPNE16SHL,
10581 SPARC_BUILTIN_FPCMPLE32SHL,
10582 SPARC_BUILTIN_FPCMPGT32SHL,
10583 SPARC_BUILTIN_FPCMPEQ32SHL,
10584 SPARC_BUILTIN_FPCMPNE32SHL,
10585 SPARC_BUILTIN_FPCMPULE8SHL,
10586 SPARC_BUILTIN_FPCMPUGT8SHL,
10587 SPARC_BUILTIN_FPCMPULE16SHL,
10588 SPARC_BUILTIN_FPCMPUGT16SHL,
10589 SPARC_BUILTIN_FPCMPULE32SHL,
10590 SPARC_BUILTIN_FPCMPUGT32SHL,
10591 SPARC_BUILTIN_FPCMPDE8SHL,
10592 SPARC_BUILTIN_FPCMPDE16SHL,
10593 SPARC_BUILTIN_FPCMPDE32SHL,
10594 SPARC_BUILTIN_FPCMPUR8SHL,
10595 SPARC_BUILTIN_FPCMPUR16SHL,
10596 SPARC_BUILTIN_FPCMPUR32SHL,
10597 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10598
10599 SPARC_BUILTIN_MAX
10600 };
10601
10602 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10603 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10604
10605 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10606 The instruction should require a constant operand of some sort. The
10607 function prints an error if OPVAL is not valid. */
10608
10609 static int
10610 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10611 {
10612 if (GET_CODE (opval) != CONST_INT)
10613 {
10614 error ("%qs expects a constant argument", insn_data[icode].name);
10615 return false;
10616 }
10617
10618 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10619 {
10620 error ("constant argument out of range for %qs", insn_data[icode].name);
10621 return false;
10622 }
10623 return true;
10624 }
10625
10626 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10627 function decl or NULL_TREE if the builtin was not added. */
10628
10629 static tree
10630 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10631 tree type)
10632 {
10633 tree t
10634 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10635
10636 if (t)
10637 {
10638 sparc_builtins[code] = t;
10639 sparc_builtins_icode[code] = icode;
10640 }
10641
10642 return t;
10643 }
10644
10645 /* Likewise, but also marks the function as "const". */
10646
10647 static tree
10648 def_builtin_const (const char *name, enum insn_code icode,
10649 enum sparc_builtins code, tree type)
10650 {
10651 tree t = def_builtin (name, icode, code, type);
10652
10653 if (t)
10654 TREE_READONLY (t) = 1;
10655
10656 return t;
10657 }
10658
10659 /* Implement the TARGET_INIT_BUILTINS target hook.
10660 Create builtin functions for special SPARC instructions. */
10661
10662 static void
10663 sparc_init_builtins (void)
10664 {
10665 if (TARGET_FPU)
10666 sparc_fpu_init_builtins ();
10667
10668 if (TARGET_VIS)
10669 sparc_vis_init_builtins ();
10670 }
10671
10672 /* Create builtin functions for FPU instructions. */
10673
10674 static void
10675 sparc_fpu_init_builtins (void)
10676 {
10677 tree ftype
10678 = build_function_type_list (void_type_node,
10679 build_pointer_type (unsigned_type_node), 0);
10680 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10681 SPARC_BUILTIN_LDFSR, ftype);
10682 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10683 SPARC_BUILTIN_STFSR, ftype);
10684 }
10685
10686 /* Create builtin functions for VIS instructions. */
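/* A sketch of typical usage, assuming -mvis and the names defined
   below: the v4hi builtins take 8-byte short vectors, e.g.
	typedef short vec16 __attribute__ ((vector_size (8)));
	vec16 sum = __builtin_vis_fpadd16 (a, b);
   which expands through CODE_FOR_addv4hi3.  */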
10687
10688 static void
10689 sparc_vis_init_builtins (void)
10690 {
10691 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10692 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10693 tree v4hi = build_vector_type (intHI_type_node, 4);
10694 tree v2hi = build_vector_type (intHI_type_node, 2);
10695 tree v2si = build_vector_type (intSI_type_node, 2);
10696 tree v1si = build_vector_type (intSI_type_node, 1);
10697
10698 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10699 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10700 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10701 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10702 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10703 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10704 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10705 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10706 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10707 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10708 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10709 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10710 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10711 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10712 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10713 v8qi, v8qi,
10714 intDI_type_node, 0);
10715 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10716 v8qi, v8qi, 0);
10717 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10718 v8qi, v8qi, 0);
10719 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10720 intSI_type_node, 0);
10721 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10722 intSI_type_node, 0);
10723 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10724 intDI_type_node, 0);
10725 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10726 intDI_type_node,
10727 intDI_type_node, 0);
10728 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10729 intSI_type_node,
10730 intSI_type_node, 0);
10731 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10732 ptr_type_node,
10733 intSI_type_node, 0);
10734 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10735 ptr_type_node,
10736 intDI_type_node, 0);
10737 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10738 ptr_type_node,
10739 ptr_type_node, 0);
10740 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10741 ptr_type_node,
10742 ptr_type_node, 0);
10743 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10744 v4hi, v4hi, 0);
10745 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10746 v2si, v2si, 0);
10747 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10748 v4hi, v4hi, 0);
10749 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10750 v2si, v2si, 0);
10751 tree void_ftype_di = build_function_type_list (void_type_node,
10752 intDI_type_node, 0);
10753 tree di_ftype_void = build_function_type_list (intDI_type_node,
10754 void_type_node, 0);
10755 tree void_ftype_si = build_function_type_list (void_type_node,
10756 intSI_type_node, 0);
10757 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10758 float_type_node,
10759 float_type_node, 0);
10760 tree df_ftype_df_df = build_function_type_list (double_type_node,
10761 double_type_node,
10762 double_type_node, 0);
10763
10764 /* Packing and expanding vectors. */
10765 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10766 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10767 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10768 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10769 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10770 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10771 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10772 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10773 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10774 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10775
10776 /* Multiplications. */
10777 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10778 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10779 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10780 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10781 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10782 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10783 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10784 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10785 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10786 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10787 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10788 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10789 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10790 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10791
10792 /* Data aligning. */
10793 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10794 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10795 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10796 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10797 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10798 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10799 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10800 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10801
10802 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10803 SPARC_BUILTIN_WRGSR, void_ftype_di);
10804 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10805 SPARC_BUILTIN_RDGSR, di_ftype_void);
10806
10807 if (TARGET_ARCH64)
10808 {
10809 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10810 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10811 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10812 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10813 }
10814 else
10815 {
10816 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10817 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10818 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10819 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10820 }
10821
10822 /* Pixel distance. */
10823 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10824 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10825
10826 /* Edge handling. */
10827 if (TARGET_ARCH64)
10828 {
10829 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10830 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10831 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10832 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10833 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10834 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10835 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10836 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10837 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10838 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10839 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10840 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10841 }
10842 else
10843 {
10844 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10845 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10846 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10847 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10848 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10849 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10850 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10851 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10852 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10853 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10854 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10855 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10856 }
10857
10858 /* Pixel compare. */
10859 if (TARGET_ARCH64)
10860 {
10861 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10862 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10863 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10864 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10865 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10866 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10867 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10868 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10869 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10870 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10871 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10872 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10873 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10874 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10875 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10876 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10877 }
10878 else
10879 {
10880 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10881 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10882 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10883 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10884 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10885 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10886 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10887 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10888 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10889 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10890 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10891 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10892 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10893 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10894 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10895 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10896 }
10897
10898 /* Addition and subtraction. */
10899 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10900 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10901 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10902 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10903 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10904 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10905 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10906 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10907 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10908 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10909 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10910 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10911 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10912 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10913 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10914 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10915
10916 /* Three-dimensional array addressing. */
10917 if (TARGET_ARCH64)
10918 {
10919 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10920 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10921 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10922 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10923 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10924 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10925 }
10926 else
10927 {
10928 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10929 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10930 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10931 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10932 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10933 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10934 }
10935
10936 if (TARGET_VIS2)
10937 {
10938 /* Edge handling. */
10939 if (TARGET_ARCH64)
10940 {
10941 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10942 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10943 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10944 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10945 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10946 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10947 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10948 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10949 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10950 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10951 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10952 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10953 }
10954 else
10955 {
10956 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10957 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10958 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10959 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10960 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10961 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10962 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10963 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10964 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10965 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10966 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10967 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10968 }
10969
10970 /* Byte mask and shuffle. */
10971 if (TARGET_ARCH64)
10972 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10973 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10974 else
10975 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10976 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10977 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10978 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10979 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10980 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10981 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10982 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10983 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10984 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10985 }
10986
10987 if (TARGET_VIS3)
10988 {
10989 if (TARGET_ARCH64)
10990 {
10991 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10992 SPARC_BUILTIN_CMASK8, void_ftype_di);
10993 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10994 SPARC_BUILTIN_CMASK16, void_ftype_di);
10995 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10996 SPARC_BUILTIN_CMASK32, void_ftype_di);
10997 }
10998 else
10999 {
11000 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11001 SPARC_BUILTIN_CMASK8, void_ftype_si);
11002 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11003 SPARC_BUILTIN_CMASK16, void_ftype_si);
11004 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11005 SPARC_BUILTIN_CMASK32, void_ftype_si);
11006 }
11007
11008 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11009 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11010
11011 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11012 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11013 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11014 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11015 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11016 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11017 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11018 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11019 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11020 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11021 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11022 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11023 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11024 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11025 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11026 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11027
11028 if (TARGET_ARCH64)
11029 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11030 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11031 else
11032 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11033 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11034
11035 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11036 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11037 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11038 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11039 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11040 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11041
11042 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11043 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11044 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11045 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11046 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11047 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11048 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11049 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11050 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11051 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11052 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11053 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11054 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11055 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11056 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11057 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11058
11059 if (TARGET_ARCH64)
11060 {
11061 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11062 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11063 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11064 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11065 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11066 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11067 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11068 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11069 }
11070 else
11071 {
11072 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11073 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11074 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11075 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11076 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11077 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11078 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11079 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11080 }
11081
11082 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11083 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11084 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11085 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11086 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11087 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11088 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11089 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11090 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11091 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11092 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11093 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11094
11095 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11096 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11097 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11098 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11099 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11100 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11101 }
11102
11103 if (TARGET_VIS4)
11104 {
11105 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11106 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11107 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11108 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11109 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11110 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11111 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11112 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11113
11114
11115 if (TARGET_ARCH64)
11116 {
11117 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11118 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11119 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11120 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11121 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11122 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11123 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11124 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11125 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11126 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11127 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11128 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11129 }
11130 else
11131 {
11132 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11133 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11134 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11135 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11136 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11137 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11138 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11139 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11140 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11141 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11142 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11143 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11144 }
11145
11146 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11147 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11148 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11149 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11150 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11151 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11152 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11153 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11154 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11155 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11156 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11157 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11158 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11159 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11160 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11161 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11162 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11163 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11164 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11165 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11166 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11167 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11168 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11169 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11170 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11171 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11172 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11173 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11174 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11175 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11176 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11177 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11178 }
11179
11180 if (TARGET_VIS4B)
11181 {
11182 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11183 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11184 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11185 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11186 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11187 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11188
11189 if (TARGET_ARCH64)
11190 {
11191 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11192 v8qi, v8qi,
11193 intSI_type_node, 0);
11194 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11195 v4hi, v4hi,
11196 intSI_type_node, 0);
11197 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11198 v2si, v2si,
11199 intSI_type_node, 0);
11200
11201 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11202 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11203 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11204 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11205 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11206 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11207 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11208 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11209
11210 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11211 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11212 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11213 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11214 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11215 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11216 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11217 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11218
11219 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11220 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11221 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11222 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11223 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11224 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11225 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11226 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11227
11228
11229 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11230 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11231 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11232 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11233
11234 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11235 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11236 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11237 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11238
11239 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11240 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11241 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11242 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11243
11244 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11245 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11246 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11247 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11248 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11249 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11250
11251 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11252 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11253 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11254 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11255 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11256 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11257
11258 }
11259 else
11260 {
11261 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11262 v8qi, v8qi,
11263 intSI_type_node, 0);
11264 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11265 v4hi, v4hi,
11266 intSI_type_node, 0);
11267 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11268 v2si, v2si,
11269 intSI_type_node, 0);
11270
11271 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11272 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11273 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11274 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11275 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11276 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11277 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11278 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11279
11280 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11281 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11282 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11283 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11284 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11285 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11286 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11287 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11288
11289 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11290 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11291 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11292 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11293 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11294 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11295 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11296 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11297
11298
11299 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11300 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11301 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11302 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11303
11304 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11305 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11306 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11307 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11308
11309 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11310 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11311 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11312 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11313
11314 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11315 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11316 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11317 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11318 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11319 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11320
11321 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11322 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11323 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11324 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11325 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11326 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11327 }
11328 }
11329 }
11330
11331 /* Implement TARGET_BUILTIN_DECL hook. */
11332
11333 static tree
11334 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11335 {
11336 if (code >= SPARC_BUILTIN_MAX)
11337 return error_mark_node;
11338
11339 return sparc_builtins[code];
11340 }
11341
11342 /* Implement TARGET_EXPAND_BUILTIN hook. */
11343
11344 static rtx
11345 sparc_expand_builtin (tree exp, rtx target,
11346 rtx subtarget ATTRIBUTE_UNUSED,
11347 machine_mode tmode ATTRIBUTE_UNUSED,
11348 int ignore ATTRIBUTE_UNUSED)
11349 {
11350 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11351 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11352 enum insn_code icode = sparc_builtins_icode[code];
11353 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11354 call_expr_arg_iterator iter;
11355 int arg_count = 0;
11356 rtx pat, op[4];
11357 tree arg;
11358
11359 if (nonvoid)
11360 {
11361 machine_mode tmode = insn_data[icode].operand[0].mode;
11362 if (!target
11363 || GET_MODE (target) != tmode
11364 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11365 op[0] = gen_reg_rtx (tmode);
11366 else
11367 op[0] = target;
11368 }
11369
11370 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11371 {
11372 const struct insn_operand_data *insn_op;
11373 int idx;
11374
11375 if (arg == error_mark_node)
11376 return NULL_RTX;
11377
11378 arg_count++;
11379 idx = arg_count - !nonvoid;
11380 insn_op = &insn_data[icode].operand[idx];
11381 op[arg_count] = expand_normal (arg);
11382
11383 /* Some of the builtins require constant arguments. We check
11384 for this here. */
11385 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11386 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11387 && arg_count == 3)
11388 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11389 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11390 && arg_count == 2))
11391 {
11392 if (!check_constant_argument (icode, idx, op[arg_count]))
11393 return const0_rtx;
11394 }
11395
11396 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11397 {
11398 if (!address_operand (op[arg_count], SImode))
11399 {
11400 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11401 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11402 }
11403 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11404 }
11405
11406 else if (insn_op->mode == V1DImode
11407 && GET_MODE (op[arg_count]) == DImode)
11408 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11409
11410 else if (insn_op->mode == V1SImode
11411 && GET_MODE (op[arg_count]) == SImode)
11412 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11413
11414 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11415 insn_op->mode))
11416 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11417 }
11418
11419 switch (arg_count)
11420 {
11421 case 0:
11422 pat = GEN_FCN (icode) (op[0]);
11423 break;
11424 case 1:
11425 if (nonvoid)
11426 pat = GEN_FCN (icode) (op[0], op[1]);
11427 else
11428 pat = GEN_FCN (icode) (op[1]);
11429 break;
11430 case 2:
11431 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11432 break;
11433 case 3:
11434 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11435 break;
11436 default:
11437 gcc_unreachable ();
11438 }
11439
11440 if (!pat)
11441 return NULL_RTX;
11442
11443 emit_insn (pat);
11444
11445 return (nonvoid ? op[0] : const0_rtx);
11446 }
11447
11448 /* Return the upper 16 bits of the 8x16 multiplication. */
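/* For example, e8 == 100 and e16 == 515 gives (100 * 515 + 128) / 256 == 201,
   i.e. the 24-bit product with its low 8 bits rounded away.  */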
11449
11450 static int
11451 sparc_vis_mul8x16 (int e8, int e16)
11452 {
11453 return (e8 * e16 + 128) / 256;
11454 }
11455
11456 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11457 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11458
11459 static void
11460 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11461 tree inner_type, tree cst0, tree cst1)
11462 {
11463 unsigned i, num = VECTOR_CST_NELTS (cst0);
11464 int scale;
11465
11466 switch (fncode)
11467 {
11468 case SPARC_BUILTIN_FMUL8X16:
11469 for (i = 0; i < num; ++i)
11470 {
11471 int val
11472 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11473 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11474 n_elts->quick_push (build_int_cst (inner_type, val));
11475 }
11476 break;
11477
11478 case SPARC_BUILTIN_FMUL8X16AU:
11479 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11480
11481 for (i = 0; i < num; ++i)
11482 {
11483 int val
11484 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11485 scale);
11486 n_elts->quick_push (build_int_cst (inner_type, val));
11487 }
11488 break;
11489
11490 case SPARC_BUILTIN_FMUL8X16AL:
11491 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11492
11493 for (i = 0; i < num; ++i)
11494 {
11495 int val
11496 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11497 scale);
11498 n_elts->quick_push (build_int_cst (inner_type, val));
11499 }
11500 break;
11501
11502 default:
11503 gcc_unreachable ();
11504 }
11505 }
11506
11507 /* Implement TARGET_FOLD_BUILTIN hook.
11508
11509 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11510 result of the function call is ignored. NULL_TREE is returned if the
11511 function could not be folded. */
11512
11513 static tree
11514 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11515 tree *args, bool ignore)
11516 {
11517 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11518 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11519 tree arg0, arg1, arg2;
11520
11521 if (ignore)
11522 switch (code)
11523 {
11524 case SPARC_BUILTIN_LDFSR:
11525 case SPARC_BUILTIN_STFSR:
11526 case SPARC_BUILTIN_ALIGNADDR:
11527 case SPARC_BUILTIN_WRGSR:
11528 case SPARC_BUILTIN_BMASK:
11529 case SPARC_BUILTIN_CMASK8:
11530 case SPARC_BUILTIN_CMASK16:
11531 case SPARC_BUILTIN_CMASK32:
11532 break;
11533
11534 default:
11535 return build_zero_cst (rtype);
11536 }
11537
11538 switch (code)
11539 {
11540 case SPARC_BUILTIN_FEXPAND:
11541 arg0 = args[0];
11542 STRIP_NOPS (arg0);
11543
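/* fexpand widens each 8-bit element into a 16-bit fixed-point value, which
   for constant arguments amounts to shifting each element left by 4.  */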
11544 if (TREE_CODE (arg0) == VECTOR_CST)
11545 {
11546 tree inner_type = TREE_TYPE (rtype);
11547 unsigned i;
11548
11549 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11550 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11551 {
11552 unsigned HOST_WIDE_INT val
11553 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11554 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11555 }
11556 return build_vector (rtype, n_elts);
11557 }
11558 break;
11559
11560 case SPARC_BUILTIN_FMUL8X16:
11561 case SPARC_BUILTIN_FMUL8X16AU:
11562 case SPARC_BUILTIN_FMUL8X16AL:
11563 arg0 = args[0];
11564 arg1 = args[1];
11565 STRIP_NOPS (arg0);
11566 STRIP_NOPS (arg1);
11567
11568 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11569 {
11570 tree inner_type = TREE_TYPE (rtype);
11571 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11572 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11573 return build_vector (rtype, n_elts);
11574 }
11575 break;
11576
11577 case SPARC_BUILTIN_FPMERGE:
11578 arg0 = args[0];
11579 arg1 = args[1];
11580 STRIP_NOPS (arg0);
11581 STRIP_NOPS (arg1);
11582
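/* fpmerge interleaves the elements of its two operands, so for constant
   arguments we simply push them into the result vector alternately.  */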
11583 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11584 {
11585 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11586 unsigned i;
11587 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11588 {
11589 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11590 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11591 }
11592
11593 return build_vector (rtype, n_elts);
11594 }
11595 break;
11596
11597 case SPARC_BUILTIN_PDIST:
11598 case SPARC_BUILTIN_PDISTN:
11599 arg0 = args[0];
11600 arg1 = args[1];
11601 STRIP_NOPS (arg0);
11602 STRIP_NOPS (arg1);
11603 if (code == SPARC_BUILTIN_PDIST)
11604 {
11605 arg2 = args[2];
11606 STRIP_NOPS (arg2);
11607 }
11608 else
11609 arg2 = integer_zero_node;
11610
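/* Constant-fold the sum of absolute differences: accumulate |e0 - e1| over
   all elements on top of the optional accumulator ARG2, tracking overflow
   in the wide arithmetic.  */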
11611 if (TREE_CODE (arg0) == VECTOR_CST
11612 && TREE_CODE (arg1) == VECTOR_CST
11613 && TREE_CODE (arg2) == INTEGER_CST)
11614 {
11615 bool overflow = false;
11616 widest_int result = wi::to_widest (arg2);
11617 widest_int tmp;
11618 unsigned i;
11619
11620 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11621 {
11622 tree e0 = VECTOR_CST_ELT (arg0, i);
11623 tree e1 = VECTOR_CST_ELT (arg1, i);
11624
11625 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11626
11627 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11628 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11629 if (wi::neg_p (tmp))
11630 tmp = wi::neg (tmp, &neg2_ovf);
11631 else
11632 neg2_ovf = false;
11633 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11634 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11635 }
11636
11637 gcc_assert (!overflow);
11638
11639 return wide_int_to_tree (rtype, result);
11640 }
11641
11642 default:
11643 break;
11644 }
11645
11646 return NULL_TREE;
11647 }
11648 \f
11649 /* ??? This duplicates information provided to the compiler by the
11650 ??? scheduler description. Some day, teach genautomata to output
11651 ??? the latencies and then CSE will just use that. */
11652
11653 static bool
11654 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11655 int opno ATTRIBUTE_UNUSED,
11656 int *total, bool speed ATTRIBUTE_UNUSED)
11657 {
11658 int code = GET_CODE (x);
11659 bool float_mode_p = FLOAT_MODE_P (mode);
11660
11661 switch (code)
11662 {
11663 case CONST_INT:
11664 if (SMALL_INT (x))
11665 *total = 0;
11666 else
11667 *total = 2;
11668 return true;
11669
11670 case CONST_WIDE_INT:
11671 *total = 0;
11672 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11673 *total += 2;
11674 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11675 *total += 2;
11676 return true;
11677
11678 case HIGH:
11679 *total = 2;
11680 return true;
11681
11682 case CONST:
11683 case LABEL_REF:
11684 case SYMBOL_REF:
11685 *total = 4;
11686 return true;
11687
11688 case CONST_DOUBLE:
11689 *total = 8;
11690 return true;
11691
11692 case MEM:
11693 /* If outer-code was a sign or zero extension, a cost
11694 of COSTS_N_INSNS (1) was already added in. This is
11695 why we are subtracting it back out. */
11696 if (outer_code == ZERO_EXTEND)
11697 {
11698 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11699 }
11700 else if (outer_code == SIGN_EXTEND)
11701 {
11702 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11703 }
11704 else if (float_mode_p)
11705 {
11706 *total = sparc_costs->float_load;
11707 }
11708 else
11709 {
11710 *total = sparc_costs->int_load;
11711 }
11712
11713 return true;
11714
11715 case PLUS:
11716 case MINUS:
11717 if (float_mode_p)
11718 *total = sparc_costs->float_plusminus;
11719 else
11720 *total = COSTS_N_INSNS (1);
11721 return false;
11722
11723 case FMA:
11724 {
11725 rtx sub;
11726
11727 gcc_assert (float_mode_p);
11728 *total = sparc_costs->float_mul;
11729
11730 sub = XEXP (x, 0);
11731 if (GET_CODE (sub) == NEG)
11732 sub = XEXP (sub, 0);
11733 *total += rtx_cost (sub, mode, FMA, 0, speed);
11734
11735 sub = XEXP (x, 2);
11736 if (GET_CODE (sub) == NEG)
11737 sub = XEXP (sub, 0);
11738 *total += rtx_cost (sub, mode, FMA, 2, speed);
11739 return true;
11740 }
11741
11742 case MULT:
11743 if (float_mode_p)
11744 *total = sparc_costs->float_mul;
11745 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11746 *total = COSTS_N_INSNS (25);
11747 else
11748 {
11749 int bit_cost;
11750
11751 bit_cost = 0;
11752 if (sparc_costs->int_mul_bit_factor)
11753 {
11754 int nbits;
11755
11756 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11757 {
11758 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11759 for (nbits = 0; value != 0; value &= value - 1)
11760 nbits++;
11761 }
11762 else
11763 nbits = 7;
11764
11765 if (nbits < 3)
11766 nbits = 3;
11767 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11768 bit_cost = COSTS_N_INSNS (bit_cost);
11769 }
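/* For instance, a multiply by the constant 0x3ff (10 bits set) with
   int_mul_bit_factor == 2 adds bit_cost == COSTS_N_INSNS ((10 - 3) / 2)
   == COSTS_N_INSNS (3) on top of the base multiply cost.  */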
11770
11771 if (mode == DImode || !TARGET_HARD_MUL)
11772 *total = sparc_costs->int_mulX + bit_cost;
11773 else
11774 *total = sparc_costs->int_mul + bit_cost;
11775 }
11776 return false;
11777
11778 case ASHIFT:
11779 case ASHIFTRT:
11780 case LSHIFTRT:
11781 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11782 return false;
11783
11784 case DIV:
11785 case UDIV:
11786 case MOD:
11787 case UMOD:
11788 if (float_mode_p)
11789 {
11790 if (mode == DFmode)
11791 *total = sparc_costs->float_div_df;
11792 else
11793 *total = sparc_costs->float_div_sf;
11794 }
11795 else
11796 {
11797 if (mode == DImode)
11798 *total = sparc_costs->int_divX;
11799 else
11800 *total = sparc_costs->int_div;
11801 }
11802 return false;
11803
11804 case NEG:
11805 if (! float_mode_p)
11806 {
11807 *total = COSTS_N_INSNS (1);
11808 return false;
11809 }
11810 /* FALLTHRU */
11811
11812 case ABS:
11813 case FLOAT:
11814 case UNSIGNED_FLOAT:
11815 case FIX:
11816 case UNSIGNED_FIX:
11817 case FLOAT_EXTEND:
11818 case FLOAT_TRUNCATE:
11819 *total = sparc_costs->float_move;
11820 return false;
11821
11822 case SQRT:
11823 if (mode == DFmode)
11824 *total = sparc_costs->float_sqrt_df;
11825 else
11826 *total = sparc_costs->float_sqrt_sf;
11827 return false;
11828
11829 case COMPARE:
11830 if (float_mode_p)
11831 *total = sparc_costs->float_cmp;
11832 else
11833 *total = COSTS_N_INSNS (1);
11834 return false;
11835
11836 case IF_THEN_ELSE:
11837 if (float_mode_p)
11838 *total = sparc_costs->float_cmove;
11839 else
11840 *total = sparc_costs->int_cmove;
11841 return false;
11842
11843 case IOR:
11844 /* Handle the NAND vector patterns. */
11845 if (sparc_vector_mode_supported_p (mode)
11846 && GET_CODE (XEXP (x, 0)) == NOT
11847 && GET_CODE (XEXP (x, 1)) == NOT)
11848 {
11849 *total = COSTS_N_INSNS (1);
11850 return true;
11851 }
11852 else
11853 return false;
11854
11855 default:
11856 return false;
11857 }
11858 }
11859
11860 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11861
11862 static inline bool
11863 general_or_i64_p (reg_class_t rclass)
11864 {
11865 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11866 }
11867
11868 /* Implement TARGET_REGISTER_MOVE_COST. */
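/* Moves between the general and FP register files normally have to bounce
   through memory; VIS3 adds direct moves between the two register files,
   which is why the 4- and 8-byte cases below are costed much lower.  */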
11869
11870 static int
11871 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11872 reg_class_t from, reg_class_t to)
11873 {
11874 bool need_memory = false;
11875
11876 /* This helps postreload CSE to eliminate redundant comparisons. */
11877 if (from == NO_REGS || to == NO_REGS)
11878 return 100;
11879
11880 if (from == FPCC_REGS || to == FPCC_REGS)
11881 need_memory = true;
11882 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11883 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11884 {
11885 if (TARGET_VIS3)
11886 {
11887 int size = GET_MODE_SIZE (mode);
11888 if (size == 8 || size == 4)
11889 {
11890 if (! TARGET_ARCH32 || size == 4)
11891 return 4;
11892 else
11893 return 6;
11894 }
11895 }
11896 need_memory = true;
11897 }
11898
11899 if (need_memory)
11900 {
11901 if (sparc_cpu == PROCESSOR_ULTRASPARC
11902 || sparc_cpu == PROCESSOR_ULTRASPARC3
11903 || sparc_cpu == PROCESSOR_NIAGARA
11904 || sparc_cpu == PROCESSOR_NIAGARA2
11905 || sparc_cpu == PROCESSOR_NIAGARA3
11906 || sparc_cpu == PROCESSOR_NIAGARA4
11907 || sparc_cpu == PROCESSOR_NIAGARA7
11908 || sparc_cpu == PROCESSOR_M8)
11909 return 12;
11910
11911 return 6;
11912 }
11913
11914 return 2;
11915 }
11916
11917 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11918 This is achieved by means of a manual dynamic stack space allocation in
11919 the current frame. We make the assumption that SEQ doesn't contain any
11920 function calls, with the possible exception of calls to the GOT helper. */
11921
11922 static void
11923 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11924 {
11925 /* We must preserve the lowest 16 words for the register save area. */
11926 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11927 /* We really need only 2 words of fresh stack space. */
11928 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11929
11930 rtx slot
11931 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11932 SPARC_STACK_BIAS + offset));
11933
11934 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11935 emit_insn (gen_rtx_SET (slot, reg));
11936 if (reg2)
11937 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11938 reg2));
11939 emit_insn (seq);
11940 if (reg2)
11941 emit_insn (gen_rtx_SET (reg2,
11942 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11943 emit_insn (gen_rtx_SET (reg, slot));
11944 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11945 }
11946
11947 /* Output the assembler code for a thunk function. THUNK_DECL is the
11948 declaration for the thunk function itself, FUNCTION is the decl for
11949 the target function. DELTA is an immediate constant offset to be
11950 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11951 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
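/* In rough C terms, the code emitted below computes

     this += DELTA;
     if (VCALL_OFFSET != 0)
       this += *(word *) (*(char **) this + VCALL_OFFSET);

   and then tail-calls FUNCTION with the adjusted THIS.  */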
11952
11953 static void
11954 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11955 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11956 tree function)
11957 {
11958 rtx this_rtx, funexp;
11959 rtx_insn *insn;
11960 unsigned int int_arg_first;
11961
11962 reload_completed = 1;
11963 epilogue_completed = 1;
11964
11965 emit_note (NOTE_INSN_PROLOGUE_END);
11966
11967 if (TARGET_FLAT)
11968 {
11969 sparc_leaf_function_p = 1;
11970
11971 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11972 }
11973 else if (flag_delayed_branch)
11974 {
11975 /* We will emit a regular sibcall below, so we need to instruct
11976 output_sibcall that we are in a leaf function. */
11977 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11978
11979 /* This will cause final.c to invoke leaf_renumber_regs so we
11980 must behave as if we were in a not-yet-leafified function. */
11981 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11982 }
11983 else
11984 {
11985 /* We will emit the sibcall manually below, so we will need to
11986 manually spill non-leaf registers. */
11987 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11988
11989 /* We really are in a leaf function. */
11990 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11991 }
11992
11993 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11994 returns a structure, the structure return pointer is there instead. */
11995 if (TARGET_ARCH64
11996 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11997 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11998 else
11999 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12000
12001 /* Add DELTA. When possible use a plain add, otherwise load it into
12002 a register first. */
12003 if (delta)
12004 {
12005 rtx delta_rtx = GEN_INT (delta);
12006
12007 if (! SPARC_SIMM13_P (delta))
12008 {
12009 rtx scratch = gen_rtx_REG (Pmode, 1);
12010 emit_move_insn (scratch, delta_rtx);
12011 delta_rtx = scratch;
12012 }
12013
12014 /* THIS_RTX += DELTA. */
12015 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12016 }
12017
12018 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12019 if (vcall_offset)
12020 {
12021 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12022 rtx scratch = gen_rtx_REG (Pmode, 1);
12023
12024 gcc_assert (vcall_offset < 0);
12025
12026 /* SCRATCH = *THIS_RTX. */
12027 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12028
12029 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12030 may not have any available scratch register at this point. */
12031 if (SPARC_SIMM13_P (vcall_offset))
12032 ;
12033 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12034 else if (! fixed_regs[5]
12035 /* The below sequence is made up of at least 2 insns,
12036 while the default method may need only one. */
12037 && vcall_offset < -8192)
12038 {
12039 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12040 emit_move_insn (scratch2, vcall_offset_rtx);
12041 vcall_offset_rtx = scratch2;
12042 }
12043 else
12044 {
12045 rtx increment = GEN_INT (-4096);
12046
12047 /* VCALL_OFFSET is a negative number whose typical range can be
12048 estimated as -32768..0 in 32-bit mode. In almost all cases
12049 it is therefore cheaper to emit multiple add insns than
12050 spilling and loading the constant into a register (at least
12051 6 insns). */
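/* For example, with VCALL_OFFSET == -13000 the loop below emits three
   "add scratch, -4096" insns, leaving -712 as the remaining SIMM13
   offset used in the memory reference further down.  */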
12052 while (! SPARC_SIMM13_P (vcall_offset))
12053 {
12054 emit_insn (gen_add2_insn (scratch, increment));
12055 vcall_offset += 4096;
12056 }
12057 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12058 }
12059
12060 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12061 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12062 gen_rtx_PLUS (Pmode,
12063 scratch,
12064 vcall_offset_rtx)));
12065
12066 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12067 emit_insn (gen_add2_insn (this_rtx, scratch));
12068 }
12069
12070 /* Generate a tail call to the target function. */
12071 if (! TREE_USED (function))
12072 {
12073 assemble_external (function);
12074 TREE_USED (function) = 1;
12075 }
12076 funexp = XEXP (DECL_RTL (function), 0);
12077
12078 if (flag_delayed_branch)
12079 {
12080 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12081 insn = emit_call_insn (gen_sibcall (funexp));
12082 SIBLING_CALL_P (insn) = 1;
12083 }
12084 else
12085 {
12086 /* The hoops we have to jump through in order to generate a sibcall
12087 without using delay slots... */
12088 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12089
12090 if (flag_pic)
12091 {
12092 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12093 start_sequence ();
12094 load_got_register (); /* clobbers %o7 */
12095 scratch = sparc_legitimize_pic_address (funexp, scratch);
12096 seq = get_insns ();
12097 end_sequence ();
12098 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12099 }
12100 else if (TARGET_ARCH32)
12101 {
12102 emit_insn (gen_rtx_SET (scratch,
12103 gen_rtx_HIGH (SImode, funexp)));
12104 emit_insn (gen_rtx_SET (scratch,
12105 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12106 }
12107 else /* TARGET_ARCH64 */
12108 {
12109 switch (sparc_cmodel)
12110 {
12111 case CM_MEDLOW:
12112 case CM_MEDMID:
12113 /* The destination can serve as a temporary. */
12114 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12115 break;
12116
12117 case CM_MEDANY:
12118 case CM_EMBMEDANY:
12119 /* The destination cannot serve as a temporary. */
12120 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12121 start_sequence ();
12122 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12123 seq = get_insns ();
12124 end_sequence ();
12125 emit_and_preserve (seq, spill_reg, 0);
12126 break;
12127
12128 default:
12129 gcc_unreachable ();
12130 }
12131 }
12132
12133 emit_jump_insn (gen_indirect_jump (scratch));
12134 }
12135
12136 emit_barrier ();
12137
12138 /* Run just enough of rest_of_compilation to get the insns emitted.
12139 There's not really enough bulk here to make other passes such as
12140 instruction scheduling worthwhile. Note that use_thunk calls
12141 assemble_start_function and assemble_end_function. */
12142 insn = get_insns ();
12143 shorten_branches (insn);
12144 final_start_function (insn, file, 1);
12145 final (insn, file, 1);
12146 final_end_function ();
12147
12148 reload_completed = 0;
12149 epilogue_completed = 0;
12150 }
12151
12152 /* Return true if sparc_output_mi_thunk would be able to output the
12153 assembler code for the thunk function specified by the arguments
12154 it is passed, and false otherwise. */
12155 static bool
12156 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12157 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12158 HOST_WIDE_INT vcall_offset,
12159 const_tree function ATTRIBUTE_UNUSED)
12160 {
12161 /* Bound the loop used in the default method above. */
12162 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12163 }
12164
12165 /* How to allocate a 'struct machine_function'. */
12166
12167 static struct machine_function *
12168 sparc_init_machine_status (void)
12169 {
12170 return ggc_cleared_alloc<machine_function> ();
12171 }
12172
12173 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12174 We need to emit DTP-relative relocations. */
12175
12176 static void
12177 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12178 {
12179 switch (size)
12180 {
12181 case 4:
12182 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12183 break;
12184 case 8:
12185 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12186 break;
12187 default:
12188 gcc_unreachable ();
12189 }
12190 output_addr_const (file, x);
12191 fputs (")", file);
12192 }
12193
12194 /* Do whatever processing is required at the end of a file. */
12195
12196 static void
12197 sparc_file_end (void)
12198 {
12199 /* If we need to emit the special GOT helper function, do so now. */
12200 if (got_helper_rtx)
12201 {
12202 const char *name = XSTR (got_helper_rtx, 0);
12203 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12204 #ifdef DWARF2_UNWIND_INFO
12205 bool do_cfi;
12206 #endif
12207
12208 if (USE_HIDDEN_LINKONCE)
12209 {
12210 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12211 get_identifier (name),
12212 build_function_type_list (void_type_node,
12213 NULL_TREE));
12214 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12215 NULL_TREE, void_type_node);
12216 TREE_PUBLIC (decl) = 1;
12217 TREE_STATIC (decl) = 1;
12218 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12219 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12220 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12221 resolve_unique_section (decl, 0, flag_function_sections);
12222 allocate_struct_function (decl, true);
12223 cfun->is_thunk = 1;
12224 current_function_decl = decl;
12225 init_varasm_status ();
12226 assemble_start_function (decl, name);
12227 }
12228 else
12229 {
12230 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12231 switch_to_section (text_section);
12232 if (align > 0)
12233 ASM_OUTPUT_ALIGN (asm_out_file, align);
12234 ASM_OUTPUT_LABEL (asm_out_file, name);
12235 }
12236
12237 #ifdef DWARF2_UNWIND_INFO
12238 do_cfi = dwarf2out_do_cfi_asm ();
12239 if (do_cfi)
12240 fprintf (asm_out_file, "\t.cfi_startproc\n");
12241 #endif
12242 if (flag_delayed_branch)
12243 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12244 reg_name, reg_name);
12245 else
12246 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12247 reg_name, reg_name);
12248 #ifdef DWARF2_UNWIND_INFO
12249 if (do_cfi)
12250 fprintf (asm_out_file, "\t.cfi_endproc\n");
12251 #endif
12252 }
12253
12254 if (NEED_INDICATE_EXEC_STACK)
12255 file_end_indicate_exec_stack ();
12256
12257 #ifdef TARGET_SOLARIS
12258 solaris_file_end ();
12259 #endif
12260 }
12261
12262 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12263 /* Implement TARGET_MANGLE_TYPE. */
12264
12265 static const char *
12266 sparc_mangle_type (const_tree type)
12267 {
12268 if (TARGET_ARCH32
12269 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12270 && TARGET_LONG_DOUBLE_128)
12271 return "g";
12272
12273 /* For all other types, use normal C++ mangling. */
12274 return NULL;
12275 }
12276 #endif
12277
12278 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12279 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12280 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
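/* For example, sparc_expand_compare_and_swap below calls this with
   (LOAD_STORE, BEFORE_AFTER) == (3, 1) for the barrier required before the
   atomic operation and with (3, 2) for the barrier required after it;
   BEFORE_AFTER == 3 requests a standalone (raw) barrier.  */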
12281
12282 void
12283 sparc_emit_membar_for_model (enum memmodel model,
12284 int load_store, int before_after)
12285 {
12286 /* Bits for the MEMBAR mmask field. */
12287 const int LoadLoad = 1;
12288 const int StoreLoad = 2;
12289 const int LoadStore = 4;
12290 const int StoreStore = 8;
12291
12292 int mm = 0, implied = 0;
12293
12294 switch (sparc_memory_model)
12295 {
12296 case SMM_SC:
12297 /* Sequential Consistency. All memory transactions are immediately
12298 visible in sequential execution order. No barriers needed. */
12299 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12300 break;
12301
12302 case SMM_TSO:
12303 /* Total Store Ordering: all memory transactions with store semantics
12304 are followed by an implied StoreStore. */
12305 implied |= StoreStore;
12306
12307 /* If we're not looking for a raw barrier (before+after), then atomic
12308 operations get the benefit of being both load and store. */
12309 if (load_store == 3 && before_after == 1)
12310 implied |= StoreLoad;
12311 /* FALLTHRU */
12312
12313 case SMM_PSO:
12314 /* Partial Store Ordering: all memory transactions with load semantics
12315 are followed by an implied LoadLoad | LoadStore. */
12316 implied |= LoadLoad | LoadStore;
12317
12318 /* If we're not looking for a raw barrier (before+after), then atomic
12319 operations get the benefit of being both load and store. */
12320 if (load_store == 3 && before_after == 2)
12321 implied |= StoreLoad | StoreStore;
12322 /* FALLTHRU */
12323
12324 case SMM_RMO:
12325 /* Relaxed Memory Ordering: no implicit bits. */
12326 break;
12327
12328 default:
12329 gcc_unreachable ();
12330 }
12331
12332 if (before_after & 1)
12333 {
12334 if (is_mm_release (model) || is_mm_acq_rel (model)
12335 || is_mm_seq_cst (model))
12336 {
12337 if (load_store & 1)
12338 mm |= LoadLoad | StoreLoad;
12339 if (load_store & 2)
12340 mm |= LoadStore | StoreStore;
12341 }
12342 }
12343 if (before_after & 2)
12344 {
12345 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12346 || is_mm_seq_cst (model))
12347 {
12348 if (load_store & 1)
12349 mm |= LoadLoad | LoadStore;
12350 if (load_store & 2)
12351 mm |= StoreLoad | StoreStore;
12352 }
12353 }
12354
12355 /* Remove the bits implied by the system memory model. */
12356 mm &= ~implied;
12357
12358 /* For raw barriers (before+after), always emit a barrier.
12359 This will become a compile-time barrier if needed. */
12360 if (mm || before_after == 3)
12361 emit_insn (gen_membar (GEN_INT (mm)));
12362 }
12363
12364 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12365 compare and swap on the word containing the byte or half-word. */
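/* The sequence below aligns the address down to the containing word,
   computes the (big-endian) bit offset of the sub-word within it, builds
   a mask for the accessed lanes, shifts OLDVAL and NEWVAL into position,
   and then loops on a full-word compare-and-swap, retrying whenever the
   bytes outside the mask have changed in the meantime.  */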
12366
12367 static void
12368 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12369 rtx oldval, rtx newval)
12370 {
12371 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12372 rtx addr = gen_reg_rtx (Pmode);
12373 rtx off = gen_reg_rtx (SImode);
12374 rtx oldv = gen_reg_rtx (SImode);
12375 rtx newv = gen_reg_rtx (SImode);
12376 rtx oldvalue = gen_reg_rtx (SImode);
12377 rtx newvalue = gen_reg_rtx (SImode);
12378 rtx res = gen_reg_rtx (SImode);
12379 rtx resv = gen_reg_rtx (SImode);
12380 rtx memsi, val, mask, cc;
12381
12382 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12383
12384 if (Pmode != SImode)
12385 addr1 = gen_lowpart (SImode, addr1);
12386 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12387
12388 memsi = gen_rtx_MEM (SImode, addr);
12389 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12390 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12391
12392 val = copy_to_reg (memsi);
12393
12394 emit_insn (gen_rtx_SET (off,
12395 gen_rtx_XOR (SImode, off,
12396 GEN_INT (GET_MODE (mem) == QImode
12397 ? 3 : 2))));
12398
12399 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12400
12401 if (GET_MODE (mem) == QImode)
12402 mask = force_reg (SImode, GEN_INT (0xff));
12403 else
12404 mask = force_reg (SImode, GEN_INT (0xffff));
12405
12406 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12407
12408 emit_insn (gen_rtx_SET (val,
12409 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12410 val)));
12411
12412 oldval = gen_lowpart (SImode, oldval);
12413 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12414
12415 newval = gen_lowpart_common (SImode, newval);
12416 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12417
12418 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12419
12420 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12421
12422 rtx_code_label *end_label = gen_label_rtx ();
12423 rtx_code_label *loop_label = gen_label_rtx ();
12424 emit_label (loop_label);
12425
12426 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12427
12428 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12429
12430 emit_move_insn (bool_result, const1_rtx);
12431
12432 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12433
12434 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12435
12436 emit_insn (gen_rtx_SET (resv,
12437 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12438 res)));
12439
12440 emit_move_insn (bool_result, const0_rtx);
12441
12442 cc = gen_compare_reg_1 (NE, resv, val);
12443 emit_insn (gen_rtx_SET (val, resv));
12444
12445 /* Use cbranchcc4 to separate the compare and branch! */
12446 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12447 cc, const0_rtx, loop_label));
12448
12449 emit_label (end_label);
12450
12451 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12452
12453 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12454
12455 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12456 }
12457
12458 /* Expand code to perform a compare-and-swap. */
12459
12460 void
12461 sparc_expand_compare_and_swap (rtx operands[])
12462 {
12463 rtx bval, retval, mem, oldval, newval;
12464 machine_mode mode;
12465 enum memmodel model;
12466
12467 bval = operands[0];
12468 retval = operands[1];
12469 mem = operands[2];
12470 oldval = operands[3];
12471 newval = operands[4];
12472 model = (enum memmodel) INTVAL (operands[6]);
12473 mode = GET_MODE (mem);
12474
12475 sparc_emit_membar_for_model (model, 3, 1);
12476
12477 if (reg_overlap_mentioned_p (retval, oldval))
12478 oldval = copy_to_reg (oldval);
12479
12480 if (mode == QImode || mode == HImode)
12481 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12482 else
12483 {
12484 rtx (*gen) (rtx, rtx, rtx, rtx);
12485 rtx x;
12486
12487 if (mode == SImode)
12488 gen = gen_atomic_compare_and_swapsi_1;
12489 else
12490 gen = gen_atomic_compare_and_swapdi_1;
12491 emit_insn (gen (retval, mem, oldval, newval));
12492
12493 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12494 if (x != bval)
12495 convert_move (bval, x, 1);
12496 }
12497
12498 sparc_emit_membar_for_model (model, 3, 2);
12499 }
12500
12501 void
12502 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12503 {
12504 rtx t_1, t_2, t_3;
12505
12506 sel = gen_lowpart (DImode, sel);
12507 switch (vmode)
12508 {
12509 case E_V2SImode:
12510 /* inp = xxxxxxxAxxxxxxxB */
12511 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12512 NULL_RTX, 1, OPTAB_DIRECT);
12513 /* t_1 = ....xxxxxxxAxxx. */
12514 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12515 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12516 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12517 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12518 /* sel = .......B */
12519 /* t_1 = ...A.... */
12520 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12521 /* sel = ...A...B */
12522 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12523 /* sel = AAAABBBB * 4 */
12524 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12525 /* sel = { A*4, A*4+1, A*4+2, ... } */
12526 break;
12527
12528 case E_V4HImode:
12529 /* inp = xxxAxxxBxxxCxxxD */
12530 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12531 NULL_RTX, 1, OPTAB_DIRECT);
12532 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12533 NULL_RTX, 1, OPTAB_DIRECT);
12534 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12535 NULL_RTX, 1, OPTAB_DIRECT);
12536 /* t_1 = ..xxxAxxxBxxxCxx */
12537 /* t_2 = ....xxxAxxxBxxxC */
12538 /* t_3 = ......xxxAxxxBxx */
12539 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12540 GEN_INT (0x07),
12541 NULL_RTX, 1, OPTAB_DIRECT);
12542 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12543 GEN_INT (0x0700),
12544 NULL_RTX, 1, OPTAB_DIRECT);
12545 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12546 GEN_INT (0x070000),
12547 NULL_RTX, 1, OPTAB_DIRECT);
12548 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12549 GEN_INT (0x07000000),
12550 NULL_RTX, 1, OPTAB_DIRECT);
12551 /* sel = .......D */
12552 /* t_1 = .....C.. */
12553 /* t_2 = ...B.... */
12554 /* t_3 = .A...... */
12555 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12556 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12557 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12558 /* sel = .A.B.C.D */
12559 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12560 /* sel = AABBCCDD * 2 */
12561 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12562 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12563 break;
12564
12565 case E_V8QImode:
12566 /* input = xAxBxCxDxExFxGxH */
12567 sel = expand_simple_binop (DImode, AND, sel,
12568 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12569 | 0x0f0f0f0f),
12570 NULL_RTX, 1, OPTAB_DIRECT);
12571 /* sel = .A.B.C.D.E.F.G.H */
12572 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12573 NULL_RTX, 1, OPTAB_DIRECT);
12574 /* t_1 = ..A.B.C.D.E.F.G. */
12575 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12576 NULL_RTX, 1, OPTAB_DIRECT);
12577 /* sel = .AABBCCDDEEFFGGH */
12578 sel = expand_simple_binop (DImode, AND, sel,
12579 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12580 | 0xff00ff),
12581 NULL_RTX, 1, OPTAB_DIRECT);
12582 /* sel = ..AB..CD..EF..GH */
12583 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12584 NULL_RTX, 1, OPTAB_DIRECT);
12585 /* t_1 = ....AB..CD..EF.. */
12586 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12587 NULL_RTX, 1, OPTAB_DIRECT);
12588 /* sel = ..ABABCDCDEFEFGH */
12589 sel = expand_simple_binop (DImode, AND, sel,
12590 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12591 NULL_RTX, 1, OPTAB_DIRECT);
12592 /* sel = ....ABCD....EFGH */
12593 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12594 NULL_RTX, 1, OPTAB_DIRECT);
12595 /* t_1 = ........ABCD.... */
12596 sel = gen_lowpart (SImode, sel);
12597 t_1 = gen_lowpart (SImode, t_1);
12598 break;
12599
12600 default:
12601 gcc_unreachable ();
12602 }
12603
12604 /* Always perform the final addition/merge within the bmask insn. */
12605 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12606 }
12607
12608 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12609
12610 static bool
12611 sparc_frame_pointer_required (void)
12612 {
12613 /* If the stack pointer is dynamically modified in the function, it cannot
12614 serve as the frame pointer. */
12615 if (cfun->calls_alloca)
12616 return true;
12617
12618 /* If the function receives nonlocal gotos, it needs to save the frame
12619 pointer in the nonlocal_goto_save_area object. */
12620 if (cfun->has_nonlocal_label)
12621 return true;
12622
12623 /* In flat mode, that's it. */
12624 if (TARGET_FLAT)
12625 return false;
12626
12627 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12628 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12629 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12630 }
12631
12632 /* The way this is structured, we can't eliminate SFP in favor of SP
12633 if the frame pointer is required: we want to use the SFP->HFP elimination
12634 in that case. But the test in update_eliminables doesn't know we are
12635 assuming below that we only do the former elimination. */
12636
12637 static bool
12638 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12639 {
12640 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12641 }
12642
12643 /* Return the hard frame pointer directly to bypass the stack bias. */
12644
12645 static rtx
12646 sparc_builtin_setjmp_frame_value (void)
12647 {
12648 return hard_frame_pointer_rtx;
12649 }
12650
12651 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12652 they won't be allocated. */
12653
12654 static void
12655 sparc_conditional_register_usage (void)
12656 {
12657 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12658 {
12659 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12660 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12661 }
12662 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12663 then honor it. */
12664 if (TARGET_ARCH32 && fixed_regs[5])
12665 fixed_regs[5] = 1;
12666 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12667 fixed_regs[5] = 0;
12668 if (! TARGET_V9)
12669 {
12670 int regno;
12671 for (regno = SPARC_FIRST_V9_FP_REG;
12672 regno <= SPARC_LAST_V9_FP_REG;
12673 regno++)
12674 fixed_regs[regno] = 1;
12675 /* %fcc0 is used by v8 and v9. */
12676 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12677 regno <= SPARC_LAST_V9_FCC_REG;
12678 regno++)
12679 fixed_regs[regno] = 1;
12680 }
12681 if (! TARGET_FPU)
12682 {
12683 int regno;
12684 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12685 fixed_regs[regno] = 1;
12686 }
12687 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12688 then honor it. Likewise with g3 and g4. */
12689 if (fixed_regs[2] == 2)
12690 fixed_regs[2] = ! TARGET_APP_REGS;
12691 if (fixed_regs[3] == 2)
12692 fixed_regs[3] = ! TARGET_APP_REGS;
12693 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12694 fixed_regs[4] = ! TARGET_APP_REGS;
12695 else if (TARGET_CM_EMBMEDANY)
12696 fixed_regs[4] = 1;
12697 else if (fixed_regs[4] == 2)
12698 fixed_regs[4] = 0;
12699 if (TARGET_FLAT)
12700 {
12701 int regno;
12702 /* Disable leaf functions. */
12703 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12704 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12705 leaf_reg_remap [regno] = regno;
12706 }
12707 if (TARGET_VIS)
12708 global_regs[SPARC_GSR_REG] = 1;
12709 }
12710
12711 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12712
12713 - We can't load constants into FP registers.
12714 - We can't load FP constants into integer registers when soft-float,
12715 because there is no soft-float pattern with an r/F constraint.
12716 - We can't load FP constants into integer registers for TFmode unless
12717 it is 0.0L, because there is no movtf pattern with an r/F constraint.
12718 - Try to reload integer constants (symbolic or otherwise) back into
12719 registers directly, rather than having them dumped to memory. */
12720
12721 static reg_class_t
12722 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12723 {
12724 machine_mode mode = GET_MODE (x);
12725 if (CONSTANT_P (x))
12726 {
12727 if (FP_REG_CLASS_P (rclass)
12728 || rclass == GENERAL_OR_FP_REGS
12729 || rclass == GENERAL_OR_EXTRA_FP_REGS
12730 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12731 || (mode == TFmode && ! const_zero_operand (x, mode)))
12732 return NO_REGS;
12733
12734 if (GET_MODE_CLASS (mode) == MODE_INT)
12735 return GENERAL_REGS;
12736
12737 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12738 {
12739 if (! FP_REG_CLASS_P (rclass)
12740 || !(const_zero_operand (x, mode)
12741 || const_all_ones_operand (x, mode)))
12742 return NO_REGS;
12743 }
12744 }
12745
12746 if (TARGET_VIS3
12747 && ! TARGET_ARCH64
12748 && (rclass == EXTRA_FP_REGS
12749 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12750 {
12751 int regno = true_regnum (x);
12752
12753 if (SPARC_INT_REG_P (regno))
12754 return (rclass == EXTRA_FP_REGS
12755 ? FP_REGS : GENERAL_OR_FP_REGS);
12756 }
12757
12758 return rclass;
12759 }
12760
12761 /* Return true if we use LRA instead of the reload pass. */
12762
12763 static bool
12764 sparc_lra_p (void)
12765 {
12766 return TARGET_LRA;
12767 }
12768
12769 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12770 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
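/* For illustration: with two distinct register operands, in the alternative
   that uses the scratch operands %3 and %4, the code below emits (after the
   optional "srl" insns that clear the upper 32 bits) a sequence of the form

       sllx    %H1, 32, %3    ! rebuild 64-bit operand 1 in %3
       sllx    %H2, 32, %4    ! rebuild 64-bit operand 2 in %4
       or      %L1, %3, %3
       or      %L2, %4, %4
       OPCODE  %3, %4, %3     ! the 64-bit multiply
       srlx    %3, 32, %H0    ! high word of the result
       mov     %3, %L0        ! low word of the result  */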
12771
12772 const char *
12773 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12774 {
12775 char mulstr[32];
12776
12777 gcc_assert (! TARGET_ARCH64);
12778
12779 if (sparc_check_64 (operands[1], insn) <= 0)
12780 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12781 if (which_alternative == 1)
12782 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12783 if (GET_CODE (operands[2]) == CONST_INT)
12784 {
12785 if (which_alternative == 1)
12786 {
12787 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12788 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12789 output_asm_insn (mulstr, operands);
12790 return "srlx\t%L0, 32, %H0";
12791 }
12792 else
12793 {
12794 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12795 output_asm_insn ("or\t%L1, %3, %3", operands);
12796 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12797 output_asm_insn (mulstr, operands);
12798 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12799 return "mov\t%3, %L0";
12800 }
12801 }
12802 else if (rtx_equal_p (operands[1], operands[2]))
12803 {
12804 if (which_alternative == 1)
12805 {
12806 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12807 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12808 output_asm_insn (mulstr, operands);
12809 return "srlx\t%L0, 32, %H0";
12810 }
12811 else
12812 {
12813 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12814 output_asm_insn ("or\t%L1, %3, %3", operands);
12815 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12816 output_asm_insn (mulstr, operands);
12817 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12818 return "mov\t%3, %L0";
12819 }
12820 }
12821 if (sparc_check_64 (operands[2], insn) <= 0)
12822 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12823 if (which_alternative == 1)
12824 {
12825 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12826 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12827 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12828 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12829 output_asm_insn (mulstr, operands);
12830 return "srlx\t%L0, 32, %H0";
12831 }
12832 else
12833 {
12834 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12835 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12836 output_asm_insn ("or\t%L1, %3, %3", operands);
12837 output_asm_insn ("or\t%L2, %4, %4", operands);
12838 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12839 output_asm_insn (mulstr, operands);
12840 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12841 return "mov\t%3, %L0";
12842 }
12843 }
12844
12845 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12846 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12847 and INNER_MODE are the modes describing TARGET. */
12848
12849 static void
12850 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12851 machine_mode inner_mode)
12852 {
12853 rtx t1, final_insn, sel;
12854 int bmask;
12855
12856 t1 = gen_reg_rtx (mode);
12857
12858 elt = convert_modes (SImode, inner_mode, elt, true);
12859 emit_move_insn (gen_lowpart (SImode, t1), elt);
12860
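/* Each nibble of the bmask constant below selects one byte of the rs1:rs2
   concatenation seen by BSHUFFLE (values 0-7 pick bytes of rs1, here T1).
   ELT sits in the low-order bytes of T1, so repeatedly selecting those
   bytes replicates ELT into every element of the result.  */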
12861 switch (mode)
12862 {
12863 case E_V2SImode:
12864 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12865 bmask = 0x45674567;
12866 break;
12867 case E_V4HImode:
12868 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12869 bmask = 0x67676767;
12870 break;
12871 case E_V8QImode:
12872 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12873 bmask = 0x77777777;
12874 break;
12875 default:
12876 gcc_unreachable ();
12877 }
12878
12879 sel = force_reg (SImode, GEN_INT (bmask));
12880 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12881 emit_insn (final_insn);
12882 }
12883
12884 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12885 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
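/* FPMERGE interleaves the bytes of its two 4-byte inputs into an 8-byte
   result, so merging a value with itself doubles the number of copies of
   ELT; the three successive merges below turn one copy into eight.  */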
12886
12887 static void
12888 vector_init_fpmerge (rtx target, rtx elt)
12889 {
12890 rtx t1, t2, t2_low, t3, t3_low;
12891
12892 t1 = gen_reg_rtx (V4QImode);
12893 elt = convert_modes (SImode, QImode, elt, true);
12894 emit_move_insn (gen_lowpart (SImode, t1), elt);
12895
12896 t2 = gen_reg_rtx (V8QImode);
12897 t2_low = gen_lowpart (V4QImode, t2);
12898 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12899
12900 t3 = gen_reg_rtx (V8QImode);
12901 t3_low = gen_lowpart (V4QImode, t3);
12902 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12903
12904 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12905 }
12906
12907 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12908 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
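/* The ALIGNADDR below sets GSR.align to 6, so each FALIGNDATA extracts the
   8 bytes starting at offset 6 of the T1:TARGET concatenation, i.e. it
   prepends ELT (the last halfword of T1) and shifts TARGET down by one
   halfword; four iterations therefore fill all four elements with ELT.  */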
12909
12910 static void
12911 vector_init_faligndata (rtx target, rtx elt)
12912 {
12913 rtx t1 = gen_reg_rtx (V4HImode);
12914 int i;
12915
12916 elt = convert_modes (SImode, HImode, elt, true);
12917 emit_move_insn (gen_lowpart (SImode, t1), elt);
12918
12919 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12920 force_reg (SImode, GEN_INT (6)),
12921 const0_rtx));
12922
12923 for (i = 0; i < 4; i++)
12924 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12925 }
12926
12927 /* Emit code to initialize TARGET to values for individual fields VALS. */
12928
12929 void
12930 sparc_expand_vector_init (rtx target, rtx vals)
12931 {
12932 const machine_mode mode = GET_MODE (target);
12933 const machine_mode inner_mode = GET_MODE_INNER (mode);
12934 const int n_elts = GET_MODE_NUNITS (mode);
12935 int i, n_var = 0;
12936 bool all_same = true;
12937 rtx mem;
12938
12939 for (i = 0; i < n_elts; i++)
12940 {
12941 rtx x = XVECEXP (vals, 0, i);
12942 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12943 n_var++;
12944
12945 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12946 all_same = false;
12947 }
12948
12949 if (n_var == 0)
12950 {
12951 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12952 return;
12953 }
12954
12955 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12956 {
12957 if (GET_MODE_SIZE (inner_mode) == 4)
12958 {
12959 emit_move_insn (gen_lowpart (SImode, target),
12960 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12961 return;
12962 }
12963 else if (GET_MODE_SIZE (inner_mode) == 8)
12964 {
12965 emit_move_insn (gen_lowpart (DImode, target),
12966 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12967 return;
12968 }
12969 }
12970 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12971 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12972 {
12973 emit_move_insn (gen_highpart (word_mode, target),
12974 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12975 emit_move_insn (gen_lowpart (word_mode, target),
12976 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12977 return;
12978 }
12979
12980 if (all_same && GET_MODE_SIZE (mode) == 8)
12981 {
12982 if (TARGET_VIS2)
12983 {
12984 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12985 return;
12986 }
12987 if (mode == V8QImode)
12988 {
12989 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12990 return;
12991 }
12992 if (mode == V4HImode)
12993 {
12994 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12995 return;
12996 }
12997 }
12998
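/* Otherwise fall back to building the vector in a stack temporary, one
   element at a time, and loading the whole vector from there.  */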
12999 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13000 for (i = 0; i < n_elts; i++)
13001 emit_move_insn (adjust_address_nv (mem, inner_mode,
13002 i * GET_MODE_SIZE (inner_mode)),
13003 XVECEXP (vals, 0, i));
13004 emit_move_insn (target, mem);
13005 }
13006
13007 /* Implement TARGET_SECONDARY_RELOAD. */
13008
13009 static reg_class_t
13010 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13011 machine_mode mode, secondary_reload_info *sri)
13012 {
13013 enum reg_class rclass = (enum reg_class) rclass_i;
13014
13015 sri->icode = CODE_FOR_nothing;
13016 sri->extra_cost = 0;
13017
13018 /* We need a temporary when loading/storing a HImode/QImode value
13019 between memory and the FPU registers. This can happen when combine puts
13020 a paradoxical subreg in a float/fix conversion insn. */
13021 if (FP_REG_CLASS_P (rclass)
13022 && (mode == HImode || mode == QImode)
13023 && (GET_CODE (x) == MEM
13024 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13025 && true_regnum (x) == -1)))
13026 return GENERAL_REGS;
13027
13028 /* On 32-bit we need a temporary when loading/storing a DFmode value
13029 between unaligned memory and the upper FPU registers. */
13030 if (TARGET_ARCH32
13031 && rclass == EXTRA_FP_REGS
13032 && mode == DFmode
13033 && GET_CODE (x) == MEM
13034 && ! mem_min_alignment (x, 8))
13035 return FP_REGS;
13036
13037 if (((TARGET_CM_MEDANY
13038 && symbolic_operand (x, mode))
13039 || (TARGET_CM_EMBMEDANY
13040 && text_segment_operand (x, mode)))
13041 && ! flag_pic)
13042 {
13043 if (in_p)
13044 sri->icode = direct_optab_handler (reload_in_optab, mode);
13045 else
13046 sri->icode = direct_optab_handler (reload_out_optab, mode);
13047 return NO_REGS;
13048 }
13049
13050 if (TARGET_VIS3 && TARGET_ARCH32)
13051 {
13052 int regno = true_regnum (x);
13053
13054 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13055 to move 8-byte values in 4-byte pieces. This only works via
13056 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13057 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13058 an FP_REGS intermediate move. */
13059 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13060 || ((general_or_i64_p (rclass)
13061 || rclass == GENERAL_OR_FP_REGS)
13062 && SPARC_FP_REG_P (regno)))
13063 {
13064 sri->extra_cost = 2;
13065 return FP_REGS;
13066 }
13067 }
13068
13069 return NO_REGS;
13070 }
13071
13072 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13073
13074 On SPARC when not VIS3 it is not possible to directly move data
13075 between GENERAL_REGS and FP_REGS. */
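/* With VIS3 the fp<->int move instructions (movwtos/movstouw and
   movxtod/movdtox) can move 4- to 8-byte values directly, which is why
   those sizes are exempted below when TARGET_VIS3 is set.  */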
13076
13077 static bool
13078 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13079 reg_class_t class2)
13080 {
13081 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13082 && (! TARGET_VIS3
13083 || GET_MODE_SIZE (mode) > 8
13084 || GET_MODE_SIZE (mode) < 4));
13085 }
13086
13087 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13088
13089 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13090 because the movsi and movsf patterns don't handle r/f moves.
13091 For v8 we copy the default definition. */
13092
13093 static machine_mode
13094 sparc_secondary_memory_needed_mode (machine_mode mode)
13095 {
13096 if (TARGET_ARCH64)
13097 {
13098 if (GET_MODE_BITSIZE (mode) < 32)
13099 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13100 return mode;
13101 }
13102 else
13103 {
13104 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13105 return mode_for_size (BITS_PER_WORD,
13106 GET_MODE_CLASS (mode), 0).require ();
13107 return mode;
13108 }
13109 }
13110
13111 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13112 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13113
13114 bool
13115 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13116 {
13117 enum rtx_code rc = GET_CODE (operands[1]);
13118 machine_mode cmp_mode;
13119 rtx cc_reg, dst, cmp;
13120
13121 cmp = operands[1];
13122 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13123 return false;
13124
13125 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13126 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13127
13128 cmp_mode = GET_MODE (XEXP (cmp, 0));
13129 rc = GET_CODE (cmp);
13130
13131 dst = operands[0];
13132 if (! rtx_equal_p (operands[2], dst)
13133 && ! rtx_equal_p (operands[3], dst))
13134 {
13135 if (reg_overlap_mentioned_p (dst, cmp))
13136 dst = gen_reg_rtx (mode);
13137
13138 emit_move_insn (dst, operands[3]);
13139 }
13140 else if (operands[2] == dst)
13141 {
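/* The conditional move emitted below only loads operands[2] into DST when
   the condition holds, so if operands[2] is already DST we move operands[3]
   instead, under the reversed condition.  */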
13142 operands[2] = operands[3];
13143
13144 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13145 rc = reverse_condition_maybe_unordered (rc);
13146 else
13147 rc = reverse_condition (rc);
13148 }
13149
13150 if (XEXP (cmp, 1) == const0_rtx
13151 && GET_CODE (XEXP (cmp, 0)) == REG
13152 && cmp_mode == DImode
13153 && v9_regcmp_p (rc))
13154 cc_reg = XEXP (cmp, 0);
13155 else
13156 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13157
13158 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13159
13160 emit_insn (gen_rtx_SET (dst,
13161 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13162
13163 if (dst != operands[0])
13164 emit_move_insn (operands[0], dst);
13165
13166 return true;
13167 }
13168
13169 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13170 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13171 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13172 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13173 code to be used for the condition mask. */
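/* The expansion emits three steps: an FCODE unspec that compares OPERANDS[4]
   and OPERANDS[5] element-wise into MASK, a CCODE unspec that transfers that
   mask into the GSR, and a BSHUFFLE that picks each element of the result
   from OPERANDS[1] or OPERANDS[2] according to the GSR mask.  */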
13174
13175 void
13176 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13177 {
13178 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13179 enum rtx_code code = GET_CODE (operands[3]);
13180
13181 mask = gen_reg_rtx (Pmode);
13182 cop0 = operands[4];
13183 cop1 = operands[5];
13184 if (code == LT || code == GE)
13185 {
13186 rtx t;
13187
13188 code = swap_condition (code);
13189 t = cop0; cop0 = cop1; cop1 = t;
13190 }
13191
13192 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13193
13194 fcmp = gen_rtx_UNSPEC (Pmode,
13195 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13196 fcode);
13197
13198 cmask = gen_rtx_UNSPEC (DImode,
13199 gen_rtvec (2, mask, gsr),
13200 ccode);
13201
13202 bshuf = gen_rtx_UNSPEC (mode,
13203 gen_rtvec (3, operands[1], operands[2], gsr),
13204 UNSPEC_BSHUFFLE);
13205
13206 emit_insn (gen_rtx_SET (mask, fcmp));
13207 emit_insn (gen_rtx_SET (gsr, cmask));
13208
13209 emit_insn (gen_rtx_SET (operands[0], bshuf));
13210 }
13211
13212 /* On SPARC, any mode that naturally allocates into the float
13213 registers should return 4 here (float registers hold 4 bytes each). */
13214
13215 unsigned int
13216 sparc_regmode_natural_size (machine_mode mode)
13217 {
13218 int size = UNITS_PER_WORD;
13219
13220 if (TARGET_ARCH64)
13221 {
13222 enum mode_class mclass = GET_MODE_CLASS (mode);
13223
13224 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13225 size = 4;
13226 }
13227
13228 return size;
13229 }
13230
13231 /* Implement TARGET_HARD_REGNO_NREGS.
13232
13233 On SPARC, ordinary registers hold 32 bits worth; this means both
13234 integer and floating point registers. On v9, integer regs hold 64
13235 bits worth; floating point regs hold 32 bits worth (this includes the
13236 new fp regs as even the odd ones are included in the hard register
13237 count). */
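/* For example, under TARGET_ARCH64 a DFmode value therefore occupies one
   integer register but two floating-point registers.  */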
13238
13239 static unsigned int
13240 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13241 {
13242 if (regno == SPARC_GSR_REG)
13243 return 1;
13244 if (TARGET_ARCH64)
13245 {
13246 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13247 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13248 return CEIL (GET_MODE_SIZE (mode), 4);
13249 }
13250 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13251 }
13252
13253 /* Implement TARGET_HARD_REGNO_MODE_OK.
13254
13255 ??? Because of the funny way we pass parameters we should allow certain
13256 ??? types of float/complex values to be in integer registers during
13257 ??? RTL generation. This only matters on arch32. */
13258
13259 static bool
13260 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13261 {
13262 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13263 }
13264
13265 /* Implement TARGET_MODES_TIEABLE_P.
13266
13267 For V9 we have to deal with the fact that only the lower 32 floating
13268 point registers are 32-bit addressable. */
13269
13270 static bool
13271 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13272 {
13273 enum mode_class mclass1, mclass2;
13274 unsigned short size1, size2;
13275
13276 if (mode1 == mode2)
13277 return true;
13278
13279 mclass1 = GET_MODE_CLASS (mode1);
13280 mclass2 = GET_MODE_CLASS (mode2);
13281 if (mclass1 != mclass2)
13282 return false;
13283
13284 if (! TARGET_V9)
13285 return true;
13286
13287 /* Classes are the same and we are V9 so we have to deal with upper
13288 vs. lower floating point registers. If one of the modes is a
13289 4-byte mode, and the other is not, we have to mark them as not
13290 tieable because only the lower 32 floating point registers are
13291 addressable 32-bits at a time.
13292
13293 We can't just test explicitly for SFmode, otherwise we won't
13294 cover the vector mode cases properly. */
13295
13296 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13297 return true;
13298
13299 size1 = GET_MODE_SIZE (mode1);
13300 size2 = GET_MODE_SIZE (mode2);
13301 if ((size1 > 4 && size2 == 4)
13302 || (size2 > 4 && size1 == 4))
13303 return false;
13304
13305 return true;
13306 }
13307
13308 /* Implement TARGET_CSTORE_MODE. */
13309
13310 static scalar_int_mode
13311 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13312 {
13313 return (TARGET_ARCH64 ? DImode : SImode);
13314 }
13315
13316 /* Return the compound expression made of T1 and T2. */
13317
13318 static inline tree
13319 compound_expr (tree t1, tree t2)
13320 {
13321 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13322 }
13323
13324 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13325
13326 static void
13327 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13328 {
13329 if (!TARGET_FPU)
13330 return;
13331
13332 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13333 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
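/* These are the FSR.aexc accrued-exception field (bits 9:5) and the FSR.TEM
   trap-enable mask (bits 27:23) of the SPARC %fsr register.  */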
13334
13335 /* We generate the equivalent of feholdexcept (&fenv_var):
13336
13337 unsigned int fenv_var;
13338 __builtin_store_fsr (&fenv_var);
13339
13340 unsigned int tmp1_var;
13341 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13342
13343 __builtin_load_fsr (&tmp1_var); */
13344
13345 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13346 TREE_ADDRESSABLE (fenv_var) = 1;
13347 tree fenv_addr = build_fold_addr_expr (fenv_var);
13348 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13349 tree hold_stfsr
13350 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13351 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13352
13353 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13354 TREE_ADDRESSABLE (tmp1_var) = 1;
13355 tree masked_fenv_var
13356 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13357 build_int_cst (unsigned_type_node,
13358 ~(accrued_exception_mask | trap_enable_mask)));
13359 tree hold_mask
13360 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13361 NULL_TREE, NULL_TREE);
13362
13363 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13364 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13365 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13366
13367 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13368
13369 /* We reload the value of tmp1_var to clear the exceptions:
13370
13371 __builtin_load_fsr (&tmp1_var); */
13372
13373 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13374
13375 /* We generate the equivalent of feupdateenv (&fenv_var):
13376
13377 unsigned int tmp2_var;
13378 __builtin_store_fsr (&tmp2_var);
13379
13380 __builtin_load_fsr (&fenv_var);
13381
13382 if (SPARC_LOW_FE_EXCEPT_VALUES)
13383 tmp2_var >>= 5;
13384 __atomic_feraiseexcept ((int) tmp2_var); */
13385
13386 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13387 TREE_ADDRESSABLE (tmp2_var) = 1;
13388 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13389 tree update_stfsr
13390 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13391 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13392
13393 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13394
13395 tree atomic_feraiseexcept
13396 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13397 tree update_call
13398 = build_call_expr (atomic_feraiseexcept, 1,
13399 fold_convert (integer_type_node, tmp2_var));
13400
13401 if (SPARC_LOW_FE_EXCEPT_VALUES)
13402 {
13403 tree shifted_tmp2_var
13404 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13405 build_int_cst (unsigned_type_node, 5));
13406 tree update_shift
13407 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13408 update_call = compound_expr (update_shift, update_call);
13409 }
13410
13411 *update
13412 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13413 }
13414
13415 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13416
13417 SImode loads to floating-point registers are not zero-extended.
13418 The definition for LOAD_EXTEND_OP specifies that integer loads
13419 narrower than BITS_PER_WORD will be zero-extended. As a result,
13420 we inhibit changes from SImode unless they are to a mode that is
13421 identical in size.
13422
13423 Likewise for SFmode, since word-mode paradoxical subregs are
13424 problematic on big-endian architectures. */
13425
13426 static bool
13427 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13428 reg_class_t rclass)
13429 {
13430 if (TARGET_ARCH64
13431 && GET_MODE_SIZE (from) == 4
13432 && GET_MODE_SIZE (to) != 4)
13433 return !reg_classes_intersect_p (rclass, FP_REGS);
13434 return true;
13435 }
13436
13437 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13438
13439 static HOST_WIDE_INT
13440 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13441 {
13442 if (TREE_CODE (exp) == STRING_CST)
13443 return MAX (align, FASTEST_ALIGNMENT);
13444 return align;
13445 }
13446
13447 #include "gt-sparc.h"