1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "debug.h"
52 #include "common/common-target.h"
53 #include "gimplify.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "params.h"
57 #include "tree-pass.h"
58 #include "context.h"
59 #include "builtins.h"
60
61 /* This file should be included last. */
62 #include "target-def.h"
63
64 /* Processor costs */
65
66 struct processor_costs {
67 /* Integer load */
68 const int int_load;
69
70 /* Integer signed load */
71 const int int_sload;
72
73 /* Integer zeroed load */
74 const int int_zload;
75
76 /* Float load */
77 const int float_load;
78
79 /* fmov, fneg, fabs */
80 const int float_move;
81
82 /* fadd, fsub */
83 const int float_plusminus;
84
85 /* fcmp */
86 const int float_cmp;
87
88 /* fmov, fmovr */
89 const int float_cmove;
90
91 /* fmul */
92 const int float_mul;
93
94 /* fdivs */
95 const int float_div_sf;
96
97 /* fdivd */
98 const int float_div_df;
99
100 /* fsqrts */
101 const int float_sqrt_sf;
102
103 /* fsqrtd */
104 const int float_sqrt_df;
105
106 /* umul/smul */
107 const int int_mul;
108
109 /* mulX */
110 const int int_mulX;
111
112 /* integer multiply cost for each bit set past the most
113 significant 3, so the formula for multiply cost becomes:
114
115 if (rs1 < 0)
116 highest_bit = highest_clear_bit(rs1);
117 else
118 highest_bit = highest_set_bit(rs1);
119 if (highest_bit < 3)
120 highest_bit = 3;
121 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
122
 123      A value of zero indicates that the multiply cost is fixed rather
 124      than variable; see the illustrative sketch following this structure.  */
125 const int int_mul_bit_factor;
126
127 /* udiv/sdiv */
128 const int int_div;
129
130 /* divX */
131 const int int_divX;
132
133 /* movcc, movr */
134 const int int_cmove;
135
136 /* penalty for shifts, due to scheduling rules etc. */
137 const int shift_penalty;
138 };
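/* Illustrative sketch only (not used by the compiler): a literal reading of
   the int_mul_bit_factor formula documented above, assuming a non-negative
   multiplier so that only the highest set bit matters.  For instance, with
   int_mul = COSTS_N_INSNS (4) and int_mul_bit_factor = 2, a multiplier of
   0x1234 (highest set bit 12) costs COSTS_N_INSNS (4) + (12 - 3) / 2.  */
#if 0
static int
example_int_mul_cost (const struct processor_costs *costs,
		      unsigned HOST_WIDE_INT rs1)
{
  int highest_bit = rs1 ? floor_log2 (rs1) : 0;
  if (highest_bit < 3)
    highest_bit = 3;
  if (costs->int_mul_bit_factor == 0)
    return costs->int_mul;
  return costs->int_mul + (highest_bit - 3) / costs->int_mul_bit_factor;
}
#endif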
139
140 static const
141 struct processor_costs cypress_costs = {
142 COSTS_N_INSNS (2), /* int load */
143 COSTS_N_INSNS (2), /* int signed load */
144 COSTS_N_INSNS (2), /* int zeroed load */
145 COSTS_N_INSNS (2), /* float load */
146 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
147 COSTS_N_INSNS (5), /* fadd, fsub */
148 COSTS_N_INSNS (1), /* fcmp */
149 COSTS_N_INSNS (1), /* fmov, fmovr */
150 COSTS_N_INSNS (7), /* fmul */
151 COSTS_N_INSNS (37), /* fdivs */
152 COSTS_N_INSNS (37), /* fdivd */
153 COSTS_N_INSNS (63), /* fsqrts */
154 COSTS_N_INSNS (63), /* fsqrtd */
155 COSTS_N_INSNS (1), /* imul */
156 COSTS_N_INSNS (1), /* imulX */
157 0, /* imul bit factor */
158 COSTS_N_INSNS (1), /* idiv */
159 COSTS_N_INSNS (1), /* idivX */
160 COSTS_N_INSNS (1), /* movcc/movr */
161 0, /* shift penalty */
162 };
163
164 static const
165 struct processor_costs supersparc_costs = {
166 COSTS_N_INSNS (1), /* int load */
167 COSTS_N_INSNS (1), /* int signed load */
168 COSTS_N_INSNS (1), /* int zeroed load */
169 COSTS_N_INSNS (0), /* float load */
170 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
171 COSTS_N_INSNS (3), /* fadd, fsub */
172 COSTS_N_INSNS (3), /* fcmp */
173 COSTS_N_INSNS (1), /* fmov, fmovr */
174 COSTS_N_INSNS (3), /* fmul */
175 COSTS_N_INSNS (6), /* fdivs */
176 COSTS_N_INSNS (9), /* fdivd */
177 COSTS_N_INSNS (12), /* fsqrts */
178 COSTS_N_INSNS (12), /* fsqrtd */
179 COSTS_N_INSNS (4), /* imul */
180 COSTS_N_INSNS (4), /* imulX */
181 0, /* imul bit factor */
182 COSTS_N_INSNS (4), /* idiv */
183 COSTS_N_INSNS (4), /* idivX */
184 COSTS_N_INSNS (1), /* movcc/movr */
185 1, /* shift penalty */
186 };
187
188 static const
189 struct processor_costs hypersparc_costs = {
190 COSTS_N_INSNS (1), /* int load */
191 COSTS_N_INSNS (1), /* int signed load */
192 COSTS_N_INSNS (1), /* int zeroed load */
193 COSTS_N_INSNS (1), /* float load */
194 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
195 COSTS_N_INSNS (1), /* fadd, fsub */
196 COSTS_N_INSNS (1), /* fcmp */
197 COSTS_N_INSNS (1), /* fmov, fmovr */
198 COSTS_N_INSNS (1), /* fmul */
199 COSTS_N_INSNS (8), /* fdivs */
200 COSTS_N_INSNS (12), /* fdivd */
201 COSTS_N_INSNS (17), /* fsqrts */
202 COSTS_N_INSNS (17), /* fsqrtd */
203 COSTS_N_INSNS (17), /* imul */
204 COSTS_N_INSNS (17), /* imulX */
205 0, /* imul bit factor */
206 COSTS_N_INSNS (17), /* idiv */
207 COSTS_N_INSNS (17), /* idivX */
208 COSTS_N_INSNS (1), /* movcc/movr */
209 0, /* shift penalty */
210 };
211
212 static const
213 struct processor_costs leon_costs = {
214 COSTS_N_INSNS (1), /* int load */
215 COSTS_N_INSNS (1), /* int signed load */
216 COSTS_N_INSNS (1), /* int zeroed load */
217 COSTS_N_INSNS (1), /* float load */
218 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
219 COSTS_N_INSNS (1), /* fadd, fsub */
220 COSTS_N_INSNS (1), /* fcmp */
221 COSTS_N_INSNS (1), /* fmov, fmovr */
222 COSTS_N_INSNS (1), /* fmul */
223 COSTS_N_INSNS (15), /* fdivs */
224 COSTS_N_INSNS (15), /* fdivd */
225 COSTS_N_INSNS (23), /* fsqrts */
226 COSTS_N_INSNS (23), /* fsqrtd */
227 COSTS_N_INSNS (5), /* imul */
228 COSTS_N_INSNS (5), /* imulX */
229 0, /* imul bit factor */
230 COSTS_N_INSNS (5), /* idiv */
231 COSTS_N_INSNS (5), /* idivX */
232 COSTS_N_INSNS (1), /* movcc/movr */
233 0, /* shift penalty */
234 };
235
236 static const
237 struct processor_costs leon3_costs = {
238 COSTS_N_INSNS (1), /* int load */
239 COSTS_N_INSNS (1), /* int signed load */
240 COSTS_N_INSNS (1), /* int zeroed load */
241 COSTS_N_INSNS (1), /* float load */
242 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
243 COSTS_N_INSNS (1), /* fadd, fsub */
244 COSTS_N_INSNS (1), /* fcmp */
245 COSTS_N_INSNS (1), /* fmov, fmovr */
246 COSTS_N_INSNS (1), /* fmul */
247 COSTS_N_INSNS (14), /* fdivs */
248 COSTS_N_INSNS (15), /* fdivd */
249 COSTS_N_INSNS (22), /* fsqrts */
250 COSTS_N_INSNS (23), /* fsqrtd */
251 COSTS_N_INSNS (5), /* imul */
252 COSTS_N_INSNS (5), /* imulX */
253 0, /* imul bit factor */
254 COSTS_N_INSNS (35), /* idiv */
255 COSTS_N_INSNS (35), /* idivX */
256 COSTS_N_INSNS (1), /* movcc/movr */
257 0, /* shift penalty */
258 };
259
260 static const
261 struct processor_costs sparclet_costs = {
262 COSTS_N_INSNS (3), /* int load */
263 COSTS_N_INSNS (3), /* int signed load */
264 COSTS_N_INSNS (1), /* int zeroed load */
265 COSTS_N_INSNS (1), /* float load */
266 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
267 COSTS_N_INSNS (1), /* fadd, fsub */
268 COSTS_N_INSNS (1), /* fcmp */
269 COSTS_N_INSNS (1), /* fmov, fmovr */
270 COSTS_N_INSNS (1), /* fmul */
271 COSTS_N_INSNS (1), /* fdivs */
272 COSTS_N_INSNS (1), /* fdivd */
273 COSTS_N_INSNS (1), /* fsqrts */
274 COSTS_N_INSNS (1), /* fsqrtd */
275 COSTS_N_INSNS (5), /* imul */
276 COSTS_N_INSNS (5), /* imulX */
277 0, /* imul bit factor */
278 COSTS_N_INSNS (5), /* idiv */
279 COSTS_N_INSNS (5), /* idivX */
280 COSTS_N_INSNS (1), /* movcc/movr */
281 0, /* shift penalty */
282 };
283
284 static const
285 struct processor_costs ultrasparc_costs = {
286 COSTS_N_INSNS (2), /* int load */
287 COSTS_N_INSNS (3), /* int signed load */
288 COSTS_N_INSNS (2), /* int zeroed load */
289 COSTS_N_INSNS (2), /* float load */
290 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
291 COSTS_N_INSNS (4), /* fadd, fsub */
292 COSTS_N_INSNS (1), /* fcmp */
293 COSTS_N_INSNS (2), /* fmov, fmovr */
294 COSTS_N_INSNS (4), /* fmul */
295 COSTS_N_INSNS (13), /* fdivs */
296 COSTS_N_INSNS (23), /* fdivd */
297 COSTS_N_INSNS (13), /* fsqrts */
298 COSTS_N_INSNS (23), /* fsqrtd */
299 COSTS_N_INSNS (4), /* imul */
300 COSTS_N_INSNS (4), /* imulX */
301 2, /* imul bit factor */
302 COSTS_N_INSNS (37), /* idiv */
303 COSTS_N_INSNS (68), /* idivX */
304 COSTS_N_INSNS (2), /* movcc/movr */
305 2, /* shift penalty */
306 };
307
308 static const
309 struct processor_costs ultrasparc3_costs = {
310 COSTS_N_INSNS (2), /* int load */
311 COSTS_N_INSNS (3), /* int signed load */
312 COSTS_N_INSNS (3), /* int zeroed load */
313 COSTS_N_INSNS (2), /* float load */
314 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
315 COSTS_N_INSNS (4), /* fadd, fsub */
316 COSTS_N_INSNS (5), /* fcmp */
317 COSTS_N_INSNS (3), /* fmov, fmovr */
318 COSTS_N_INSNS (4), /* fmul */
319 COSTS_N_INSNS (17), /* fdivs */
320 COSTS_N_INSNS (20), /* fdivd */
321 COSTS_N_INSNS (20), /* fsqrts */
322 COSTS_N_INSNS (29), /* fsqrtd */
323 COSTS_N_INSNS (6), /* imul */
324 COSTS_N_INSNS (6), /* imulX */
325 0, /* imul bit factor */
326 COSTS_N_INSNS (40), /* idiv */
327 COSTS_N_INSNS (71), /* idivX */
328 COSTS_N_INSNS (2), /* movcc/movr */
329 0, /* shift penalty */
330 };
331
332 static const
333 struct processor_costs niagara_costs = {
334 COSTS_N_INSNS (3), /* int load */
335 COSTS_N_INSNS (3), /* int signed load */
336 COSTS_N_INSNS (3), /* int zeroed load */
337 COSTS_N_INSNS (9), /* float load */
338 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
339 COSTS_N_INSNS (8), /* fadd, fsub */
340 COSTS_N_INSNS (26), /* fcmp */
341 COSTS_N_INSNS (8), /* fmov, fmovr */
342 COSTS_N_INSNS (29), /* fmul */
343 COSTS_N_INSNS (54), /* fdivs */
344 COSTS_N_INSNS (83), /* fdivd */
345 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
346 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
347 COSTS_N_INSNS (11), /* imul */
348 COSTS_N_INSNS (11), /* imulX */
349 0, /* imul bit factor */
350 COSTS_N_INSNS (72), /* idiv */
351 COSTS_N_INSNS (72), /* idivX */
352 COSTS_N_INSNS (1), /* movcc/movr */
353 0, /* shift penalty */
354 };
355
356 static const
357 struct processor_costs niagara2_costs = {
358 COSTS_N_INSNS (3), /* int load */
359 COSTS_N_INSNS (3), /* int signed load */
360 COSTS_N_INSNS (3), /* int zeroed load */
361 COSTS_N_INSNS (3), /* float load */
362 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
363 COSTS_N_INSNS (6), /* fadd, fsub */
364 COSTS_N_INSNS (6), /* fcmp */
365 COSTS_N_INSNS (6), /* fmov, fmovr */
366 COSTS_N_INSNS (6), /* fmul */
367 COSTS_N_INSNS (19), /* fdivs */
368 COSTS_N_INSNS (33), /* fdivd */
369 COSTS_N_INSNS (19), /* fsqrts */
370 COSTS_N_INSNS (33), /* fsqrtd */
371 COSTS_N_INSNS (5), /* imul */
372 COSTS_N_INSNS (5), /* imulX */
373 0, /* imul bit factor */
374 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
376 COSTS_N_INSNS (1), /* movcc/movr */
377 0, /* shift penalty */
378 };
379
380 static const
381 struct processor_costs niagara3_costs = {
382 COSTS_N_INSNS (3), /* int load */
383 COSTS_N_INSNS (3), /* int signed load */
384 COSTS_N_INSNS (3), /* int zeroed load */
385 COSTS_N_INSNS (3), /* float load */
386 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
387 COSTS_N_INSNS (9), /* fadd, fsub */
388 COSTS_N_INSNS (9), /* fcmp */
389 COSTS_N_INSNS (9), /* fmov, fmovr */
390 COSTS_N_INSNS (9), /* fmul */
391 COSTS_N_INSNS (23), /* fdivs */
392 COSTS_N_INSNS (37), /* fdivd */
393 COSTS_N_INSNS (23), /* fsqrts */
394 COSTS_N_INSNS (37), /* fsqrtd */
395 COSTS_N_INSNS (9), /* imul */
396 COSTS_N_INSNS (9), /* imulX */
397 0, /* imul bit factor */
398 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
399 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
400 COSTS_N_INSNS (1), /* movcc/movr */
401 0, /* shift penalty */
402 };
403
404 static const
405 struct processor_costs niagara4_costs = {
406 COSTS_N_INSNS (5), /* int load */
407 COSTS_N_INSNS (5), /* int signed load */
408 COSTS_N_INSNS (5), /* int zeroed load */
409 COSTS_N_INSNS (5), /* float load */
410 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
411 COSTS_N_INSNS (11), /* fadd, fsub */
412 COSTS_N_INSNS (11), /* fcmp */
413 COSTS_N_INSNS (11), /* fmov, fmovr */
414 COSTS_N_INSNS (11), /* fmul */
415 COSTS_N_INSNS (24), /* fdivs */
416 COSTS_N_INSNS (37), /* fdivd */
417 COSTS_N_INSNS (24), /* fsqrts */
418 COSTS_N_INSNS (37), /* fsqrtd */
419 COSTS_N_INSNS (12), /* imul */
420 COSTS_N_INSNS (12), /* imulX */
421 0, /* imul bit factor */
422 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
423 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
424 COSTS_N_INSNS (1), /* movcc/movr */
425 0, /* shift penalty */
426 };
427
428 static const
429 struct processor_costs niagara7_costs = {
430 COSTS_N_INSNS (5), /* int load */
431 COSTS_N_INSNS (5), /* int signed load */
432 COSTS_N_INSNS (5), /* int zeroed load */
433 COSTS_N_INSNS (5), /* float load */
434 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
435 COSTS_N_INSNS (11), /* fadd, fsub */
436 COSTS_N_INSNS (11), /* fcmp */
437 COSTS_N_INSNS (11), /* fmov, fmovr */
438 COSTS_N_INSNS (11), /* fmul */
439 COSTS_N_INSNS (24), /* fdivs */
440 COSTS_N_INSNS (37), /* fdivd */
441 COSTS_N_INSNS (24), /* fsqrts */
442 COSTS_N_INSNS (37), /* fsqrtd */
443 COSTS_N_INSNS (12), /* imul */
444 COSTS_N_INSNS (12), /* imulX */
445 0, /* imul bit factor */
446 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
447 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
448 COSTS_N_INSNS (1), /* movcc/movr */
449 0, /* shift penalty */
450 };
451
452 static const
453 struct processor_costs m8_costs = {
454 COSTS_N_INSNS (3), /* int load */
455 COSTS_N_INSNS (3), /* int signed load */
456 COSTS_N_INSNS (3), /* int zeroed load */
457 COSTS_N_INSNS (3), /* float load */
458 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
459 COSTS_N_INSNS (9), /* fadd, fsub */
460 COSTS_N_INSNS (9), /* fcmp */
461 COSTS_N_INSNS (9), /* fmov, fmovr */
462 COSTS_N_INSNS (9), /* fmul */
463 COSTS_N_INSNS (26), /* fdivs */
464 COSTS_N_INSNS (30), /* fdivd */
465 COSTS_N_INSNS (33), /* fsqrts */
466 COSTS_N_INSNS (41), /* fsqrtd */
467 COSTS_N_INSNS (12), /* imul */
468 COSTS_N_INSNS (10), /* imulX */
469 0, /* imul bit factor */
470 COSTS_N_INSNS (57), /* udiv/sdiv */
471 COSTS_N_INSNS (30), /* udivx/sdivx */
472 COSTS_N_INSNS (1), /* movcc/movr */
473 0, /* shift penalty */
474 };
475
476 static const struct processor_costs *sparc_costs = &cypress_costs;
477
478 #ifdef HAVE_AS_RELAX_OPTION
 479 /* If 'as' and 'ld' relax tail call insns into a branch always, always use
 480    "or %o7,%g0,X; call Y; or X,%g0,%o7", so that it can be optimized.
 481    With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
 482    anything branches to a point between the sethi and the jmp.  */
483 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
484 #else
485 #define LEAF_SIBCALL_SLOT_RESERVED_P \
486 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
487 #endif
488
489 /* Vector to say how input registers are mapped to output registers.
490 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
491 eliminate it. You must use -fomit-frame-pointer to get that. */
492 char leaf_reg_remap[] =
493 { 0, 1, 2, 3, 4, 5, 6, 7,
494 -1, -1, -1, -1, -1, -1, 14, -1,
495 -1, -1, -1, -1, -1, -1, -1, -1,
496 8, 9, 10, 11, 12, 13, -1, 15,
497
498 32, 33, 34, 35, 36, 37, 38, 39,
499 40, 41, 42, 43, 44, 45, 46, 47,
500 48, 49, 50, 51, 52, 53, 54, 55,
501 56, 57, 58, 59, 60, 61, 62, 63,
502 64, 65, 66, 67, 68, 69, 70, 71,
503 72, 73, 74, 75, 76, 77, 78, 79,
504 80, 81, 82, 83, 84, 85, 86, 87,
505 88, 89, 90, 91, 92, 93, 94, 95,
506 96, 97, 98, 99, 100, 101, 102};
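/* Worked example (for illustration only, not used by the code): entry 24
   above is 8, so when a function gets the leaf optimization every reference
   to %i0 (hard reg 24) is rewritten to %o0 (hard reg 8): no SAVE is emitted,
   so incoming arguments are still sitting in the caller's output registers.
   Entries of -1 (e.g. %o0-%o5, %l0-%l7, %fp) mark registers that may not
   appear in a function treated this way.  */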
507
508 /* Vector, indexed by hard register number, which contains 1
509 for a register that is allowable in a candidate for leaf
510 function treatment. */
511 char sparc_leaf_regs[] =
512 { 1, 1, 1, 1, 1, 1, 1, 1,
513 0, 0, 0, 0, 0, 0, 1, 0,
514 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 1, 1, 1, 1, 1, 0, 1,
516 1, 1, 1, 1, 1, 1, 1, 1,
517 1, 1, 1, 1, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1};
525
526 struct GTY(()) machine_function
527 {
528 /* Size of the frame of the function. */
529 HOST_WIDE_INT frame_size;
530
531 /* Size of the frame of the function minus the register window save area
532 and the outgoing argument area. */
533 HOST_WIDE_INT apparent_frame_size;
534
535 /* Register we pretend the frame pointer is allocated to. Normally, this
536 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
537 record "offset" separately as it may be too big for (reg + disp). */
538 rtx frame_base_reg;
539 HOST_WIDE_INT frame_base_offset;
540
541 /* Number of global or FP registers to be saved (as 4-byte quantities). */
542 int n_global_fp_regs;
543
544 /* True if the current function is leaf and uses only leaf regs,
545 so that the SPARC leaf function optimization can be applied.
546 Private version of crtl->uses_only_leaf_regs, see
547 sparc_expand_prologue for the rationale. */
548 int leaf_function_p;
549
550 /* True if the prologue saves local or in registers. */
551 bool save_local_in_regs_p;
552
553 /* True if the data calculated by sparc_expand_prologue are valid. */
554 bool prologue_data_valid_p;
555 };
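/* Illustrative example (figures assumed for illustration): in a leaf
   function with a 64 KB frame, frame_base_reg would be %sp and
   frame_base_offset roughly 64 KB (plus the stack bias in 64-bit mode).
   The offset is recorded separately precisely because such a value does not
   fit in the signed 13-bit displacement of a SPARC reg+imm address, so it
   must first be materialized in a scratch register.  */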
556
557 #define sparc_frame_size cfun->machine->frame_size
558 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
559 #define sparc_frame_base_reg cfun->machine->frame_base_reg
560 #define sparc_frame_base_offset cfun->machine->frame_base_offset
561 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
562 #define sparc_leaf_function_p cfun->machine->leaf_function_p
563 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
564 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
565
566 /* 1 if the next opcode is to be specially indented. */
567 int sparc_indent_opcode = 0;
568
569 static void sparc_option_override (void);
570 static void sparc_init_modes (void);
571 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
572 const_tree, bool, bool, int *, int *);
573
574 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
575 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
576
577 static void sparc_emit_set_const32 (rtx, rtx);
578 static void sparc_emit_set_const64 (rtx, rtx);
579 static void sparc_output_addr_vec (rtx);
580 static void sparc_output_addr_diff_vec (rtx);
581 static void sparc_output_deferred_case_vectors (void);
582 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
583 static bool sparc_legitimate_constant_p (machine_mode, rtx);
584 static rtx sparc_builtin_saveregs (void);
585 static int epilogue_renumber (rtx *, int);
586 static bool sparc_assemble_integer (rtx, unsigned int, int);
587 static int set_extends (rtx_insn *);
588 static void sparc_asm_function_prologue (FILE *);
589 static void sparc_asm_function_epilogue (FILE *);
590 #ifdef TARGET_SOLARIS
591 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
592 tree) ATTRIBUTE_UNUSED;
593 #endif
594 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
595 static int sparc_issue_rate (void);
596 static void sparc_sched_init (FILE *, int, int);
597 static int sparc_use_sched_lookahead (void);
598
599 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
600 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
601 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
602 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
603 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
604
605 static bool sparc_function_ok_for_sibcall (tree, tree);
606 static void sparc_init_libfuncs (void);
607 static void sparc_init_builtins (void);
608 static void sparc_fpu_init_builtins (void);
609 static void sparc_vis_init_builtins (void);
610 static tree sparc_builtin_decl (unsigned, bool);
611 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
612 static tree sparc_fold_builtin (tree, int, tree *, bool);
613 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
614 HOST_WIDE_INT, tree);
615 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
616 HOST_WIDE_INT, const_tree);
617 static struct machine_function * sparc_init_machine_status (void);
618 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
619 static rtx sparc_tls_get_addr (void);
620 static rtx sparc_tls_got (void);
621 static int sparc_register_move_cost (machine_mode,
622 reg_class_t, reg_class_t);
623 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
624 static rtx sparc_function_value (const_tree, const_tree, bool);
625 static rtx sparc_libcall_value (machine_mode, const_rtx);
626 static bool sparc_function_value_regno_p (const unsigned int);
627 static rtx sparc_struct_value_rtx (tree, int);
628 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
629 int *, const_tree, int);
630 static bool sparc_return_in_memory (const_tree, const_tree);
631 static bool sparc_strict_argument_naming (cumulative_args_t);
632 static void sparc_va_start (tree, rtx);
633 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
634 static bool sparc_vector_mode_supported_p (machine_mode);
635 static bool sparc_tls_referenced_p (rtx);
636 static rtx sparc_legitimize_tls_address (rtx);
637 static rtx sparc_legitimize_pic_address (rtx, rtx);
638 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
639 static rtx sparc_delegitimize_address (rtx);
640 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
641 static bool sparc_pass_by_reference (cumulative_args_t,
642 machine_mode, const_tree, bool);
643 static void sparc_function_arg_advance (cumulative_args_t,
644 machine_mode, const_tree, bool);
645 static rtx sparc_function_arg_1 (cumulative_args_t,
646 machine_mode, const_tree, bool, bool);
647 static rtx sparc_function_arg (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_incoming_arg (cumulative_args_t,
650 machine_mode, const_tree, bool);
651 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
652 static unsigned int sparc_function_arg_boundary (machine_mode,
653 const_tree);
654 static int sparc_arg_partial_bytes (cumulative_args_t,
655 machine_mode, tree, bool);
656 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
657 static void sparc_file_end (void);
658 static bool sparc_frame_pointer_required (void);
659 static bool sparc_can_eliminate (const int, const int);
660 static rtx sparc_builtin_setjmp_frame_value (void);
661 static void sparc_conditional_register_usage (void);
662 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
663 static const char *sparc_mangle_type (const_tree);
664 #endif
665 static void sparc_trampoline_init (rtx, tree, rtx);
666 static machine_mode sparc_preferred_simd_mode (scalar_mode);
667 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
668 static bool sparc_lra_p (void);
669 static bool sparc_print_operand_punct_valid_p (unsigned char);
670 static void sparc_print_operand (FILE *, rtx, int);
671 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
672 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
673 machine_mode,
674 secondary_reload_info *);
675 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
676 reg_class_t);
677 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
678 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
679 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
680 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
681 static unsigned int sparc_min_arithmetic_precision (void);
682 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
683 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
684 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
685 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
686 reg_class_t);
687 \f
688 #ifdef SUBTARGET_ATTRIBUTE_TABLE
689 /* Table of valid machine attributes. */
690 static const struct attribute_spec sparc_attribute_table[] =
691 {
692 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
693 do_diagnostic } */
694 SUBTARGET_ATTRIBUTE_TABLE,
695 { NULL, 0, 0, false, false, false, NULL, false }
696 };
697 #endif
698 \f
699 /* Option handling. */
700
701 /* Parsed value. */
702 enum cmodel sparc_cmodel;
703
704 char sparc_hard_reg_printed[8];
705
706 /* Initialize the GCC target structure. */
707
708 /* The default is to use .half rather than .short for aligned HI objects. */
709 #undef TARGET_ASM_ALIGNED_HI_OP
710 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
711
712 #undef TARGET_ASM_UNALIGNED_HI_OP
713 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
714 #undef TARGET_ASM_UNALIGNED_SI_OP
715 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
716 #undef TARGET_ASM_UNALIGNED_DI_OP
717 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
718
719 /* The target hook has to handle DI-mode values. */
720 #undef TARGET_ASM_INTEGER
721 #define TARGET_ASM_INTEGER sparc_assemble_integer
722
723 #undef TARGET_ASM_FUNCTION_PROLOGUE
724 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
725 #undef TARGET_ASM_FUNCTION_EPILOGUE
726 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
727
728 #undef TARGET_SCHED_ADJUST_COST
729 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
730 #undef TARGET_SCHED_ISSUE_RATE
731 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
732 #undef TARGET_SCHED_INIT
733 #define TARGET_SCHED_INIT sparc_sched_init
734 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
735 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
736
737 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
738 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
739
740 #undef TARGET_INIT_LIBFUNCS
741 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
742
743 #undef TARGET_LEGITIMIZE_ADDRESS
744 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
745 #undef TARGET_DELEGITIMIZE_ADDRESS
746 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
747 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
748 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
749
750 #undef TARGET_INIT_BUILTINS
751 #define TARGET_INIT_BUILTINS sparc_init_builtins
752 #undef TARGET_BUILTIN_DECL
753 #define TARGET_BUILTIN_DECL sparc_builtin_decl
754 #undef TARGET_EXPAND_BUILTIN
755 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
756 #undef TARGET_FOLD_BUILTIN
757 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
758
759 #if TARGET_TLS
760 #undef TARGET_HAVE_TLS
761 #define TARGET_HAVE_TLS true
762 #endif
763
764 #undef TARGET_CANNOT_FORCE_CONST_MEM
765 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
766
767 #undef TARGET_ASM_OUTPUT_MI_THUNK
768 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
769 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
770 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
771
772 #undef TARGET_RTX_COSTS
773 #define TARGET_RTX_COSTS sparc_rtx_costs
774 #undef TARGET_ADDRESS_COST
775 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
776 #undef TARGET_REGISTER_MOVE_COST
777 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
778
779 #undef TARGET_PROMOTE_FUNCTION_MODE
780 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
781
782 #undef TARGET_FUNCTION_VALUE
783 #define TARGET_FUNCTION_VALUE sparc_function_value
784 #undef TARGET_LIBCALL_VALUE
785 #define TARGET_LIBCALL_VALUE sparc_libcall_value
786 #undef TARGET_FUNCTION_VALUE_REGNO_P
787 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
788
789 #undef TARGET_STRUCT_VALUE_RTX
790 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
791 #undef TARGET_RETURN_IN_MEMORY
792 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
793 #undef TARGET_MUST_PASS_IN_STACK
794 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
795 #undef TARGET_PASS_BY_REFERENCE
796 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
797 #undef TARGET_ARG_PARTIAL_BYTES
798 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
799 #undef TARGET_FUNCTION_ARG_ADVANCE
800 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
801 #undef TARGET_FUNCTION_ARG
802 #define TARGET_FUNCTION_ARG sparc_function_arg
803 #undef TARGET_FUNCTION_INCOMING_ARG
804 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
805 #undef TARGET_FUNCTION_ARG_PADDING
806 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
807 #undef TARGET_FUNCTION_ARG_BOUNDARY
808 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
809
810 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
811 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
812 #undef TARGET_STRICT_ARGUMENT_NAMING
813 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
814
815 #undef TARGET_EXPAND_BUILTIN_VA_START
816 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
817 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
818 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
819
820 #undef TARGET_VECTOR_MODE_SUPPORTED_P
821 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
822
823 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
824 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
825
826 #ifdef SUBTARGET_INSERT_ATTRIBUTES
827 #undef TARGET_INSERT_ATTRIBUTES
828 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
829 #endif
830
831 #ifdef SUBTARGET_ATTRIBUTE_TABLE
832 #undef TARGET_ATTRIBUTE_TABLE
833 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
834 #endif
835
836 #undef TARGET_OPTION_OVERRIDE
837 #define TARGET_OPTION_OVERRIDE sparc_option_override
838
839 #ifdef TARGET_THREAD_SSP_OFFSET
840 #undef TARGET_STACK_PROTECT_GUARD
841 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
842 #endif
843
844 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
845 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
846 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
847 #endif
848
849 #undef TARGET_ASM_FILE_END
850 #define TARGET_ASM_FILE_END sparc_file_end
851
852 #undef TARGET_FRAME_POINTER_REQUIRED
853 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
854
855 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
856 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
857
858 #undef TARGET_CAN_ELIMINATE
859 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
860
861 #undef TARGET_PREFERRED_RELOAD_CLASS
862 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
863
864 #undef TARGET_SECONDARY_RELOAD
865 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
866 #undef TARGET_SECONDARY_MEMORY_NEEDED
867 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
868 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
869 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
870
871 #undef TARGET_CONDITIONAL_REGISTER_USAGE
872 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
873
874 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
875 #undef TARGET_MANGLE_TYPE
876 #define TARGET_MANGLE_TYPE sparc_mangle_type
877 #endif
878
879 #undef TARGET_LRA_P
880 #define TARGET_LRA_P sparc_lra_p
881
882 #undef TARGET_LEGITIMATE_ADDRESS_P
883 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
884
885 #undef TARGET_LEGITIMATE_CONSTANT_P
886 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
887
888 #undef TARGET_TRAMPOLINE_INIT
889 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
890
891 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
892 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
893 #undef TARGET_PRINT_OPERAND
894 #define TARGET_PRINT_OPERAND sparc_print_operand
895 #undef TARGET_PRINT_OPERAND_ADDRESS
896 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
897
898 /* The value stored by LDSTUB. */
899 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
900 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
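/* Background note (SPARC ISA fact, not stated in the code above): ldstub
   atomically reads a byte and writes all-ones (0xff) into it, so the
   in-memory "set" value of an atomic test-and-set flag is 0xff rather than
   1; the hook above tells the middle-end to expect that value.  */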
901
902 #undef TARGET_CSTORE_MODE
903 #define TARGET_CSTORE_MODE sparc_cstore_mode
904
905 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
906 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
907
908 #undef TARGET_FIXED_CONDITION_CODE_REGS
909 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
910
911 #undef TARGET_MIN_ARITHMETIC_PRECISION
912 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
913
914 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
915 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
916
917 #undef TARGET_HARD_REGNO_NREGS
918 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
919 #undef TARGET_HARD_REGNO_MODE_OK
920 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
921
922 #undef TARGET_MODES_TIEABLE_P
923 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
924
925 #undef TARGET_CAN_CHANGE_MODE_CLASS
926 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
927
928 struct gcc_target targetm = TARGET_INITIALIZER;
929
930 /* Return the memory reference contained in X if any, zero otherwise. */
931
932 static rtx
933 mem_ref (rtx x)
934 {
935 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
936 x = XEXP (x, 0);
937
938 if (MEM_P (x))
939 return x;
940
941 return NULL_RTX;
942 }
943
944 /* We use a machine specific pass to enable workarounds for errata.
945
946 We need to have the (essentially) final form of the insn stream in order
947 to properly detect the various hazards. Therefore, this machine specific
948 pass runs as late as possible. */
949
950 /* True if INSN is a md pattern or asm statement. */
951 #define USEFUL_INSN_P(INSN) \
952 (NONDEBUG_INSN_P (INSN) \
953 && GET_CODE (PATTERN (INSN)) != USE \
954 && GET_CODE (PATTERN (INSN)) != CLOBBER)
955
956 static unsigned int
957 sparc_do_work_around_errata (void)
958 {
959 rtx_insn *insn, *next;
960
961 /* Force all instructions to be split into their final form. */
962 split_all_insns_noflow ();
963
964 /* Now look for specific patterns in the insn stream. */
965 for (insn = get_insns (); insn; insn = next)
966 {
967 bool insert_nop = false;
968 rtx set;
969
970 /* Look into the instruction in a delay slot. */
971 if (NONJUMP_INSN_P (insn))
972 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
973 insn = seq->insn (1);
974
975 /* Look for either of these two sequences:
976
977 Sequence A:
978 1. store of word size or less (e.g. st / stb / sth / stf)
979 2. any single instruction that is not a load or store
980 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
981
982 Sequence B:
983 1. store of double word size (e.g. std / stdf)
984 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
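	  /* Illustrative example (not taken from a real trace): on an
	     affected LEON3FT part, a window such as

		 st   %g1, [%fp-4]	! 1. word store
		 add  %g2, %g3, %g2	! 2. neither load nor store
		 st   %g2, [%fp-8]	! 3. another store -> hazard

	     matches sequence A, and the code below arranges for a nop to be
	     emitted right after the first store to break up the pattern; a
	     std/stdf followed directly by any store matches sequence B.  */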
985 if (sparc_fix_b2bst
986 && NONJUMP_INSN_P (insn)
987 && (set = single_set (insn)) != NULL_RTX
988 && MEM_P (SET_DEST (set)))
989 {
990 /* Sequence B begins with a double-word store. */
991 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
992 rtx_insn *after;
993 int i;
994
995 next = next_active_insn (insn);
996 if (!next)
997 break;
998
999 for (after = next, i = 0; i < 2; i++)
1000 {
1001 /* Skip empty assembly statements. */
1002 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1003 || (USEFUL_INSN_P (after)
 1004 		  && (asm_noperands (PATTERN (after)) >= 0)
1005 && !strcmp (decode_asm_operands (PATTERN (after),
1006 NULL, NULL, NULL,
1007 NULL, NULL), "")))
1008 after = next_active_insn (after);
1009 if (!after)
1010 break;
1011
1012 /* If the insn is a branch, then it cannot be problematic. */
1013 if (!NONJUMP_INSN_P (after)
1014 || GET_CODE (PATTERN (after)) == SEQUENCE)
1015 break;
1016
1017 /* Sequence B is only two instructions long. */
1018 if (seq_b)
1019 {
1020 /* Add NOP if followed by a store. */
1021 if ((set = single_set (after)) != NULL_RTX
1022 && MEM_P (SET_DEST (set)))
1023 insert_nop = true;
1024
1025 /* Otherwise it is ok. */
1026 break;
1027 }
1028
1029 /* If the second instruction is a load or a store,
1030 then the sequence cannot be problematic. */
1031 if (i == 0)
1032 {
1033 if (((set = single_set (after)) != NULL_RTX)
1034 && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
1035 break;
1036
1037 after = next_active_insn (after);
1038 if (!after)
1039 break;
1040 }
1041
1042 /* Add NOP if third instruction is a store. */
1043 if (i == 1
1044 && ((set = single_set (after)) != NULL_RTX)
1045 && MEM_P (SET_DEST (set)))
1046 insert_nop = true;
1047 }
1048 }
1049 else
1050 /* Look for a single-word load into an odd-numbered FP register. */
1051 if (sparc_fix_at697f
1052 && NONJUMP_INSN_P (insn)
1053 && (set = single_set (insn)) != NULL_RTX
1054 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1055 && MEM_P (SET_SRC (set))
1056 && REG_P (SET_DEST (set))
1057 && REGNO (SET_DEST (set)) > 31
1058 && REGNO (SET_DEST (set)) % 2 != 0)
1059 {
1060 /* The wrong dependency is on the enclosing double register. */
1061 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1062 unsigned int src1, src2, dest;
1063 int code;
1064
1065 next = next_active_insn (insn);
1066 if (!next)
1067 break;
1068 /* If the insn is a branch, then it cannot be problematic. */
1069 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1070 continue;
1071
1072 extract_insn (next);
1073 code = INSN_CODE (next);
1074
1075 switch (code)
1076 {
1077 case CODE_FOR_adddf3:
1078 case CODE_FOR_subdf3:
1079 case CODE_FOR_muldf3:
1080 case CODE_FOR_divdf3:
1081 dest = REGNO (recog_data.operand[0]);
1082 src1 = REGNO (recog_data.operand[1]);
1083 src2 = REGNO (recog_data.operand[2]);
1084 if (src1 != src2)
1085 {
1086 /* Case [1-4]:
1087 ld [address], %fx+1
1088 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1089 if ((src1 == x || src2 == x)
1090 && (dest == src1 || dest == src2))
1091 insert_nop = true;
1092 }
1093 else
1094 {
1095 /* Case 5:
1096 ld [address], %fx+1
1097 FPOPd %fx, %fx, %fx */
1098 if (src1 == x
1099 && dest == src1
1100 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1101 insert_nop = true;
1102 }
1103 break;
1104
1105 case CODE_FOR_sqrtdf2:
1106 dest = REGNO (recog_data.operand[0]);
1107 src1 = REGNO (recog_data.operand[1]);
1108 /* Case 6:
1109 ld [address], %fx+1
1110 fsqrtd %fx, %fx */
1111 if (src1 == x && dest == src1)
1112 insert_nop = true;
1113 break;
1114
1115 default:
1116 break;
1117 }
1118 }
1119
1120 /* Look for a single-word load into an integer register. */
1121 else if (sparc_fix_ut699
1122 && NONJUMP_INSN_P (insn)
1123 && (set = single_set (insn)) != NULL_RTX
1124 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1125 && mem_ref (SET_SRC (set)) != NULL_RTX
1126 && REG_P (SET_DEST (set))
1127 && REGNO (SET_DEST (set)) < 32)
1128 {
1129 /* There is no problem if the second memory access has a data
1130 dependency on the first single-cycle load. */
1131 rtx x = SET_DEST (set);
1132
1133 next = next_active_insn (insn);
1134 if (!next)
1135 break;
1136 /* If the insn is a branch, then it cannot be problematic. */
1137 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1138 continue;
1139
1140 /* Look for a second memory access to/from an integer register. */
1141 if ((set = single_set (next)) != NULL_RTX)
1142 {
1143 rtx src = SET_SRC (set);
1144 rtx dest = SET_DEST (set);
1145 rtx mem;
1146
1147 /* LDD is affected. */
1148 if ((mem = mem_ref (src)) != NULL_RTX
1149 && REG_P (dest)
1150 && REGNO (dest) < 32
1151 && !reg_mentioned_p (x, XEXP (mem, 0)))
1152 insert_nop = true;
1153
1154 /* STD is *not* affected. */
1155 else if (MEM_P (dest)
1156 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1157 && (src == CONST0_RTX (GET_MODE (dest))
1158 || (REG_P (src)
1159 && REGNO (src) < 32
1160 && REGNO (src) != REGNO (x)))
1161 && !reg_mentioned_p (x, XEXP (dest, 0)))
1162 insert_nop = true;
1163 }
1164 }
1165
1166 /* Look for a single-word load/operation into an FP register. */
1167 else if (sparc_fix_ut699
1168 && NONJUMP_INSN_P (insn)
1169 && (set = single_set (insn)) != NULL_RTX
1170 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1171 && REG_P (SET_DEST (set))
1172 && REGNO (SET_DEST (set)) > 31)
1173 {
1174 /* Number of instructions in the problematic window. */
1175 const int n_insns = 4;
1176 /* The problematic combination is with the sibling FP register. */
1177 const unsigned int x = REGNO (SET_DEST (set));
1178 const unsigned int y = x ^ 1;
1179 rtx_insn *after;
1180 int i;
1181
1182 next = next_active_insn (insn);
1183 if (!next)
1184 break;
1185 /* If the insn is a branch, then it cannot be problematic. */
1186 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1187 continue;
1188
1189 /* Look for a second load/operation into the sibling FP register. */
1190 if (!((set = single_set (next)) != NULL_RTX
1191 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1192 && REG_P (SET_DEST (set))
1193 && REGNO (SET_DEST (set)) == y))
1194 continue;
1195
1196 /* Look for a (possible) store from the FP register in the next N
1197 instructions, but bail out if it is again modified or if there
1198 is a store from the sibling FP register before this store. */
1199 for (after = next, i = 0; i < n_insns; i++)
1200 {
1201 bool branch_p;
1202
1203 after = next_active_insn (after);
1204 if (!after)
1205 break;
1206
1207 /* This is a branch with an empty delay slot. */
1208 if (!NONJUMP_INSN_P (after))
1209 {
1210 if (++i == n_insns)
1211 break;
1212 branch_p = true;
1213 after = NULL;
1214 }
1215 /* This is a branch with a filled delay slot. */
1216 else if (rtx_sequence *seq =
1217 dyn_cast <rtx_sequence *> (PATTERN (after)))
1218 {
1219 if (++i == n_insns)
1220 break;
1221 branch_p = true;
1222 after = seq->insn (1);
1223 }
1224 /* This is a regular instruction. */
1225 else
1226 branch_p = false;
1227
1228 if (after && (set = single_set (after)) != NULL_RTX)
1229 {
1230 const rtx src = SET_SRC (set);
1231 const rtx dest = SET_DEST (set);
1232 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1233
1234 /* If the FP register is again modified before the store,
1235 then the store isn't affected. */
1236 if (REG_P (dest)
1237 && (REGNO (dest) == x
1238 || (REGNO (dest) == y && size == 8)))
1239 break;
1240
1241 if (MEM_P (dest) && REG_P (src))
1242 {
1243 /* If there is a store from the sibling FP register
1244 before the store, then the store is not affected. */
1245 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1246 break;
1247
1248 /* Otherwise, the store is affected. */
1249 if (REGNO (src) == x && size == 4)
1250 {
1251 insert_nop = true;
1252 break;
1253 }
1254 }
1255 }
1256
1257 /* If we have a branch in the first M instructions, then we
1258 cannot see the (M+2)th instruction so we play safe. */
1259 if (branch_p && i <= (n_insns - 2))
1260 {
1261 insert_nop = true;
1262 break;
1263 }
1264 }
1265 }
1266
1267 else
1268 next = NEXT_INSN (insn);
1269
1270 if (insert_nop)
1271 emit_insn_before (gen_nop (), next);
1272 }
1273
1274 return 0;
1275 }
1276
1277 namespace {
1278
1279 const pass_data pass_data_work_around_errata =
1280 {
1281 RTL_PASS, /* type */
1282 "errata", /* name */
1283 OPTGROUP_NONE, /* optinfo_flags */
1284 TV_MACH_DEP, /* tv_id */
1285 0, /* properties_required */
1286 0, /* properties_provided */
1287 0, /* properties_destroyed */
1288 0, /* todo_flags_start */
1289 0, /* todo_flags_finish */
1290 };
1291
1292 class pass_work_around_errata : public rtl_opt_pass
1293 {
1294 public:
1295 pass_work_around_errata(gcc::context *ctxt)
1296 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1297 {}
1298
1299 /* opt_pass methods: */
1300 virtual bool gate (function *)
1301 {
1302 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
1303 }
1304
1305 virtual unsigned int execute (function *)
1306 {
1307 return sparc_do_work_around_errata ();
1308 }
1309
1310 }; // class pass_work_around_errata
1311
1312 } // anon namespace
1313
1314 rtl_opt_pass *
1315 make_pass_work_around_errata (gcc::context *ctxt)
1316 {
1317 return new pass_work_around_errata (ctxt);
1318 }
1319
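/* Minimal sketch (not necessarily how this port does it): a late
   machine-specific pass such as the one above is typically hooked into the
   pass manager with register_pass.  The reference pass name "dbr" (delayed
   branch scheduling) is an assumption chosen for illustration.  */
#if 0
static void
example_register_errata_pass (void)
{
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* pos_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
#endif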
1320 /* Helpers for TARGET_DEBUG_OPTIONS. */
1321 static void
1322 dump_target_flag_bits (const int flags)
1323 {
1324 if (flags & MASK_64BIT)
1325 fprintf (stderr, "64BIT ");
1326 if (flags & MASK_APP_REGS)
1327 fprintf (stderr, "APP_REGS ");
1328 if (flags & MASK_FASTER_STRUCTS)
1329 fprintf (stderr, "FASTER_STRUCTS ");
1330 if (flags & MASK_FLAT)
1331 fprintf (stderr, "FLAT ");
1332 if (flags & MASK_FMAF)
1333 fprintf (stderr, "FMAF ");
1334 if (flags & MASK_FSMULD)
1335 fprintf (stderr, "FSMULD ");
1336 if (flags & MASK_FPU)
1337 fprintf (stderr, "FPU ");
1338 if (flags & MASK_HARD_QUAD)
1339 fprintf (stderr, "HARD_QUAD ");
1340 if (flags & MASK_POPC)
1341 fprintf (stderr, "POPC ");
1342 if (flags & MASK_PTR64)
1343 fprintf (stderr, "PTR64 ");
1344 if (flags & MASK_STACK_BIAS)
1345 fprintf (stderr, "STACK_BIAS ");
1346 if (flags & MASK_UNALIGNED_DOUBLES)
1347 fprintf (stderr, "UNALIGNED_DOUBLES ");
1348 if (flags & MASK_V8PLUS)
1349 fprintf (stderr, "V8PLUS ");
1350 if (flags & MASK_VIS)
1351 fprintf (stderr, "VIS ");
1352 if (flags & MASK_VIS2)
1353 fprintf (stderr, "VIS2 ");
1354 if (flags & MASK_VIS3)
1355 fprintf (stderr, "VIS3 ");
1356 if (flags & MASK_VIS4)
1357 fprintf (stderr, "VIS4 ");
1358 if (flags & MASK_VIS4B)
1359 fprintf (stderr, "VIS4B ");
1360 if (flags & MASK_CBCOND)
1361 fprintf (stderr, "CBCOND ");
1362 if (flags & MASK_DEPRECATED_V8_INSNS)
1363 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1364 if (flags & MASK_SPARCLET)
1365 fprintf (stderr, "SPARCLET ");
1366 if (flags & MASK_SPARCLITE)
1367 fprintf (stderr, "SPARCLITE ");
1368 if (flags & MASK_V8)
1369 fprintf (stderr, "V8 ");
1370 if (flags & MASK_V9)
1371 fprintf (stderr, "V9 ");
1372 }
1373
1374 static void
1375 dump_target_flags (const char *prefix, const int flags)
1376 {
1377 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1378 dump_target_flag_bits (flags);
 1379   fprintf (stderr, "]\n");
1380 }
1381
1382 /* Validate and override various options, and do some machine dependent
1383 initialization. */
1384
1385 static void
1386 sparc_option_override (void)
1387 {
1388 static struct code_model {
1389 const char *const name;
1390 const enum cmodel value;
1391 } const cmodels[] = {
1392 { "32", CM_32 },
1393 { "medlow", CM_MEDLOW },
1394 { "medmid", CM_MEDMID },
1395 { "medany", CM_MEDANY },
1396 { "embmedany", CM_EMBMEDANY },
1397 { NULL, (enum cmodel) 0 }
1398 };
1399 const struct code_model *cmodel;
1400 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1401 static struct cpu_default {
1402 const int cpu;
1403 const enum processor_type processor;
1404 } const cpu_default[] = {
1405 /* There must be one entry here for each TARGET_CPU value. */
1406 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1407 { TARGET_CPU_v8, PROCESSOR_V8 },
1408 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1409 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1410 { TARGET_CPU_leon, PROCESSOR_LEON },
1411 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1412 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1413 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1414 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1415 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1416 { TARGET_CPU_v9, PROCESSOR_V9 },
1417 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1418 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1419 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1420 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1421 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1422 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1423 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1424 { TARGET_CPU_m8, PROCESSOR_M8 },
1425 { -1, PROCESSOR_V7 }
1426 };
1427 const struct cpu_default *def;
1428 /* Table of values for -m{cpu,tune}=. This must match the order of
1429 the enum processor_type in sparc-opts.h. */
1430 static struct cpu_table {
1431 const char *const name;
1432 const int disable;
1433 const int enable;
1434 } const cpu_table[] = {
1435 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1436 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1437 { "v8", MASK_ISA, MASK_V8 },
1438 /* TI TMS390Z55 supersparc */
1439 { "supersparc", MASK_ISA, MASK_V8 },
1440 { "hypersparc", MASK_ISA, MASK_V8 },
1441 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1442 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1443 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1444 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1445 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1446 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1447 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1448 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1449 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1450 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1451 /* TEMIC sparclet */
1452 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1453 { "v9", MASK_ISA, MASK_V9 },
1454 /* UltraSPARC I, II, IIi */
1455 { "ultrasparc", MASK_ISA,
1456 /* Although insns using %y are deprecated, it is a clear win. */
1457 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1458 /* UltraSPARC III */
1459 /* ??? Check if %y issue still holds true. */
1460 { "ultrasparc3", MASK_ISA,
1461 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1462 /* UltraSPARC T1 */
1463 { "niagara", MASK_ISA,
1464 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1465 /* UltraSPARC T2 */
1466 { "niagara2", MASK_ISA,
1467 MASK_V9|MASK_POPC|MASK_VIS2 },
1468 /* UltraSPARC T3 */
1469 { "niagara3", MASK_ISA,
1470 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1471 /* UltraSPARC T4 */
1472 { "niagara4", MASK_ISA,
1473 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1474 /* UltraSPARC M7 */
1475 { "niagara7", MASK_ISA,
1476 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1477 /* UltraSPARC M8 */
1478 { "m8", MASK_ISA,
1479 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1480 };
1481 const struct cpu_table *cpu;
1482 unsigned int i;
1483
1484 if (sparc_debug_string != NULL)
1485 {
1486 const char *q;
1487 char *p;
1488
1489 p = ASTRDUP (sparc_debug_string);
1490 while ((q = strtok (p, ",")) != NULL)
1491 {
1492 bool invert;
1493 int mask;
1494
1495 p = NULL;
1496 if (*q == '!')
1497 {
1498 invert = true;
1499 q++;
1500 }
1501 else
1502 invert = false;
1503
1504 if (! strcmp (q, "all"))
1505 mask = MASK_DEBUG_ALL;
1506 else if (! strcmp (q, "options"))
1507 mask = MASK_DEBUG_OPTIONS;
1508 else
1509 error ("unknown -mdebug-%s switch", q);
1510
1511 if (invert)
1512 sparc_debug &= ~mask;
1513 else
1514 sparc_debug |= mask;
1515 }
1516 }
1517
1518 /* Enable the FsMULd instruction by default if not explicitly specified by
1519 the user. It may be later disabled by the CPU (explicitly or not). */
1520 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1521 target_flags |= MASK_FSMULD;
1522
1523 if (TARGET_DEBUG_OPTIONS)
1524 {
 1525       dump_target_flags ("Initial target_flags", target_flags);
 1526       dump_target_flags ("target_flags_explicit", target_flags_explicit);
1527 }
1528
1529 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1530 SUBTARGET_OVERRIDE_OPTIONS;
1531 #endif
1532
1533 #ifndef SPARC_BI_ARCH
1534 /* Check for unsupported architecture size. */
1535 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1536 error ("%s is not supported by this configuration",
1537 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1538 #endif
1539
 1540   /* We force all 64-bit archs to use a 128-bit long double.  */
1541 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1542 {
1543 error ("-mlong-double-64 not allowed with -m64");
1544 target_flags |= MASK_LONG_DOUBLE_128;
1545 }
1546
1547 /* Code model selection. */
1548 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1549
1550 #ifdef SPARC_BI_ARCH
1551 if (TARGET_ARCH32)
1552 sparc_cmodel = CM_32;
1553 #endif
1554
1555 if (sparc_cmodel_string != NULL)
1556 {
1557 if (TARGET_ARCH64)
1558 {
1559 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1560 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1561 break;
1562 if (cmodel->name == NULL)
1563 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1564 else
1565 sparc_cmodel = cmodel->value;
1566 }
1567 else
1568 error ("-mcmodel= is not supported on 32-bit systems");
1569 }
1570
1571 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1572 for (i = 8; i < 16; i++)
1573 if (!call_used_regs [i])
1574 {
1575 error ("-fcall-saved-REG is not supported for out registers");
1576 call_used_regs [i] = 1;
1577 }
1578
1579 /* Set the default CPU if no -mcpu option was specified. */
1580 if (!global_options_set.x_sparc_cpu_and_features)
1581 {
1582 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1583 if (def->cpu == TARGET_CPU_DEFAULT)
1584 break;
1585 gcc_assert (def->cpu != -1);
1586 sparc_cpu_and_features = def->processor;
1587 }
1588
1589 /* Set the default CPU if no -mtune option was specified. */
1590 if (!global_options_set.x_sparc_cpu)
1591 sparc_cpu = sparc_cpu_and_features;
1592
1593 cpu = &cpu_table[(int) sparc_cpu_and_features];
1594
1595 if (TARGET_DEBUG_OPTIONS)
1596 {
1597 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1598 dump_target_flags ("cpu->disable", cpu->disable);
1599 dump_target_flags ("cpu->enable", cpu->enable);
1600 }
1601
1602 target_flags &= ~cpu->disable;
1603 target_flags |= (cpu->enable
1604 #ifndef HAVE_AS_FMAF_HPC_VIS3
1605 & ~(MASK_FMAF | MASK_VIS3)
1606 #endif
1607 #ifndef HAVE_AS_SPARC4
1608 & ~MASK_CBCOND
1609 #endif
1610 #ifndef HAVE_AS_SPARC5_VIS4
1611 & ~(MASK_VIS4 | MASK_SUBXC)
1612 #endif
1613 #ifndef HAVE_AS_SPARC6
1614 & ~(MASK_VIS4B)
1615 #endif
1616 #ifndef HAVE_AS_LEON
1617 & ~(MASK_LEON | MASK_LEON3)
1618 #endif
1619 & ~(target_flags_explicit & MASK_FEATURES)
1620 );
1621
1622 /* -mvis2 implies -mvis. */
1623 if (TARGET_VIS2)
1624 target_flags |= MASK_VIS;
1625
1626 /* -mvis3 implies -mvis2 and -mvis. */
1627 if (TARGET_VIS3)
1628 target_flags |= MASK_VIS2 | MASK_VIS;
1629
1630 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1631 if (TARGET_VIS4)
1632 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1633
1634 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1635 if (TARGET_VIS4B)
1636 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1637
1638 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1639 FPU is disabled. */
1640 if (!TARGET_FPU)
1641 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1642 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1643
1644 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1645 are available; -m64 also implies v9. */
1646 if (TARGET_VIS || TARGET_ARCH64)
1647 {
1648 target_flags |= MASK_V9;
1649 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1650 }
1651
1652 /* -mvis also implies -mv8plus on 32-bit. */
1653 if (TARGET_VIS && !TARGET_ARCH64)
1654 target_flags |= MASK_V8PLUS;
1655
1656 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1657 if (TARGET_V9 && TARGET_ARCH32)
1658 target_flags |= MASK_DEPRECATED_V8_INSNS;
1659
1660 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1661 if (!TARGET_V9 || TARGET_ARCH64)
1662 target_flags &= ~MASK_V8PLUS;
1663
1664 /* Don't use stack biasing in 32-bit mode. */
1665 if (TARGET_ARCH32)
1666 target_flags &= ~MASK_STACK_BIAS;
1667
1668 /* Use LRA instead of reload, unless otherwise instructed. */
1669 if (!(target_flags_explicit & MASK_LRA))
1670 target_flags |= MASK_LRA;
1671
1672 /* Enable the back-to-back store errata workaround for LEON3FT. */
1673 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1674 sparc_fix_b2bst = 1;
1675
1676 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1677 if (sparc_fix_ut699)
1678 target_flags &= ~MASK_FSMULD;
1679
1680 /* Supply a default value for align_functions. */
1681 if (align_functions == 0)
1682 {
1683 if (sparc_cpu == PROCESSOR_ULTRASPARC
1684 || sparc_cpu == PROCESSOR_ULTRASPARC3
1685 || sparc_cpu == PROCESSOR_NIAGARA
1686 || sparc_cpu == PROCESSOR_NIAGARA2
1687 || sparc_cpu == PROCESSOR_NIAGARA3
1688 || sparc_cpu == PROCESSOR_NIAGARA4)
1689 align_functions = 32;
1690 else if (sparc_cpu == PROCESSOR_NIAGARA7
1691 || sparc_cpu == PROCESSOR_M8)
1692 align_functions = 64;
1693 }
1694
1695 /* Validate PCC_STRUCT_RETURN. */
1696 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1697 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1698
1699 /* Only use .uaxword when compiling for a 64-bit target. */
1700 if (!TARGET_ARCH64)
1701 targetm.asm_out.unaligned_op.di = NULL;
1702
1703 /* Do various machine dependent initializations. */
1704 sparc_init_modes ();
1705
1706 /* Set up function hooks. */
1707 init_machine_status = sparc_init_machine_status;
1708
1709 switch (sparc_cpu)
1710 {
1711 case PROCESSOR_V7:
1712 case PROCESSOR_CYPRESS:
1713 sparc_costs = &cypress_costs;
1714 break;
1715 case PROCESSOR_V8:
1716 case PROCESSOR_SPARCLITE:
1717 case PROCESSOR_SUPERSPARC:
1718 sparc_costs = &supersparc_costs;
1719 break;
1720 case PROCESSOR_F930:
1721 case PROCESSOR_F934:
1722 case PROCESSOR_HYPERSPARC:
1723 case PROCESSOR_SPARCLITE86X:
1724 sparc_costs = &hypersparc_costs;
1725 break;
1726 case PROCESSOR_LEON:
1727 sparc_costs = &leon_costs;
1728 break;
1729 case PROCESSOR_LEON3:
1730 case PROCESSOR_LEON3V7:
1731 sparc_costs = &leon3_costs;
1732 break;
1733 case PROCESSOR_SPARCLET:
1734 case PROCESSOR_TSC701:
1735 sparc_costs = &sparclet_costs;
1736 break;
1737 case PROCESSOR_V9:
1738 case PROCESSOR_ULTRASPARC:
1739 sparc_costs = &ultrasparc_costs;
1740 break;
1741 case PROCESSOR_ULTRASPARC3:
1742 sparc_costs = &ultrasparc3_costs;
1743 break;
1744 case PROCESSOR_NIAGARA:
1745 sparc_costs = &niagara_costs;
1746 break;
1747 case PROCESSOR_NIAGARA2:
1748 sparc_costs = &niagara2_costs;
1749 break;
1750 case PROCESSOR_NIAGARA3:
1751 sparc_costs = &niagara3_costs;
1752 break;
1753 case PROCESSOR_NIAGARA4:
1754 sparc_costs = &niagara4_costs;
1755 break;
1756 case PROCESSOR_NIAGARA7:
1757 sparc_costs = &niagara7_costs;
1758 break;
1759 case PROCESSOR_M8:
1760 sparc_costs = &m8_costs;
1761 break;
1762 case PROCESSOR_NATIVE:
1763 gcc_unreachable ();
1764 };
1765
1766 if (sparc_memory_model == SMM_DEFAULT)
1767 {
1768 /* Choose the memory model for the operating system. */
1769 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1770 if (os_default != SMM_DEFAULT)
1771 sparc_memory_model = os_default;
1772 /* Choose the most relaxed model for the processor. */
1773 else if (TARGET_V9)
1774 sparc_memory_model = SMM_RMO;
1775 else if (TARGET_LEON3)
1776 sparc_memory_model = SMM_TSO;
1777 else if (TARGET_LEON)
1778 sparc_memory_model = SMM_SC;
1779 else if (TARGET_V8)
1780 sparc_memory_model = SMM_PSO;
1781 else
1782 sparc_memory_model = SMM_SC;
1783 }
1784
1785 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1786 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1787 target_flags |= MASK_LONG_DOUBLE_128;
1788 #endif
1789
1790 if (TARGET_DEBUG_OPTIONS)
1791 dump_target_flags ("Final target_flags", target_flags);
1792
1793 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1794 can run at the same time. More importantly, it is the threshold
1795 defining when additional prefetches will be dropped by the
1796 hardware.
1797
1798 The UltraSPARC-III features a documented prefetch queue with a
1799 size of 8. Additional prefetches issued in the cpu are
1800 dropped.
1801
1802 Niagara processors are different. In these processors prefetches
1803 are handled much like regular loads. The L1 miss buffer is 32
1804 entries, but prefetches start getting affected when 30 entries
1805 become occupied. That occupation could be a mix of regular loads
1806 and prefetches though. And that buffer is shared by all threads.
1807 Once the threshold is reached, if the core is running a single
1808 thread the prefetch will retry. If more than one thread is
1809 running, the prefetch will be dropped.
1810
1811 All this makes it very difficult to determine how many prefetches
1812 can be in flight simultaneously, even in a
1813 single-threaded program. Experimental results show that setting
1814 this parameter to 32 works well when the number of threads is not
1815 high. */
1816 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1817 ((sparc_cpu == PROCESSOR_ULTRASPARC
1818 || sparc_cpu == PROCESSOR_NIAGARA
1819 || sparc_cpu == PROCESSOR_NIAGARA2
1820 || sparc_cpu == PROCESSOR_NIAGARA3
1821 || sparc_cpu == PROCESSOR_NIAGARA4)
1822 ? 2
1823 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1824 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1825 || sparc_cpu == PROCESSOR_M8)
1826 ? 32 : 3))),
1827 global_options.x_param_values,
1828 global_options_set.x_param_values);
1829
1830 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1831 bytes.
1832
1833 The Oracle SPARC Architecture (previously the UltraSPARC
1834 Architecture) specification states that when a PREFETCH[A]
1835 instruction is executed an implementation-specific amount of data
1836 is prefetched, and that it is at least 64 bytes long (aligned to
1837 at least 64 bytes).
1838
1839 However, this is not correct. The M7 (and implementations prior
1840 to that) does not guarantee a 64B prefetch into a cache if the
1841 line size is smaller. A single cache line is all that is ever
1842 prefetched. So for the M7, where the L1D$ has 32B lines and the
1843 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1844 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1845 is a read_n prefetch, which is the only type which allocates to
1846 the L1.) */
1847 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1848 (sparc_cpu == PROCESSOR_M8
1849 ? 64 : 32),
1850 global_options.x_param_values,
1851 global_options_set.x_param_values);
1852
1853 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1854 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
1855 Niagara processors feature an L1D$ of 16KB. */
1856 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1857 ((sparc_cpu == PROCESSOR_ULTRASPARC
1858 || sparc_cpu == PROCESSOR_ULTRASPARC3
1859 || sparc_cpu == PROCESSOR_NIAGARA
1860 || sparc_cpu == PROCESSOR_NIAGARA2
1861 || sparc_cpu == PROCESSOR_NIAGARA3
1862 || sparc_cpu == PROCESSOR_NIAGARA4
1863 || sparc_cpu == PROCESSOR_NIAGARA7
1864 || sparc_cpu == PROCESSOR_M8)
1865 ? 16 : 64),
1866 global_options.x_param_values,
1867 global_options_set.x_param_values);
1868
1869
1870 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note
1871 that 512 is the default in params.def. */
1872 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1873 ((sparc_cpu == PROCESSOR_NIAGARA4
1874 || sparc_cpu == PROCESSOR_M8)
1875 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1876 ? 256 : 512)),
1877 global_options.x_param_values,
1878 global_options_set.x_param_values);
1879
1880
1881 /* Disable save slot sharing for call-clobbered registers by default.
1882 The IRA sharing algorithm works on single registers only and this
1883 pessimizes for double floating-point registers. */
1884 if (!global_options_set.x_flag_ira_share_save_slots)
1885 flag_ira_share_save_slots = 0;
1886
1887 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1888 redundant 32-to-64-bit extensions. */
1889 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1890 flag_ree = 0;
1891 }
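
/* As an illustrative example of the flag cascade above (not a statement
   of policy): compiling with "-m32 -mvis3" ends up with MASK_VIS3,
   MASK_VIS2 and MASK_VIS set, which in turn switches on MASK_V9,
   MASK_V8PLUS and MASK_DEPRECATED_V8_INSNS for the 32-bit case.
   Explicitly requested features are not masked off by the assembler
   capability checks, since target_flags_explicit is honored above.  */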
1892 \f
1893 /* Miscellaneous utilities. */
1894
1895 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional-move
1896 or branch-on-register-contents instructions. */
1897
1898 int
1899 v9_regcmp_p (enum rtx_code code)
1900 {
1901 return (code == EQ || code == NE || code == GE || code == LT
1902 || code == LE || code == GT);
1903 }
1904
1905 /* Nonzero if OP is a floating point constant which can
1906 be loaded into an integer register using a single
1907 sethi instruction. */
1908
1909 int
1910 fp_sethi_p (rtx op)
1911 {
1912 if (GET_CODE (op) == CONST_DOUBLE)
1913 {
1914 long i;
1915
1916 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1917 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1918 }
1919
1920 return 0;
1921 }
1922
1923 /* Nonzero if OP is a floating point constant which can
1924 be loaded into an integer register using a single
1925 mov instruction. */
1926
1927 int
1928 fp_mov_p (rtx op)
1929 {
1930 if (GET_CODE (op) == CONST_DOUBLE)
1931 {
1932 long i;
1933
1934 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1935 return SPARC_SIMM13_P (i);
1936 }
1937
1938 return 0;
1939 }
1940
1941 /* Nonzero if OP is a floating point constant which can
1942 be loaded into an integer register using a high/losum
1943 instruction sequence. */
1944
1945 int
1946 fp_high_losum_p (rtx op)
1947 {
1948 /* The constraints calling this should only be in
1949 SFmode move insns, so any constant which cannot
1950 be moved using a single insn will do. */
1951 if (GET_CODE (op) == CONST_DOUBLE)
1952 {
1953 long i;
1954
1955 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1956 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1957 }
1958
1959 return 0;
1960 }
1961
1962 /* Return true if the address of LABEL can be loaded by means of the
1963 mov{si,di}_pic_label_ref patterns in PIC mode. */
1964
1965 static bool
1966 can_use_mov_pic_label_ref (rtx label)
1967 {
1968 /* VxWorks does not impose a fixed gap between segments; the run-time
1969 gap can be different from the object-file gap. We therefore can't
1970 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1971 are absolutely sure that X is in the same segment as the GOT.
1972 Unfortunately, the flexibility of linker scripts means that we
1973 can't be sure of that in general, so assume that GOT-relative
1974 accesses are never valid on VxWorks. */
1975 if (TARGET_VXWORKS_RTP)
1976 return false;
1977
1978 /* Similarly, if the label is non-local, it might end up being placed
1979 in a different section than the current one; now mov_pic_label_ref
1980 requires the label and the code to be in the same section. */
1981 if (LABEL_REF_NONLOCAL_P (label))
1982 return false;
1983
1984 /* Finally, if we are reordering basic blocks and partitioning into hot
1985 and cold sections, this might happen for any label. */
1986 if (flag_reorder_blocks_and_partition)
1987 return false;
1988
1989 return true;
1990 }
1991
1992 /* Expand a move instruction. Return true if all work is done. */
1993
1994 bool
1995 sparc_expand_move (machine_mode mode, rtx *operands)
1996 {
1997 /* Handle sets of MEM first. */
1998 if (GET_CODE (operands[0]) == MEM)
1999 {
2000 /* 0 is a register (or a pair of registers) on SPARC. */
2001 if (register_or_zero_operand (operands[1], mode))
2002 return false;
2003
2004 if (!reload_in_progress)
2005 {
2006 operands[0] = validize_mem (operands[0]);
2007 operands[1] = force_reg (mode, operands[1]);
2008 }
2009 }
2010
2011 /* Fixup TLS cases. */
2012 if (TARGET_HAVE_TLS
2013 && CONSTANT_P (operands[1])
2014 && sparc_tls_referenced_p (operands [1]))
2015 {
2016 operands[1] = sparc_legitimize_tls_address (operands[1]);
2017 return false;
2018 }
2019
2020 /* Fixup PIC cases. */
2021 if (flag_pic && CONSTANT_P (operands[1]))
2022 {
2023 if (pic_address_needs_scratch (operands[1]))
2024 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2025
2026 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2027 if (GET_CODE (operands[1]) == LABEL_REF
2028 && can_use_mov_pic_label_ref (operands[1]))
2029 {
2030 if (mode == SImode)
2031 {
2032 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2033 return true;
2034 }
2035
2036 if (mode == DImode)
2037 {
2038 gcc_assert (TARGET_ARCH64);
2039 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2040 return true;
2041 }
2042 }
2043
2044 if (symbolic_operand (operands[1], mode))
2045 {
2046 operands[1]
2047 = sparc_legitimize_pic_address (operands[1],
2048 reload_in_progress
2049 ? operands[0] : NULL_RTX);
2050 return false;
2051 }
2052 }
2053
2054 /* If we are trying to toss an integer constant into FP registers,
2055 or to load an FP or vector constant, force it into memory. */
2056 if (CONSTANT_P (operands[1])
2057 && REG_P (operands[0])
2058 && (SPARC_FP_REG_P (REGNO (operands[0]))
2059 || SCALAR_FLOAT_MODE_P (mode)
2060 || VECTOR_MODE_P (mode)))
2061 {
2062 /* emit_group_store will send such bogosity to us when it is
2063 not storing directly into memory. So fix this up to avoid
2064 crashes in output_constant_pool. */
2065 if (operands [1] == const0_rtx)
2066 operands[1] = CONST0_RTX (mode);
2067
2068 /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
2069 integer registers can always be cleared or set this way. */
2070 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2071 && (const_zero_operand (operands[1], mode)
2072 || const_all_ones_operand (operands[1], mode)))
2073 return false;
2074
2075 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2076 /* We are able to build any SF constant in integer registers
2077 with at most 2 instructions. */
2078 && (mode == SFmode
2079 /* And any DF constant in integer registers if needed. */
2080 || (mode == DFmode && !can_create_pseudo_p ())))
2081 return false;
2082
2083 operands[1] = force_const_mem (mode, operands[1]);
2084 if (!reload_in_progress)
2085 operands[1] = validize_mem (operands[1]);
2086 return false;
2087 }
2088
2089 /* Accept non-constants and valid constants unmodified. */
2090 if (!CONSTANT_P (operands[1])
2091 || GET_CODE (operands[1]) == HIGH
2092 || input_operand (operands[1], mode))
2093 return false;
2094
2095 switch (mode)
2096 {
2097 case E_QImode:
2098 /* All QImode constants require only one insn, so proceed. */
2099 break;
2100
2101 case E_HImode:
2102 case E_SImode:
2103 sparc_emit_set_const32 (operands[0], operands[1]);
2104 return true;
2105
2106 case E_DImode:
2107 /* input_operand should have filtered out 32-bit mode. */
2108 sparc_emit_set_const64 (operands[0], operands[1]);
2109 return true;
2110
2111 case E_TImode:
2112 {
2113 rtx high, low;
2114 /* TImode isn't available in 32-bit mode. */
2115 split_double (operands[1], &high, &low);
2116 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2117 high));
2118 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2119 low));
2120 }
2121 return true;
2122
2123 default:
2124 gcc_unreachable ();
2125 }
2126
2127 return false;
2128 }
2129
2130 /* Load OP1, a 32-bit constant, into OP0, a register.
2131 We know it can't be done in one insn when we get
2132 here; the move expander guarantees this. */
2133
2134 static void
2135 sparc_emit_set_const32 (rtx op0, rtx op1)
2136 {
2137 machine_mode mode = GET_MODE (op0);
2138 rtx temp = op0;
2139
2140 if (can_create_pseudo_p ())
2141 temp = gen_reg_rtx (mode);
2142
2143 if (GET_CODE (op1) == CONST_INT)
2144 {
2145 gcc_assert (!small_int_operand (op1, mode)
2146 && !const_high_operand (op1, mode));
2147
2148 /* Emit them as real moves instead of a HIGH/LO_SUM;
2149 this way CSE can see everything and reuse intermediate
2150 values if it wants. */
2151 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2152 & ~(HOST_WIDE_INT) 0x3ff)));
2153
2154 emit_insn (gen_rtx_SET (op0,
2155 gen_rtx_IOR (mode, temp,
2156 GEN_INT (INTVAL (op1) & 0x3ff))));
2157 }
2158 else
2159 {
2160 /* A symbol; emit it in the traditional way. */
2161 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2162 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2163 }
2164 }
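
/* Worked example (illustrative only; register names are arbitrary):
   for op1 = 0x12345678, which is neither a SIMM13 nor a %hi-only
   value, the two emitted sets are

     temp <- 0x12345400          (op1 with the low 10 bits cleared)
     op0  <- temp | 0x278        (the low 10 bits of op1)

   which the backend eventually renders as something like

     sethi %hi(0x12345678), %temp
     or    %temp, 0x278, %reg

   The point of emitting the masked constants rather than HIGH/LO_SUM
   is, as noted above, that CSE can see the exact intermediate value.  */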
2165
2166 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2167 If TEMP is nonzero, we are forbidden to use any other scratch
2168 registers. Otherwise, we are allowed to generate them as needed.
2169
2170 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2171 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2172
2173 void
2174 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2175 {
2176 rtx cst, temp1, temp2, temp3, temp4, temp5;
2177 rtx ti_temp = 0;
2178
2179 /* Deal with too large offsets. */
2180 if (GET_CODE (op1) == CONST
2181 && GET_CODE (XEXP (op1, 0)) == PLUS
2182 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2183 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2184 {
2185 gcc_assert (!temp);
2186 temp1 = gen_reg_rtx (DImode);
2187 temp2 = gen_reg_rtx (DImode);
2188 sparc_emit_set_const64 (temp2, cst);
2189 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2190 NULL_RTX);
2191 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2192 return;
2193 }
2194
2195 if (temp && GET_MODE (temp) == TImode)
2196 {
2197 ti_temp = temp;
2198 temp = gen_rtx_REG (DImode, REGNO (temp));
2199 }
2200
2201 /* SPARC-V9 code-model support. */
2202 switch (sparc_cmodel)
2203 {
2204 case CM_MEDLOW:
2205 /* The range spanned by all instructions in the object is less
2206 than 2^31 bytes (2GB) and the distance from any instruction
2207 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2208 than 2^31 bytes (2GB).
2209
2210 The executable must be in the low 4TB of the virtual address
2211 space.
2212
2213 sethi %hi(symbol), %temp1
2214 or %temp1, %lo(symbol), %reg */
2215 if (temp)
2216 temp1 = temp; /* op0 is allowed. */
2217 else
2218 temp1 = gen_reg_rtx (DImode);
2219
2220 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2221 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2222 break;
2223
2224 case CM_MEDMID:
2225 /* The range spanned by all instructions in the object is less
2226 than 2^31 bytes (2GB) and the distance from any instruction
2227 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2228 than 2^31 bytes (2GB).
2229
2230 The executable must be in the low 16TB of the virtual address
2231 space.
2232
2233 sethi %h44(symbol), %temp1
2234 or %temp1, %m44(symbol), %temp2
2235 sllx %temp2, 12, %temp3
2236 or %temp3, %l44(symbol), %reg */
2237 if (temp)
2238 {
2239 temp1 = op0;
2240 temp2 = op0;
2241 temp3 = temp; /* op0 is allowed. */
2242 }
2243 else
2244 {
2245 temp1 = gen_reg_rtx (DImode);
2246 temp2 = gen_reg_rtx (DImode);
2247 temp3 = gen_reg_rtx (DImode);
2248 }
2249
2250 emit_insn (gen_seth44 (temp1, op1));
2251 emit_insn (gen_setm44 (temp2, temp1, op1));
2252 emit_insn (gen_rtx_SET (temp3,
2253 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2254 emit_insn (gen_setl44 (op0, temp3, op1));
2255 break;
2256
2257 case CM_MEDANY:
2258 /* The range spanned by all instructions in the object is less
2259 than 2^31 bytes (2GB) and the distance from any instruction
2260 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2261 than 2^31 bytes (2GB).
2262
2263 The executable can be placed anywhere in the virtual address
2264 space.
2265
2266 sethi %hh(symbol), %temp1
2267 sethi %lm(symbol), %temp2
2268 or %temp1, %hm(symbol), %temp3
2269 sllx %temp3, 32, %temp4
2270 or %temp4, %temp2, %temp5
2271 or %temp5, %lo(symbol), %reg */
2272 if (temp)
2273 {
2274 /* It is possible that one of the registers we got for operands[2]
2275 might coincide with that of operands[0] (which is why we made
2276 it TImode). Pick the other one to use as our scratch. */
2277 if (rtx_equal_p (temp, op0))
2278 {
2279 gcc_assert (ti_temp);
2280 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2281 }
2282 temp1 = op0;
2283 temp2 = temp; /* op0 is _not_ allowed, see above. */
2284 temp3 = op0;
2285 temp4 = op0;
2286 temp5 = op0;
2287 }
2288 else
2289 {
2290 temp1 = gen_reg_rtx (DImode);
2291 temp2 = gen_reg_rtx (DImode);
2292 temp3 = gen_reg_rtx (DImode);
2293 temp4 = gen_reg_rtx (DImode);
2294 temp5 = gen_reg_rtx (DImode);
2295 }
2296
2297 emit_insn (gen_sethh (temp1, op1));
2298 emit_insn (gen_setlm (temp2, op1));
2299 emit_insn (gen_sethm (temp3, temp1, op1));
2300 emit_insn (gen_rtx_SET (temp4,
2301 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2302 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2303 emit_insn (gen_setlo (op0, temp5, op1));
2304 break;
2305
2306 case CM_EMBMEDANY:
2307 /* Old old old backwards-compatibility cruft here.
2308 Essentially it is MEDLOW with a fixed 64-bit
2309 virtual base added to all data segment addresses.
2310 Text-segment stuff is computed like MEDANY; we can't
2311 reuse the code above because the relocation knobs
2312 look different.
2313
2314 Data segment: sethi %hi(symbol), %temp1
2315 add %temp1, EMBMEDANY_BASE_REG, %temp2
2316 or %temp2, %lo(symbol), %reg */
2317 if (data_segment_operand (op1, GET_MODE (op1)))
2318 {
2319 if (temp)
2320 {
2321 temp1 = temp; /* op0 is allowed. */
2322 temp2 = op0;
2323 }
2324 else
2325 {
2326 temp1 = gen_reg_rtx (DImode);
2327 temp2 = gen_reg_rtx (DImode);
2328 }
2329
2330 emit_insn (gen_embmedany_sethi (temp1, op1));
2331 emit_insn (gen_embmedany_brsum (temp2, temp1));
2332 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2333 }
2334
2335 /* Text segment: sethi %uhi(symbol), %temp1
2336 sethi %hi(symbol), %temp2
2337 or %temp1, %ulo(symbol), %temp3
2338 sllx %temp3, 32, %temp4
2339 or %temp4, %temp2, %temp5
2340 or %temp5, %lo(symbol), %reg */
2341 else
2342 {
2343 if (temp)
2344 {
2345 /* It is possible that one of the registers we got for operands[2]
2346 might coincide with that of operands[0] (which is why we made
2347 it TImode). Pick the other one to use as our scratch. */
2348 if (rtx_equal_p (temp, op0))
2349 {
2350 gcc_assert (ti_temp);
2351 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2352 }
2353 temp1 = op0;
2354 temp2 = temp; /* op0 is _not_ allowed, see above. */
2355 temp3 = op0;
2356 temp4 = op0;
2357 temp5 = op0;
2358 }
2359 else
2360 {
2361 temp1 = gen_reg_rtx (DImode);
2362 temp2 = gen_reg_rtx (DImode);
2363 temp3 = gen_reg_rtx (DImode);
2364 temp4 = gen_reg_rtx (DImode);
2365 temp5 = gen_reg_rtx (DImode);
2366 }
2367
2368 emit_insn (gen_embmedany_textuhi (temp1, op1));
2369 emit_insn (gen_embmedany_texthi (temp2, op1));
2370 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2371 emit_insn (gen_rtx_SET (temp4,
2372 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2373 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2374 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2375 }
2376 break;
2377
2378 default:
2379 gcc_unreachable ();
2380 }
2381 }
2382
2383 /* These avoid problems when cross compiling. If we do not
2384 go through all this hair then the optimizer will see
2385 invalid REG_EQUAL notes or in some cases none at all. */
2386 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2387 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2388 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2389 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2390
2391 /* The optimizer is not allowed to assume anything about exactly
2392 which bits are set by a HIGH; they are unspecified.
2393 Unfortunately this leads to many missed optimizations
2394 during CSE. To alleviate this, we mask off the low 10 bits and
2395 emit the result as a plain constant move that matches movdi. */
2396 static rtx
2397 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2398 {
2399 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2400 }
2401
2402 static rtx
2403 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2404 {
2405 return gen_rtx_SET (dest, GEN_INT (val));
2406 }
2407
2408 static rtx
2409 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2410 {
2411 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2412 }
2413
2414 static rtx
2415 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2416 {
2417 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2418 }
2419
2420 /* Worker routines for 64-bit constant formation on arch64.
2421 One of the key things to do in these emissions is
2422 to create as many temp REGs as possible. This makes it
2423 possible for half-built constants to be reused later when
2424 similar values are required.
2425 Without doing this, the optimizer cannot see such
2426 opportunities. */
2427
2428 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2429 unsigned HOST_WIDE_INT, int);
2430
2431 static void
2432 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2433 unsigned HOST_WIDE_INT low_bits, int is_neg)
2434 {
2435 unsigned HOST_WIDE_INT high_bits;
2436
2437 if (is_neg)
2438 high_bits = (~low_bits) & 0xffffffff;
2439 else
2440 high_bits = low_bits;
2441
2442 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2443 if (!is_neg)
2444 {
2445 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2446 }
2447 else
2448 {
2449 /* If we are XOR'ing with -1, then we should emit a one's complement
2450 instead. This way the combiner will notice logical operations
2451 such as ANDN later on and substitute. */
2452 if ((low_bits & 0x3ff) == 0x3ff)
2453 {
2454 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2455 }
2456 else
2457 {
2458 emit_insn (gen_rtx_SET (op0,
2459 gen_safe_XOR64 (temp,
2460 (-(HOST_WIDE_INT)0x400
2461 | (low_bits & 0x3ff)))));
2462 }
2463 }
2464 }
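
/* Illustrative example for the is_neg path (values only, no claim about
   the exact insns the backend finally picks): for the constant
   0xfffffffffffff000 the caller passes low_bits = 0xfffff000 and
   is_neg = 1.  Then high_bits = ~low_bits & 0xffffffff = 0xfff, the
   HIGH-style set loads temp = 0xc00, and since the low 10 bits of
   low_bits are zero we emit

     op0 <- temp ^ (-0x400 | 0x000) = 0xc00 ^ 0xfffffffffffffc00
          = 0xfffffffffffff000

   i.e. a sethi-style constant followed by an xor with a SIMM13
   immediate.  */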
2465
2466 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2467 unsigned HOST_WIDE_INT, int);
2468
2469 static void
2470 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2471 unsigned HOST_WIDE_INT high_bits,
2472 unsigned HOST_WIDE_INT low_immediate,
2473 int shift_count)
2474 {
2475 rtx temp2 = op0;
2476
2477 if ((high_bits & 0xfffffc00) != 0)
2478 {
2479 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2480 if ((high_bits & ~0xfffffc00) != 0)
2481 emit_insn (gen_rtx_SET (op0,
2482 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2483 else
2484 temp2 = temp;
2485 }
2486 else
2487 {
2488 emit_insn (gen_safe_SET64 (temp, high_bits));
2489 temp2 = temp;
2490 }
2491
2492 /* Now shift it up into place. */
2493 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2494 GEN_INT (shift_count))));
2495
2496 /* If there is a low immediate part piece, finish up by
2497 putting that in as well. */
2498 if (low_immediate != 0)
2499 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2500 }
2501
2502 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2503 unsigned HOST_WIDE_INT);
2504
2505 /* Full 64-bit constant decomposition. Even though this is the
2506 'worst' case, we still optimize a few things away. */
2507 static void
2508 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2509 unsigned HOST_WIDE_INT high_bits,
2510 unsigned HOST_WIDE_INT low_bits)
2511 {
2512 rtx sub_temp = op0;
2513
2514 if (can_create_pseudo_p ())
2515 sub_temp = gen_reg_rtx (DImode);
2516
2517 if ((high_bits & 0xfffffc00) != 0)
2518 {
2519 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2520 if ((high_bits & ~0xfffffc00) != 0)
2521 emit_insn (gen_rtx_SET (sub_temp,
2522 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2523 else
2524 sub_temp = temp;
2525 }
2526 else
2527 {
2528 emit_insn (gen_safe_SET64 (temp, high_bits));
2529 sub_temp = temp;
2530 }
2531
2532 if (can_create_pseudo_p ())
2533 {
2534 rtx temp2 = gen_reg_rtx (DImode);
2535 rtx temp3 = gen_reg_rtx (DImode);
2536 rtx temp4 = gen_reg_rtx (DImode);
2537
2538 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2539 GEN_INT (32))));
2540
2541 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2542 if ((low_bits & ~0xfffffc00) != 0)
2543 {
2544 emit_insn (gen_rtx_SET (temp3,
2545 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2546 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2547 }
2548 else
2549 {
2550 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2551 }
2552 }
2553 else
2554 {
2555 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2556 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2557 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2558 int to_shift = 12;
2559
2560 /* We are in the middle of reload, so this is really
2561 painful. However we do still make an attempt to
2562 avoid emitting truly stupid code. */
2563 if (low1 != const0_rtx)
2564 {
2565 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2566 GEN_INT (to_shift))));
2567 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2568 sub_temp = op0;
2569 to_shift = 12;
2570 }
2571 else
2572 {
2573 to_shift += 12;
2574 }
2575 if (low2 != const0_rtx)
2576 {
2577 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2578 GEN_INT (to_shift))));
2579 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2580 sub_temp = op0;
2581 to_shift = 8;
2582 }
2583 else
2584 {
2585 to_shift += 8;
2586 }
2587 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2588 GEN_INT (to_shift))));
2589 if (low3 != const0_rtx)
2590 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2591 /* phew... */
2592 }
2593 }
2594
2595 /* Analyze a 64-bit constant for certain properties. */
2596 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2597 unsigned HOST_WIDE_INT,
2598 int *, int *, int *);
2599
2600 static void
2601 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2602 unsigned HOST_WIDE_INT low_bits,
2603 int *hbsp, int *lbsp, int *abbasp)
2604 {
2605 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2606 int i;
2607
2608 lowest_bit_set = highest_bit_set = -1;
2609 i = 0;
2610 do
2611 {
2612 if ((lowest_bit_set == -1)
2613 && ((low_bits >> i) & 1))
2614 lowest_bit_set = i;
2615 if ((highest_bit_set == -1)
2616 && ((high_bits >> (32 - i - 1)) & 1))
2617 highest_bit_set = (64 - i - 1);
2618 }
2619 while (++i < 32
2620 && ((highest_bit_set == -1)
2621 || (lowest_bit_set == -1)));
2622 if (i == 32)
2623 {
2624 i = 0;
2625 do
2626 {
2627 if ((lowest_bit_set == -1)
2628 && ((high_bits >> i) & 1))
2629 lowest_bit_set = i + 32;
2630 if ((highest_bit_set == -1)
2631 && ((low_bits >> (32 - i - 1)) & 1))
2632 highest_bit_set = 32 - i - 1;
2633 }
2634 while (++i < 32
2635 && ((highest_bit_set == -1)
2636 || (lowest_bit_set == -1)));
2637 }
2638 /* If there are no bits set this should have gone out
2639 as one instruction! */
2640 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2641 all_bits_between_are_set = 1;
2642 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2643 {
2644 if (i < 32)
2645 {
2646 if ((low_bits & (1 << i)) != 0)
2647 continue;
2648 }
2649 else
2650 {
2651 if ((high_bits & (1 << (i - 32))) != 0)
2652 continue;
2653 }
2654 all_bits_between_are_set = 0;
2655 break;
2656 }
2657 *hbsp = highest_bit_set;
2658 *lbsp = lowest_bit_set;
2659 *abbasp = all_bits_between_are_set;
2660 }
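
/* For instance, for the 64-bit value 0x000000000003fc00 (high_bits = 0,
   low_bits = 0x3fc00, i.e. bits 10..17 set) this computes
   *lbsp = 10, *hbsp = 17 and *abbasp = 1, since every bit between the
   lowest and the highest set bit is also set.  */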
2661
2662 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2663
2664 static int
2665 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2666 unsigned HOST_WIDE_INT low_bits)
2667 {
2668 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2669
2670 if (high_bits == 0
2671 || high_bits == 0xffffffff)
2672 return 1;
2673
2674 analyze_64bit_constant (high_bits, low_bits,
2675 &highest_bit_set, &lowest_bit_set,
2676 &all_bits_between_are_set);
2677
2678 if ((highest_bit_set == 63
2679 || lowest_bit_set == 0)
2680 && all_bits_between_are_set != 0)
2681 return 1;
2682
2683 if ((highest_bit_set - lowest_bit_set) < 21)
2684 return 1;
2685
2686 return 0;
2687 }
2688
2689 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2690 unsigned HOST_WIDE_INT,
2691 int, int);
2692
2693 static unsigned HOST_WIDE_INT
2694 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2695 unsigned HOST_WIDE_INT low_bits,
2696 int lowest_bit_set, int shift)
2697 {
2698 HOST_WIDE_INT hi, lo;
2699
2700 if (lowest_bit_set < 32)
2701 {
2702 lo = (low_bits >> lowest_bit_set) << shift;
2703 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2704 }
2705 else
2706 {
2707 lo = 0;
2708 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2709 }
2710 gcc_assert (! (hi & lo));
2711 return (hi | lo);
2712 }
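
/* For example, with high_bits = 0, low_bits = 0x000ff000 (bits 12..19
   set), lowest_bit_set = 12 and shift = 10, this returns 0x3fc00: the
   set bits are slid down to bit 0 and then moved up by SHIFT, so that
   the caller can load them with a single sethi and shift them back
   into place afterwards.  */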
2713
2714 /* Here we are sure to be arch64 and this is an integer constant
2715 being loaded into a register. Emit the most efficient
2716 insn sequence possible. Detection of all the 1-insn cases
2717 has been done already. */
2718 static void
2719 sparc_emit_set_const64 (rtx op0, rtx op1)
2720 {
2721 unsigned HOST_WIDE_INT high_bits, low_bits;
2722 int lowest_bit_set, highest_bit_set;
2723 int all_bits_between_are_set;
2724 rtx temp = 0;
2725
2726 /* Sanity check that we know what we are working with. */
2727 gcc_assert (TARGET_ARCH64
2728 && (GET_CODE (op0) == SUBREG
2729 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2730
2731 if (! can_create_pseudo_p ())
2732 temp = op0;
2733
2734 if (GET_CODE (op1) != CONST_INT)
2735 {
2736 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2737 return;
2738 }
2739
2740 if (! temp)
2741 temp = gen_reg_rtx (DImode);
2742
2743 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2744 low_bits = (INTVAL (op1) & 0xffffffff);
2745
2746 /* low_bits bits 0 --> 31
2747 high_bits bits 32 --> 63 */
2748
2749 analyze_64bit_constant (high_bits, low_bits,
2750 &highest_bit_set, &lowest_bit_set,
2751 &all_bits_between_are_set);
2752
2753 /* First try for a 2-insn sequence. */
2754
2755 /* These situations are preferred because the optimizer can
2756 * do more things with them:
2757 * 1) mov -1, %reg
2758 * sllx %reg, shift, %reg
2759 * 2) mov -1, %reg
2760 * srlx %reg, shift, %reg
2761 * 3) mov some_small_const, %reg
2762 * sllx %reg, shift, %reg
2763 */
2764 if (((highest_bit_set == 63
2765 || lowest_bit_set == 0)
2766 && all_bits_between_are_set != 0)
2767 || ((highest_bit_set - lowest_bit_set) < 12))
2768 {
2769 HOST_WIDE_INT the_const = -1;
2770 int shift = lowest_bit_set;
2771
2772 if ((highest_bit_set != 63
2773 && lowest_bit_set != 0)
2774 || all_bits_between_are_set == 0)
2775 {
2776 the_const =
2777 create_simple_focus_bits (high_bits, low_bits,
2778 lowest_bit_set, 0);
2779 }
2780 else if (lowest_bit_set == 0)
2781 shift = -(63 - highest_bit_set);
2782
2783 gcc_assert (SPARC_SIMM13_P (the_const));
2784 gcc_assert (shift != 0);
2785
2786 emit_insn (gen_safe_SET64 (temp, the_const));
2787 if (shift > 0)
2788 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2789 GEN_INT (shift))));
2790 else if (shift < 0)
2791 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2792 GEN_INT (-shift))));
2793 return;
2794 }
2795
2796 /* Now a range of 22 or fewer bits set somewhere.
2797 * 1) sethi %hi(focus_bits), %reg
2798 * sllx %reg, shift, %reg
2799 * 2) sethi %hi(focus_bits), %reg
2800 * srlx %reg, shift, %reg
2801 */
2802 if ((highest_bit_set - lowest_bit_set) < 21)
2803 {
2804 unsigned HOST_WIDE_INT focus_bits =
2805 create_simple_focus_bits (high_bits, low_bits,
2806 lowest_bit_set, 10);
2807
2808 gcc_assert (SPARC_SETHI_P (focus_bits));
2809 gcc_assert (lowest_bit_set != 10);
2810
2811 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2812
2813 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2814 if (lowest_bit_set < 10)
2815 emit_insn (gen_rtx_SET (op0,
2816 gen_rtx_LSHIFTRT (DImode, temp,
2817 GEN_INT (10 - lowest_bit_set))));
2818 else if (lowest_bit_set > 10)
2819 emit_insn (gen_rtx_SET (op0,
2820 gen_rtx_ASHIFT (DImode, temp,
2821 GEN_INT (lowest_bit_set - 10))));
2822 return;
2823 }
2824
2825 /* 1) sethi %hi(low_bits), %reg
2826 * or %reg, %lo(low_bits), %reg
2827 * 2) sethi %hi(~low_bits), %reg
2828 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2829 */
2830 if (high_bits == 0
2831 || high_bits == 0xffffffff)
2832 {
2833 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2834 (high_bits == 0xffffffff));
2835 return;
2836 }
2837
2838 /* Now, try 3-insn sequences. */
2839
2840 /* 1) sethi %hi(high_bits), %reg
2841 * or %reg, %lo(high_bits), %reg
2842 * sllx %reg, 32, %reg
2843 */
2844 if (low_bits == 0)
2845 {
2846 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2847 return;
2848 }
2849
2850 /* We may be able to do something quick
2851 when the constant is negated, so try that. */
2852 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2853 (~low_bits) & 0xfffffc00))
2854 {
2855 /* NOTE: The trailing bits get XOR'd so we need the
2856 non-negated bits, not the negated ones. */
2857 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2858
2859 if ((((~high_bits) & 0xffffffff) == 0
2860 && ((~low_bits) & 0x80000000) == 0)
2861 || (((~high_bits) & 0xffffffff) == 0xffffffff
2862 && ((~low_bits) & 0x80000000) != 0))
2863 {
2864 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2865
2866 if ((SPARC_SETHI_P (fast_int)
2867 && (~high_bits & 0xffffffff) == 0)
2868 || SPARC_SIMM13_P (fast_int))
2869 emit_insn (gen_safe_SET64 (temp, fast_int));
2870 else
2871 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2872 }
2873 else
2874 {
2875 rtx negated_const;
2876 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2877 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2878 sparc_emit_set_const64 (temp, negated_const);
2879 }
2880
2881 /* If we are XOR'ing with -1, then we should emit a one's complement
2882 instead. This way the combiner will notice logical operations
2883 such as ANDN later on and substitute. */
2884 if (trailing_bits == 0x3ff)
2885 {
2886 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2887 }
2888 else
2889 {
2890 emit_insn (gen_rtx_SET (op0,
2891 gen_safe_XOR64 (temp,
2892 (-0x400 | trailing_bits))));
2893 }
2894 return;
2895 }
2896
2897 /* 1) sethi %hi(xxx), %reg
2898 * or %reg, %lo(xxx), %reg
2899 * sllx %reg, yyy, %reg
2900 *
2901 * ??? This is just a generalized version of the low_bits==0
2902 * thing above, FIXME...
2903 */
2904 if ((highest_bit_set - lowest_bit_set) < 32)
2905 {
2906 unsigned HOST_WIDE_INT focus_bits =
2907 create_simple_focus_bits (high_bits, low_bits,
2908 lowest_bit_set, 0);
2909
2910 /* We can't get here in this state. */
2911 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2912
2913 /* So what we know is that the set bits straddle the
2914 middle of the 64-bit word. */
2915 sparc_emit_set_const64_quick2 (op0, temp,
2916 focus_bits, 0,
2917 lowest_bit_set);
2918 return;
2919 }
2920
2921 /* 1) sethi %hi(high_bits), %reg
2922 * or %reg, %lo(high_bits), %reg
2923 * sllx %reg, 32, %reg
2924 * or %reg, low_bits, %reg
2925 */
2926 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2927 {
2928 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2929 return;
2930 }
2931
2932 /* When all else fails, the easiest way is full decomposition. */
2933 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2934 }
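
/* A concrete 2-insn case, for illustration: op1 = 0x00000fff00000000
   has highest_bit_set = 43 and lowest_bit_set = 32, so the span is
   43 - 32 = 11 < 12 and the code above emits the equivalent of

     mov  0xfff, %temp
     sllx %temp, 32, %reg

   (register names are arbitrary).  */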
2935
2936 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2937
2938 static bool
2939 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2940 {
2941 *p1 = SPARC_ICC_REG;
2942 *p2 = SPARC_FCC_REG;
2943 return true;
2944 }
2945
2946 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2947
2948 static unsigned int
2949 sparc_min_arithmetic_precision (void)
2950 {
2951 return 32;
2952 }
2953
2954 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2955 return the mode to be used for the comparison. For floating-point,
2956 CCFP[E]mode is used. CCNZmode should be used when the first operand
2957 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2958 processing is needed. */
2959
2960 machine_mode
2961 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2962 {
2963 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2964 {
2965 switch (op)
2966 {
2967 case EQ:
2968 case NE:
2969 case UNORDERED:
2970 case ORDERED:
2971 case UNLT:
2972 case UNLE:
2973 case UNGT:
2974 case UNGE:
2975 case UNEQ:
2976 case LTGT:
2977 return CCFPmode;
2978
2979 case LT:
2980 case LE:
2981 case GT:
2982 case GE:
2983 return CCFPEmode;
2984
2985 default:
2986 gcc_unreachable ();
2987 }
2988 }
2989 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2990 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2991 && y == const0_rtx)
2992 {
2993 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2994 return CCXNZmode;
2995 else
2996 return CCNZmode;
2997 }
2998 else
2999 {
3000 /* This is for the cmp<mode>_sne pattern. */
3001 if (GET_CODE (x) == NOT && y == constm1_rtx)
3002 {
3003 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3004 return CCXCmode;
3005 else
3006 return CCCmode;
3007 }
3008
3009 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3010 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3011 {
3012 if (GET_CODE (y) == UNSPEC
3013 && (XINT (y, 1) == UNSPEC_ADDV
3014 || XINT (y, 1) == UNSPEC_SUBV
3015 || XINT (y, 1) == UNSPEC_NEGV))
3016 return CCVmode;
3017 else
3018 return CCCmode;
3019 }
3020
3021 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3022 return CCXmode;
3023 else
3024 return CCmode;
3025 }
3026 }
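
/* A few illustrative cases of the above (not exhaustive):
   - comparing two SFmode values with GT yields CCFPEmode, while UNLT
     yields CCFPmode;
   - comparing (plus:SI a b) against zero yields CCNZmode, and
     CCXNZmode for DImode on TARGET_ARCH64;
   - an ordinary comparison of two SImode registers yields plain CCmode
     (CCXmode for DImode in 64-bit mode).  */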
3027
3028 /* Emit the compare insn and return the CC reg for a CODE comparison
3029 with operands X and Y. */
3030
3031 static rtx
3032 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3033 {
3034 machine_mode mode;
3035 rtx cc_reg;
3036
3037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3038 return x;
3039
3040 mode = SELECT_CC_MODE (code, x, y);
3041
3042 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3043 fcc regs (cse can't tell they're really call clobbered regs and will
3044 remove a duplicate comparison even if there is an intervening function
3045 call - it will then try to reload the cc reg via an int reg which is why
3046 we need the movcc patterns). It is possible to provide the movcc
3047 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3048 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3049 to tell cse that CCFPE mode registers (even pseudos) are call
3050 clobbered. */
3051
3052 /* ??? This is an experiment. Rather than making changes to cse which may
3053 or may not be easy/clean, we do our own cse. This is possible because
3054 we will generate hard registers. Cse knows they're call clobbered (it
3055 doesn't know the same thing about pseudos). If we guess wrong, no big
3056 deal, but if we win, great! */
3057
3058 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3059 #if 1 /* experiment */
3060 {
3061 int reg;
3062 /* We cycle through the registers to ensure they're all exercised. */
3063 static int next_fcc_reg = 0;
3064 /* Previous x,y for each fcc reg. */
3065 static rtx prev_args[4][2];
3066
3067 /* Scan prev_args for x,y. */
3068 for (reg = 0; reg < 4; reg++)
3069 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3070 break;
3071 if (reg == 4)
3072 {
3073 reg = next_fcc_reg;
3074 prev_args[reg][0] = x;
3075 prev_args[reg][1] = y;
3076 next_fcc_reg = (next_fcc_reg + 1) & 3;
3077 }
3078 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3079 }
3080 #else
3081 cc_reg = gen_reg_rtx (mode);
3082 #endif /* ! experiment */
3083 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3084 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3085 else
3086 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3087
3088 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3089 will only result in an unrecognizable insn, so there is no point in asserting. */
3090 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3091
3092 return cc_reg;
3093 }
3094
3095
3096 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3097
3098 rtx
3099 gen_compare_reg (rtx cmp)
3100 {
3101 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3102 }
3103
3104 /* This function is used for v9 only.
3105 DEST is the target of the Scc insn.
3106 CODE is the code for an Scc's comparison.
3107 X and Y are the values we compare.
3108
3109 This function is needed to turn
3110
3111 (set (reg:SI 110)
3112 (gt (reg:CCX 100 %icc)
3113 (const_int 0)))
3114 into
3115 (set (reg:SI 110)
3116 (gt:DI (reg:CCX 100 %icc)
3117 (const_int 0)))
3118
3119 I.e. the instruction recognizer needs to see the mode of the comparison to
3120 find the right instruction. We could use "gt:DI" right in the
3121 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3122
3123 static int
3124 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3125 {
3126 if (! TARGET_ARCH64
3127 && (GET_MODE (x) == DImode
3128 || GET_MODE (dest) == DImode))
3129 return 0;
3130
3131 /* Try to use the movrCC insns. */
3132 if (TARGET_ARCH64
3133 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3134 && y == const0_rtx
3135 && v9_regcmp_p (compare_code))
3136 {
3137 rtx op0 = x;
3138 rtx temp;
3139
3140 /* Special case for op0 != 0. This can be done with one instruction if
3141 dest == x. */
3142
3143 if (compare_code == NE
3144 && GET_MODE (dest) == DImode
3145 && rtx_equal_p (op0, dest))
3146 {
3147 emit_insn (gen_rtx_SET (dest,
3148 gen_rtx_IF_THEN_ELSE (DImode,
3149 gen_rtx_fmt_ee (compare_code, DImode,
3150 op0, const0_rtx),
3151 const1_rtx,
3152 dest)));
3153 return 1;
3154 }
3155
3156 if (reg_overlap_mentioned_p (dest, op0))
3157 {
3158 /* Handle the case where dest == x.
3159 We "early clobber" the result. */
3160 op0 = gen_reg_rtx (GET_MODE (x));
3161 emit_move_insn (op0, x);
3162 }
3163
3164 emit_insn (gen_rtx_SET (dest, const0_rtx));
3165 if (GET_MODE (op0) != DImode)
3166 {
3167 temp = gen_reg_rtx (DImode);
3168 convert_move (temp, op0, 0);
3169 }
3170 else
3171 temp = op0;
3172 emit_insn (gen_rtx_SET (dest,
3173 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3174 gen_rtx_fmt_ee (compare_code, DImode,
3175 temp, const0_rtx),
3176 const1_rtx,
3177 dest)));
3178 return 1;
3179 }
3180 else
3181 {
3182 x = gen_compare_reg_1 (compare_code, x, y);
3183 y = const0_rtx;
3184
3185 emit_insn (gen_rtx_SET (dest, const0_rtx));
3186 emit_insn (gen_rtx_SET (dest,
3187 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3188 gen_rtx_fmt_ee (compare_code,
3189 GET_MODE (x), x, y),
3190 const1_rtx, dest)));
3191 return 1;
3192 }
3193 }
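
/* As a rough illustration of the "op0 != 0 && dest == x" special case
   above: the single emitted conditional move corresponds to a v9
   move-on-register-contents instruction such as

     movrnz %reg, 1, %reg

   which leaves the register untouched when it is zero.  The register
   name is arbitrary and the exact insn chosen is up to the matching
   patterns in the machine description.  */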
3194
3195
3196 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3197 without jumps using the addx/subx instructions. */
3198
3199 bool
3200 emit_scc_insn (rtx operands[])
3201 {
3202 rtx tem, x, y;
3203 enum rtx_code code;
3204 machine_mode mode;
3205
3206 /* The quad-word fp compare library routines all return nonzero to indicate
3207 true, which is different from the equivalent libgcc routines, so we must
3208 handle them specially here. */
3209 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3210 {
3211 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3212 GET_CODE (operands[1]));
3213 operands[2] = XEXP (operands[1], 0);
3214 operands[3] = XEXP (operands[1], 1);
3215 }
3216
3217 code = GET_CODE (operands[1]);
3218 x = operands[2];
3219 y = operands[3];
3220 mode = GET_MODE (x);
3221
3222 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3223 more applications). The exception to this is "reg != 0" which can
3224 be done in one instruction on v9 (so we do it). */
3225 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3226 {
3227 if (y != const0_rtx)
3228 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3229
3230 rtx pat = gen_rtx_SET (operands[0],
3231 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3232 x, const0_rtx));
3233
3234 /* If we can use addx/subx or addxc, add a clobber for CC. */
3235 if (mode == SImode || (code == NE && TARGET_VIS3))
3236 {
3237 rtx clobber
3238 = gen_rtx_CLOBBER (VOIDmode,
3239 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3240 SPARC_ICC_REG));
3241 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3242 }
3243
3244 emit_insn (pat);
3245 return true;
3246 }
3247
3248 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3249 if (TARGET_ARCH64
3250 && mode == DImode
3251 && !((code == LTU || code == GTU) && TARGET_VIS3)
3252 && gen_v9_scc (operands[0], code, x, y))
3253 return true;
3254
3255 /* We can do LTU and GEU using the addx/subx instructions too. And
3256 for GTU/LEU, if both operands are registers, swap them and fall
3257 back to the easy case. */
3258 if (code == GTU || code == LEU)
3259 {
3260 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3261 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3262 {
3263 tem = x;
3264 x = y;
3265 y = tem;
3266 code = swap_condition (code);
3267 }
3268 }
3269
3270 if (code == LTU || code == GEU)
3271 {
3272 emit_insn (gen_rtx_SET (operands[0],
3273 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3274 gen_compare_reg_1 (code, x, y),
3275 const0_rtx)));
3276 return true;
3277 }
3278
3279 /* All the possibilities to use addx/subx-based sequences have been
3280 exhausted; try for a 3-instruction sequence using v9 conditional
3281 moves. */
3282 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3283 return true;
3284
3285 /* Nope, do branches. */
3286 return false;
3287 }
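
/* For reference, the addx/subx method mentioned above is the classic
   carry-flag idiom; e.g. for an unsigned "x < y" in SImode the final
   code is typically along the lines of

     subcc %x, %y, %g0     ! sets the carry flag iff x < y (unsigned)
     addx  %g0, 0, %dest   ! dest = 0 + 0 + carry

   Register names here are placeholders; the actual sequence is
   produced by the corresponding patterns in the machine description.  */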
3288
3289 /* Emit a conditional jump insn for the v9 architecture using comparison code
3290 CODE and jump target LABEL.
3291 This function exists to take advantage of the v9 brxx insns. */
3292
3293 static void
3294 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3295 {
3296 emit_jump_insn (gen_rtx_SET (pc_rtx,
3297 gen_rtx_IF_THEN_ELSE (VOIDmode,
3298 gen_rtx_fmt_ee (code, GET_MODE (op0),
3299 op0, const0_rtx),
3300 gen_rtx_LABEL_REF (VOIDmode, label),
3301 pc_rtx)));
3302 }
3303
3304 /* Emit a conditional jump insn for the UA2011 architecture using
3305 comparison code CODE and jump target LABEL. This function exists
3306 to take advantage of the UA2011 Compare and Branch insns. */
3307
3308 static void
3309 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3310 {
3311 rtx if_then_else;
3312
3313 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3314 gen_rtx_fmt_ee(code, GET_MODE(op0),
3315 op0, op1),
3316 gen_rtx_LABEL_REF (VOIDmode, label),
3317 pc_rtx);
3318
3319 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3320 }
3321
3322 void
3323 emit_conditional_branch_insn (rtx operands[])
3324 {
3325 /* The quad-word fp compare library routines all return nonzero to indicate
3326 true, which is different from the equivalent libgcc routines, so we must
3327 handle them specially here. */
3328 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3329 {
3330 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3331 GET_CODE (operands[0]));
3332 operands[1] = XEXP (operands[0], 0);
3333 operands[2] = XEXP (operands[0], 1);
3334 }
3335
3336 /* If we can tell early on that the comparison is against a constant
3337 that won't fit in the 5-bit signed immediate field of a cbcond,
3338 use one of the other v9 conditional branch sequences. */
3339 if (TARGET_CBCOND
3340 && GET_CODE (operands[1]) == REG
3341 && (GET_MODE (operands[1]) == SImode
3342 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3343 && (GET_CODE (operands[2]) != CONST_INT
3344 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3345 {
3346 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3347 return;
3348 }
3349
3350 if (TARGET_ARCH64 && operands[2] == const0_rtx
3351 && GET_CODE (operands[1]) == REG
3352 && GET_MODE (operands[1]) == DImode)
3353 {
3354 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3355 return;
3356 }
3357
3358 operands[1] = gen_compare_reg (operands[0]);
3359 operands[2] = const0_rtx;
3360 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3361 operands[1], operands[2]);
3362 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3363 operands[3]));
3364 }
3365
3366
3367 /* Generate a DFmode part of a hard TFmode register.
3368 REG is the TFmode hard register, LOW is 1 for the
3369 low 64 bits of the register and 0 otherwise.
3370 */
3371 rtx
3372 gen_df_reg (rtx reg, int low)
3373 {
3374 int regno = REGNO (reg);
3375
3376 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3377 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3378 return gen_rtx_REG (DFmode, regno);
3379 }
3380 \f
3381 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3382 Unlike normal calls, TFmode operands are passed by reference. It is
3383 assumed that no more than 3 operands are required. */
3384
3385 static void
3386 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3387 {
3388 rtx ret_slot = NULL, arg[3], func_sym;
3389 int i;
3390
3391 /* We only expect to be called for conversions, unary, and binary ops. */
3392 gcc_assert (nargs == 2 || nargs == 3);
3393
3394 for (i = 0; i < nargs; ++i)
3395 {
3396 rtx this_arg = operands[i];
3397 rtx this_slot;
3398
3399 /* TFmode arguments and return values are passed by reference. */
3400 if (GET_MODE (this_arg) == TFmode)
3401 {
3402 int force_stack_temp;
3403
3404 force_stack_temp = 0;
3405 if (TARGET_BUGGY_QP_LIB && i == 0)
3406 force_stack_temp = 1;
3407
3408 if (GET_CODE (this_arg) == MEM
3409 && ! force_stack_temp)
3410 {
3411 tree expr = MEM_EXPR (this_arg);
3412 if (expr)
3413 mark_addressable (expr);
3414 this_arg = XEXP (this_arg, 0);
3415 }
3416 else if (CONSTANT_P (this_arg)
3417 && ! force_stack_temp)
3418 {
3419 this_slot = force_const_mem (TFmode, this_arg);
3420 this_arg = XEXP (this_slot, 0);
3421 }
3422 else
3423 {
3424 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3425
3426 /* Operand 0 is the return value. We'll copy it out later. */
3427 if (i > 0)
3428 emit_move_insn (this_slot, this_arg);
3429 else
3430 ret_slot = this_slot;
3431
3432 this_arg = XEXP (this_slot, 0);
3433 }
3434 }
3435
3436 arg[i] = this_arg;
3437 }
3438
3439 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3440
3441 if (GET_MODE (operands[0]) == TFmode)
3442 {
3443 if (nargs == 2)
3444 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3445 arg[0], GET_MODE (arg[0]),
3446 arg[1], GET_MODE (arg[1]));
3447 else
3448 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3449 arg[0], GET_MODE (arg[0]),
3450 arg[1], GET_MODE (arg[1]),
3451 arg[2], GET_MODE (arg[2]));
3452
3453 if (ret_slot)
3454 emit_move_insn (operands[0], ret_slot);
3455 }
3456 else
3457 {
3458 rtx ret;
3459
3460 gcc_assert (nargs == 2);
3461
3462 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3463 GET_MODE (operands[0]),
3464 arg[1], GET_MODE (arg[1]));
3465
3466 if (ret != operands[0])
3467 emit_move_insn (operands[0], ret);
3468 }
3469 }
3470
3471 /* Expand soft-float TFmode calls to the SPARC ABI routines. */
3472
3473 static void
3474 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3475 {
3476 const char *func;
3477
3478 switch (code)
3479 {
3480 case PLUS:
3481 func = "_Qp_add";
3482 break;
3483 case MINUS:
3484 func = "_Qp_sub";
3485 break;
3486 case MULT:
3487 func = "_Qp_mul";
3488 break;
3489 case DIV:
3490 func = "_Qp_div";
3491 break;
3492 default:
3493 gcc_unreachable ();
3494 }
3495
3496 emit_soft_tfmode_libcall (func, 3, operands);
3497 }
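
/* For example, a TFmode addition compiled without TARGET_HARD_QUAD ends
   up as a call to the SPARC ABI routine _Qp_add, with the result slot
   and both source operands passed by reference, roughly

     _Qp_add (&result, &op1, &op2);

   as arranged by emit_soft_tfmode_libcall above.  */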
3498
3499 static void
3500 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3501 {
3502 const char *func;
3503
3504 gcc_assert (code == SQRT);
3505 func = "_Qp_sqrt";
3506
3507 emit_soft_tfmode_libcall (func, 2, operands);
3508 }
3509
3510 static void
3511 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3512 {
3513 const char *func;
3514
3515 switch (code)
3516 {
3517 case FLOAT_EXTEND:
3518 switch (GET_MODE (operands[1]))
3519 {
3520 case E_SFmode:
3521 func = "_Qp_stoq";
3522 break;
3523 case E_DFmode:
3524 func = "_Qp_dtoq";
3525 break;
3526 default:
3527 gcc_unreachable ();
3528 }
3529 break;
3530
3531 case FLOAT_TRUNCATE:
3532 switch (GET_MODE (operands[0]))
3533 {
3534 case E_SFmode:
3535 func = "_Qp_qtos";
3536 break;
3537 case E_DFmode:
3538 func = "_Qp_qtod";
3539 break;
3540 default:
3541 gcc_unreachable ();
3542 }
3543 break;
3544
3545 case FLOAT:
3546 switch (GET_MODE (operands[1]))
3547 {
3548 case E_SImode:
3549 func = "_Qp_itoq";
3550 if (TARGET_ARCH64)
3551 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3552 break;
3553 case E_DImode:
3554 func = "_Qp_xtoq";
3555 break;
3556 default:
3557 gcc_unreachable ();
3558 }
3559 break;
3560
3561 case UNSIGNED_FLOAT:
3562 switch (GET_MODE (operands[1]))
3563 {
3564 case E_SImode:
3565 func = "_Qp_uitoq";
3566 if (TARGET_ARCH64)
3567 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3568 break;
3569 case E_DImode:
3570 func = "_Qp_uxtoq";
3571 break;
3572 default:
3573 gcc_unreachable ();
3574 }
3575 break;
3576
3577 case FIX:
3578 switch (GET_MODE (operands[0]))
3579 {
3580 case E_SImode:
3581 func = "_Qp_qtoi";
3582 break;
3583 case E_DImode:
3584 func = "_Qp_qtox";
3585 break;
3586 default:
3587 gcc_unreachable ();
3588 }
3589 break;
3590
3591 case UNSIGNED_FIX:
3592 switch (GET_MODE (operands[0]))
3593 {
3594 case E_SImode:
3595 func = "_Qp_qtoui";
3596 break;
3597 case E_DImode:
3598 func = "_Qp_qtoux";
3599 break;
3600 default:
3601 gcc_unreachable ();
3602 }
3603 break;
3604
3605 default:
3606 gcc_unreachable ();
3607 }
3608
3609 emit_soft_tfmode_libcall (func, 2, operands);
3610 }
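/* Naming note, derived from the cases above: in the _Qp_* names q = quad,
   s = single, d = double, i = 32-bit integer, x = 64-bit integer, and a
   leading u marks the unsigned variants; e.g. _Qp_uxtoq converts an
   unsigned 64-bit integer to TFmode.  */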
3611
3612 /* Expand a hard-float TFmode operation. All arguments must be in
3613 registers. */
3614
3615 static void
3616 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3617 {
3618 rtx op, dest;
3619
3620 if (GET_RTX_CLASS (code) == RTX_UNARY)
3621 {
3622 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3623 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3624 }
3625 else
3626 {
3627 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3628 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3629 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3630 operands[1], operands[2]);
3631 }
3632
3633 if (register_operand (operands[0], VOIDmode))
3634 dest = operands[0];
3635 else
3636 dest = gen_reg_rtx (GET_MODE (operands[0]));
3637
3638 emit_insn (gen_rtx_SET (dest, op));
3639
3640 if (dest != operands[0])
3641 emit_move_insn (operands[0], dest);
3642 }
3643
3644 void
3645 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3646 {
3647 if (TARGET_HARD_QUAD)
3648 emit_hard_tfmode_operation (code, operands);
3649 else
3650 emit_soft_tfmode_binop (code, operands);
3651 }
3652
3653 void
3654 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3655 {
3656 if (TARGET_HARD_QUAD)
3657 emit_hard_tfmode_operation (code, operands);
3658 else
3659 emit_soft_tfmode_unop (code, operands);
3660 }
3661
3662 void
3663 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3664 {
3665 if (TARGET_HARD_QUAD)
3666 emit_hard_tfmode_operation (code, operands);
3667 else
3668 emit_soft_tfmode_cvt (code, operands);
3669 }
3670 \f
3671 /* Return nonzero if a branch/jump/call instruction will be emitting a
3672 nop into its delay slot. */
3673
3674 int
3675 empty_delay_slot (rtx_insn *insn)
3676 {
3677 rtx seq;
3678
3679 /* If no previous instruction (should not happen), return true. */
3680 if (PREV_INSN (insn) == NULL)
3681 return 1;
3682
3683 seq = NEXT_INSN (PREV_INSN (insn));
3684 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3685 return 0;
3686
3687 return 1;
3688 }
3689
3690 /* Return nonzero if we should emit a nop after a cbcond instruction.
3691 The cbcond instruction does not have a delay slot; however, there is
3692 a severe performance penalty if a control transfer appears right
3693 after a cbcond. Therefore we emit a nop when we detect this
3694 situation. */
3695
3696 int
3697 emit_cbcond_nop (rtx_insn *insn)
3698 {
3699 rtx next = next_active_insn (insn);
3700
3701 if (!next)
3702 return 1;
3703
3704 if (NONJUMP_INSN_P (next)
3705 && GET_CODE (PATTERN (next)) == SEQUENCE)
3706 next = XVECEXP (PATTERN (next), 0, 0);
3707 else if (CALL_P (next)
3708 && GET_CODE (PATTERN (next)) == PARALLEL)
3709 {
3710 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3711
3712 if (GET_CODE (delay) == RETURN)
3713 {
3714 /* It's a sibling call. Do not emit the nop if we're going
3715 to emit something other than the jump itself as the first
3716 instruction of the sibcall sequence. */
3717 if (sparc_leaf_function_p || TARGET_FLAT)
3718 return 0;
3719 }
3720 }
3721
3722 if (NONJUMP_INSN_P (next))
3723 return 0;
3724
3725 return 1;
3726 }
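/* Illustrative note (instruction mnemonics assumed, not from this file):
   cbcond is the compare-and-branch family introduced with SPARC T4, e.g.

     cwbne  %o1, %o2, .Llabel     ! compare word, branch if not equal

   It has no delay slot, so when the following instruction is itself a
   control transfer we pad with a nop to avoid the back-to-back penalty.  */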
3727
3728 /* Return nonzero if TRIAL can go into the call delay slot. */
3729
3730 int
3731 eligible_for_call_delay (rtx_insn *trial)
3732 {
3733 rtx pat;
3734
3735 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3736 return 0;
3737
3738 /* Binutils allows
3739 call __tls_get_addr, %tgd_call (foo)
3740 add %l7, %o0, %o0, %tgd_add (foo)
3741 while Sun as/ld does not. */
3742 if (TARGET_GNU_TLS || !TARGET_TLS)
3743 return 1;
3744
3745 pat = PATTERN (trial);
3746
3747 /* We must reject tgd_add{32|64}, i.e.
3748 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3749 and tldm_add{32|64}, i.e.
3750 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3751 for Sun as/ld. */
3752 if (GET_CODE (pat) == SET
3753 && GET_CODE (SET_SRC (pat)) == PLUS)
3754 {
3755 rtx unspec = XEXP (SET_SRC (pat), 1);
3756
3757 if (GET_CODE (unspec) == UNSPEC
3758 && (XINT (unspec, 1) == UNSPEC_TLSGD
3759 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3760 return 0;
3761 }
3762
3763 return 1;
3764 }
3765
3766 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3767 instruction. RETURN_P is true if the v9 variant 'return' is to be
3768 considered in the test too.
3769
3770 TRIAL must be a SET whose destination is a REG appropriate for the
3771 'restore' instruction or, if RETURN_P is true, for the 'return'
3772 instruction. */
3773
3774 static int
3775 eligible_for_restore_insn (rtx trial, bool return_p)
3776 {
3777 rtx pat = PATTERN (trial);
3778 rtx src = SET_SRC (pat);
3779 bool src_is_freg = false;
3780 rtx src_reg;
3781
3782 /* Since we now can do moves between float and integer registers when
3783 VIS3 is enabled, we have to catch this case. We can allow such
3784 moves when doing a 'return', however. */
3785 src_reg = src;
3786 if (GET_CODE (src_reg) == SUBREG)
3787 src_reg = SUBREG_REG (src_reg);
3788 if (GET_CODE (src_reg) == REG
3789 && SPARC_FP_REG_P (REGNO (src_reg)))
3790 src_is_freg = true;
3791
3792 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3793 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3794 && arith_operand (src, GET_MODE (src))
3795 && ! src_is_freg)
3796 {
3797 if (TARGET_ARCH64)
3798 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3799 else
3800 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3801 }
3802
3803 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3804 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3805 && arith_double_operand (src, GET_MODE (src))
3806 && ! src_is_freg)
3807 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3808
3809 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3810 else if (! TARGET_FPU && register_operand (src, SFmode))
3811 return 1;
3812
3813 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3814 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3815 return 1;
3816
3817 /* If we have the 'return' instruction, anything that does not use
3818 local or output registers and can go into a delay slot wins. */
3819 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3820 return 1;
3821
3822 /* The 'restore src1,src2,dest' pattern for SImode. */
3823 else if (GET_CODE (src) == PLUS
3824 && register_operand (XEXP (src, 0), SImode)
3825 && arith_operand (XEXP (src, 1), SImode))
3826 return 1;
3827
3828 /* The 'restore src1,src2,dest' pattern for DImode. */
3829 else if (GET_CODE (src) == PLUS
3830 && register_operand (XEXP (src, 0), DImode)
3831 && arith_double_operand (XEXP (src, 1), DImode))
3832 return 1;
3833
3834 /* The 'restore src1,%lo(src2),dest' pattern. */
3835 else if (GET_CODE (src) == LO_SUM
3836 && ! TARGET_CM_MEDMID
3837 && ((register_operand (XEXP (src, 0), SImode)
3838 && immediate_operand (XEXP (src, 1), SImode))
3839 || (TARGET_ARCH64
3840 && register_operand (XEXP (src, 0), DImode)
3841 && immediate_operand (XEXP (src, 1), DImode))))
3842 return 1;
3843
3844 /* The 'restore src,src,dest' pattern. */
3845 else if (GET_CODE (src) == ASHIFT
3846 && (register_operand (XEXP (src, 0), SImode)
3847 || register_operand (XEXP (src, 0), DImode))
3848 && XEXP (src, 1) == const1_rtx)
3849 return 1;
3850
3851 return 0;
3852 }
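/* A minimal sketch (not from the original source) of why these patterns pay
   off: 'restore' can perform a final addition while switching register
   windows, so an epilogue such as

     ret
      restore  %o1, %o2, %o0    ! caller's %o0 = callee's %o1 + %o2

   folds the insn that would otherwise sit in the delay slot into the
   restore itself.  */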
3853
3854 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3855
3856 int
3857 eligible_for_return_delay (rtx_insn *trial)
3858 {
3859 int regno;
3860 rtx pat;
3861
3862 /* If the function uses __builtin_eh_return, the eh_return machinery
3863 occupies the delay slot. */
3864 if (crtl->calls_eh_return)
3865 return 0;
3866
3867 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3868 return 0;
3869
3870 /* In the case of a leaf or flat function, anything can go into the slot. */
3871 if (sparc_leaf_function_p || TARGET_FLAT)
3872 return 1;
3873
3874 if (!NONJUMP_INSN_P (trial))
3875 return 0;
3876
3877 pat = PATTERN (trial);
3878 if (GET_CODE (pat) == PARALLEL)
3879 {
3880 int i;
3881
3882 if (! TARGET_V9)
3883 return 0;
3884 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3885 {
3886 rtx expr = XVECEXP (pat, 0, i);
3887 if (GET_CODE (expr) != SET)
3888 return 0;
3889 if (GET_CODE (SET_DEST (expr)) != REG)
3890 return 0;
3891 regno = REGNO (SET_DEST (expr));
3892 if (regno >= 8 && regno < 24)
3893 return 0;
3894 }
3895 return !epilogue_renumber (&pat, 1);
3896 }
3897
3898 if (GET_CODE (pat) != SET)
3899 return 0;
3900
3901 if (GET_CODE (SET_DEST (pat)) != REG)
3902 return 0;
3903
3904 regno = REGNO (SET_DEST (pat));
3905
3906 /* Otherwise, only operations which can be done in tandem with
3907 a `restore' or `return' insn can go into the delay slot. */
3908 if (regno >= 8 && regno < 24)
3909 return 0;
3910
3911 /* If this instruction sets up a floating point register and we have a return
3912 instruction, it can probably go in. But restore will not work
3913 with FP_REGS. */
3914 if (! SPARC_INT_REG_P (regno))
3915 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3916
3917 return eligible_for_restore_insn (trial, true);
3918 }
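/* Clarifying note: hard registers 8-23 are %o0-%o7 and %l0-%l7, which are
   remapped or discarded by the register window switch, hence the
   "regno >= 8 && regno < 24" rejections above and in
   eligible_for_sibcall_delay below.  */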
3919
3920 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3921
3922 int
3923 eligible_for_sibcall_delay (rtx_insn *trial)
3924 {
3925 rtx pat;
3926
3927 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3928 return 0;
3929
3930 if (!NONJUMP_INSN_P (trial))
3931 return 0;
3932
3933 pat = PATTERN (trial);
3934
3935 if (sparc_leaf_function_p || TARGET_FLAT)
3936 {
3937 /* If the tail call is done using the call instruction,
3938 we have to restore %o7 in the delay slot. */
3939 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3940 return 0;
3941
3942 /* %g1 is used to build the function address. */
3943 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3944 return 0;
3945
3946 return 1;
3947 }
3948
3949 if (GET_CODE (pat) != SET)
3950 return 0;
3951
3952 /* Otherwise, only operations which can be done in tandem with
3953 a `restore' insn can go into the delay slot. */
3954 if (GET_CODE (SET_DEST (pat)) != REG
3955 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3956 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3957 return 0;
3958
3959 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3960 in most cases. */
3961 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3962 return 0;
3963
3964 return eligible_for_restore_insn (trial, false);
3965 }
3966 \f
3967 /* Determine if it's legal to put X into the constant pool. This
3968 is not possible if X contains the address of a symbol that is
3969 not constant (TLS) or not known at final link time (PIC). */
3970
3971 static bool
3972 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3973 {
3974 switch (GET_CODE (x))
3975 {
3976 case CONST_INT:
3977 case CONST_WIDE_INT:
3978 case CONST_DOUBLE:
3979 case CONST_VECTOR:
3980 /* Accept all non-symbolic constants. */
3981 return false;
3982
3983 case LABEL_REF:
3984 /* Labels are OK iff we are non-PIC. */
3985 return flag_pic != 0;
3986
3987 case SYMBOL_REF:
3988 /* 'Naked' TLS symbol references are never OK,
3989 non-TLS symbols are OK iff we are non-PIC. */
3990 if (SYMBOL_REF_TLS_MODEL (x))
3991 return true;
3992 else
3993 return flag_pic != 0;
3994
3995 case CONST:
3996 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3997 case PLUS:
3998 case MINUS:
3999 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4000 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4001 case UNSPEC:
4002 return true;
4003 default:
4004 gcc_unreachable ();
4005 }
4006 }
4007 \f
4008 /* Global Offset Table support. */
4009 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4010 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4011
4012 /* Return the SYMBOL_REF for the Global Offset Table. */
4013
4014 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4015
4016 static rtx
4017 sparc_got (void)
4018 {
4019 if (!sparc_got_symbol)
4020 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4021
4022 return sparc_got_symbol;
4023 }
4024
4025 /* Ensure that we are not using patterns that are not OK with PIC. */
4026
4027 int
4028 check_pic (int i)
4029 {
4030 rtx op;
4031
4032 switch (flag_pic)
4033 {
4034 case 1:
4035 op = recog_data.operand[i];
4036 gcc_assert (GET_CODE (op) != SYMBOL_REF
4037 && (GET_CODE (op) != CONST
4038 || (GET_CODE (XEXP (op, 0)) == MINUS
4039 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4040 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4041 /* fallthrough */
4042 case 2:
4043 default:
4044 return 1;
4045 }
4046 }
4047
4048 /* Return true if X is an address which needs a temporary register when
4049 reloaded while generating PIC code. */
4050
4051 int
4052 pic_address_needs_scratch (rtx x)
4053 {
4054 /* An address that is a symbolic operand plus a non-SMALL_INT offset needs a temp reg. */
4055 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4056 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4058 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4059 return 1;
4060
4061 return 0;
4062 }
4063
4064 /* Determine if a given RTX is a valid constant. We already know this
4065 satisfies CONSTANT_P. */
4066
4067 static bool
4068 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4069 {
4070 switch (GET_CODE (x))
4071 {
4072 case CONST:
4073 case SYMBOL_REF:
4074 if (sparc_tls_referenced_p (x))
4075 return false;
4076 break;
4077
4078 case CONST_DOUBLE:
4079 /* Floating point constants are generally not ok.
4080 The only exceptions are 0.0 and all-ones in VIS. */
4081 if (TARGET_VIS
4082 && SCALAR_FLOAT_MODE_P (mode)
4083 && (const_zero_operand (x, mode)
4084 || const_all_ones_operand (x, mode)))
4085 return true;
4086
4087 return false;
4088
4089 case CONST_VECTOR:
4090 /* Vector constants are generally not ok.
4091 The only exceptions are 0 and -1 in VIS. */
4092 if (TARGET_VIS
4093 && (const_zero_operand (x, mode)
4094 || const_all_ones_operand (x, mode)))
4095 return true;
4096
4097 return false;
4098
4099 default:
4100 break;
4101 }
4102
4103 return true;
4104 }
4105
4106 /* Determine if a given RTX is a valid constant address. */
4107
4108 bool
4109 constant_address_p (rtx x)
4110 {
4111 switch (GET_CODE (x))
4112 {
4113 case LABEL_REF:
4114 case CONST_INT:
4115 case HIGH:
4116 return true;
4117
4118 case CONST:
4119 if (flag_pic && pic_address_needs_scratch (x))
4120 return false;
4121 return sparc_legitimate_constant_p (Pmode, x);
4122
4123 case SYMBOL_REF:
4124 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4125
4126 default:
4127 return false;
4128 }
4129 }
4130
4131 /* Nonzero if the constant value X is a legitimate general operand
4132 when generating PIC code. It is given that flag_pic is on and
4133 that X satisfies CONSTANT_P. */
4134
4135 bool
4136 legitimate_pic_operand_p (rtx x)
4137 {
4138 if (pic_address_needs_scratch (x))
4139 return false;
4140 if (sparc_tls_referenced_p (x))
4141 return false;
4142 return true;
4143 }
4144
4145 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4146 (CONST_INT_P (X) \
4147 && INTVAL (X) >= -0x1000 \
4148 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4149
4150 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4151 (CONST_INT_P (X) \
4152 && INTVAL (X) >= -0x1000 \
4153 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
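/* Background note (an assumption about the ISA, not stated here): SPARC
   load/store immediates are 13-bit signed values, i.e. the range
   [-4096, 4095].  Subtracting GET_MODE_SIZE keeps the address of the last
   byte of a multi-word access representable; the OLO10 variant is tighter
   because the %lo() part of a LO_SUM already contributes up to 10 bits.  */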
4154
4155 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4156
4157 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4158 ordinarily. This changes a bit when generating PIC. */
4159
4160 static bool
4161 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4162 {
4163 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4164
4165 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4166 rs1 = addr;
4167 else if (GET_CODE (addr) == PLUS)
4168 {
4169 rs1 = XEXP (addr, 0);
4170 rs2 = XEXP (addr, 1);
4171
4172 /* Canonicalize. REG comes first; if there are no regs,
4173 LO_SUM comes first. */
4174 if (!REG_P (rs1)
4175 && GET_CODE (rs1) != SUBREG
4176 && (REG_P (rs2)
4177 || GET_CODE (rs2) == SUBREG
4178 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4179 {
4180 rs1 = XEXP (addr, 1);
4181 rs2 = XEXP (addr, 0);
4182 }
4183
4184 if ((flag_pic == 1
4185 && rs1 == pic_offset_table_rtx
4186 && !REG_P (rs2)
4187 && GET_CODE (rs2) != SUBREG
4188 && GET_CODE (rs2) != LO_SUM
4189 && GET_CODE (rs2) != MEM
4190 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4191 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4192 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4193 || ((REG_P (rs1)
4194 || GET_CODE (rs1) == SUBREG)
4195 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4196 {
4197 imm1 = rs2;
4198 rs2 = NULL;
4199 }
4200 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4201 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4202 {
4203 /* We prohibit REG + REG for TFmode when there are no quad move insns
4204 and we consequently need to split. We do this because REG+REG
4205 is not an offsettable address. If we get the situation in reload
4206 where source and destination of a movtf pattern are both MEMs with
4207 REG+REG address, then only one of them gets converted to an
4208 offsettable address. */
4209 if (mode == TFmode
4210 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4211 return 0;
4212
4213 /* Likewise for TImode, but in all cases. */
4214 if (mode == TImode)
4215 return 0;
4216
4217 /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not optimizing,
4218 because then mem_min_alignment is likely to be zero
4219 after reload and the forced split would lack a matching splitter
4220 pattern. */
4221 if (TARGET_ARCH32 && !optimize
4222 && (mode == DFmode || mode == DImode))
4223 return 0;
4224 }
4225 else if (USE_AS_OFFSETABLE_LO10
4226 && GET_CODE (rs1) == LO_SUM
4227 && TARGET_ARCH64
4228 && ! TARGET_CM_MEDMID
4229 && RTX_OK_FOR_OLO10_P (rs2, mode))
4230 {
4231 rs2 = NULL;
4232 imm1 = XEXP (rs1, 1);
4233 rs1 = XEXP (rs1, 0);
4234 if (!CONSTANT_P (imm1)
4235 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4236 return 0;
4237 }
4238 }
4239 else if (GET_CODE (addr) == LO_SUM)
4240 {
4241 rs1 = XEXP (addr, 0);
4242 imm1 = XEXP (addr, 1);
4243
4244 if (!CONSTANT_P (imm1)
4245 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4246 return 0;
4247
4248 /* We can't allow TFmode in 32-bit mode, because an offset greater
4249 than the alignment (8) may cause the LO_SUM to overflow. */
4250 if (mode == TFmode && TARGET_ARCH32)
4251 return 0;
4252
4253 /* During reload, accept the HIGH+LO_SUM construct generated by
4254 sparc_legitimize_reload_address. */
4255 if (reload_in_progress
4256 && GET_CODE (rs1) == HIGH
4257 && XEXP (rs1, 0) == imm1)
4258 return 1;
4259 }
4260 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4261 return 1;
4262 else
4263 return 0;
4264
4265 if (GET_CODE (rs1) == SUBREG)
4266 rs1 = SUBREG_REG (rs1);
4267 if (!REG_P (rs1))
4268 return 0;
4269
4270 if (rs2)
4271 {
4272 if (GET_CODE (rs2) == SUBREG)
4273 rs2 = SUBREG_REG (rs2);
4274 if (!REG_P (rs2))
4275 return 0;
4276 }
4277
4278 if (strict)
4279 {
4280 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4281 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4282 return 0;
4283 }
4284 else
4285 {
4286 if ((! SPARC_INT_REG_P (REGNO (rs1))
4287 && REGNO (rs1) != FRAME_POINTER_REGNUM
4288 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4289 || (rs2
4290 && (! SPARC_INT_REG_P (REGNO (rs2))
4291 && REGNO (rs2) != FRAME_POINTER_REGNUM
4292 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4293 return 0;
4294 }
4295 return 1;
4296 }
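/* Summary sketch (not part of the original code) of the address forms
   accepted above, in assembly terms:

     [%reg]                  single register
     [%reg + %reg]           register + register (with mode-specific exceptions)
     [%reg + simm13]         register + 13-bit signed immediate
     [%reg + %lo(symbol)]    LO_SUM, paired with a preceding sethi %hi(symbol)

   plus a few PIC- and reload-specific variants handled case by case.  */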
4297
4298 /* Return the SYMBOL_REF for the tls_get_addr function. */
4299
4300 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4301
4302 static rtx
4303 sparc_tls_get_addr (void)
4304 {
4305 if (!sparc_tls_symbol)
4306 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4307
4308 return sparc_tls_symbol;
4309 }
4310
4311 /* Return the Global Offset Table to be used in TLS mode. */
4312
4313 static rtx
4314 sparc_tls_got (void)
4315 {
4316 /* In PIC mode, this is just the PIC offset table. */
4317 if (flag_pic)
4318 {
4319 crtl->uses_pic_offset_table = 1;
4320 return pic_offset_table_rtx;
4321 }
4322
4323 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4324 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4325 if (TARGET_SUN_TLS && TARGET_ARCH32)
4326 {
4327 load_got_register ();
4328 return global_offset_table_rtx;
4329 }
4330
4331 /* In all other cases, we load a new pseudo with the GOT symbol. */
4332 return copy_to_reg (sparc_got ());
4333 }
4334
4335 /* Return true if X contains a thread-local symbol. */
4336
4337 static bool
4338 sparc_tls_referenced_p (rtx x)
4339 {
4340 if (!TARGET_HAVE_TLS)
4341 return false;
4342
4343 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4344 x = XEXP (XEXP (x, 0), 0);
4345
4346 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4347 return true;
4348
4349 /* That's all we handle in sparc_legitimize_tls_address for now. */
4350 return false;
4351 }
4352
4353 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4354 this (thread-local) address. */
4355
4356 static rtx
4357 sparc_legitimize_tls_address (rtx addr)
4358 {
4359 rtx temp1, temp2, temp3, ret, o0, got;
4360 rtx_insn *insn;
4361
4362 gcc_assert (can_create_pseudo_p ());
4363
4364 if (GET_CODE (addr) == SYMBOL_REF)
4365 switch (SYMBOL_REF_TLS_MODEL (addr))
4366 {
4367 case TLS_MODEL_GLOBAL_DYNAMIC:
4368 start_sequence ();
4369 temp1 = gen_reg_rtx (SImode);
4370 temp2 = gen_reg_rtx (SImode);
4371 ret = gen_reg_rtx (Pmode);
4372 o0 = gen_rtx_REG (Pmode, 8);
4373 got = sparc_tls_got ();
4374 emit_insn (gen_tgd_hi22 (temp1, addr));
4375 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4376 if (TARGET_ARCH32)
4377 {
4378 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4379 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4380 addr, const1_rtx));
4381 }
4382 else
4383 {
4384 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4385 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4386 addr, const1_rtx));
4387 }
4388 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4389 insn = get_insns ();
4390 end_sequence ();
4391 emit_libcall_block (insn, ret, o0, addr);
4392 break;
4393
4394 case TLS_MODEL_LOCAL_DYNAMIC:
4395 start_sequence ();
4396 temp1 = gen_reg_rtx (SImode);
4397 temp2 = gen_reg_rtx (SImode);
4398 temp3 = gen_reg_rtx (Pmode);
4399 ret = gen_reg_rtx (Pmode);
4400 o0 = gen_rtx_REG (Pmode, 8);
4401 got = sparc_tls_got ();
4402 emit_insn (gen_tldm_hi22 (temp1));
4403 emit_insn (gen_tldm_lo10 (temp2, temp1));
4404 if (TARGET_ARCH32)
4405 {
4406 emit_insn (gen_tldm_add32 (o0, got, temp2));
4407 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4408 const1_rtx));
4409 }
4410 else
4411 {
4412 emit_insn (gen_tldm_add64 (o0, got, temp2));
4413 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4414 const1_rtx));
4415 }
4416 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4417 insn = get_insns ();
4418 end_sequence ();
4419 emit_libcall_block (insn, temp3, o0,
4420 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4421 UNSPEC_TLSLD_BASE));
4422 temp1 = gen_reg_rtx (SImode);
4423 temp2 = gen_reg_rtx (SImode);
4424 emit_insn (gen_tldo_hix22 (temp1, addr));
4425 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4426 if (TARGET_ARCH32)
4427 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4428 else
4429 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4430 break;
4431
4432 case TLS_MODEL_INITIAL_EXEC:
4433 temp1 = gen_reg_rtx (SImode);
4434 temp2 = gen_reg_rtx (SImode);
4435 temp3 = gen_reg_rtx (Pmode);
4436 got = sparc_tls_got ();
4437 emit_insn (gen_tie_hi22 (temp1, addr));
4438 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4439 if (TARGET_ARCH32)
4440 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4441 else
4442 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4443 if (TARGET_SUN_TLS)
4444 {
4445 ret = gen_reg_rtx (Pmode);
4446 if (TARGET_ARCH32)
4447 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4448 temp3, addr));
4449 else
4450 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4451 temp3, addr));
4452 }
4453 else
4454 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4455 break;
4456
4457 case TLS_MODEL_LOCAL_EXEC:
4458 temp1 = gen_reg_rtx (Pmode);
4459 temp2 = gen_reg_rtx (Pmode);
4460 if (TARGET_ARCH32)
4461 {
4462 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4463 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4464 }
4465 else
4466 {
4467 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4468 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4469 }
4470 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4471 break;
4472
4473 default:
4474 gcc_unreachable ();
4475 }
4476
4477 else if (GET_CODE (addr) == CONST)
4478 {
4479 rtx base, offset;
4480
4481 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4482
4483 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4484 offset = XEXP (XEXP (addr, 0), 1);
4485
4486 base = force_operand (base, NULL_RTX);
4487 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4488 offset = force_reg (Pmode, offset);
4489 ret = gen_rtx_PLUS (Pmode, base, offset);
4490 }
4491
4492 else
4493 gcc_unreachable (); /* for now ... */
4494
4495 return ret;
4496 }
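/* Hedged illustration (relocation operators assumed, not shown in this file):
   for the local-exec model the insns generated above correspond to

     sethi  %tle_hix22(sym), %o0
     xor    %o0, %tle_lox10(sym), %o0
     add    %g7, %o0, %o0          ! %g7 holds the thread pointer

   The other models follow the same hi/lo pattern but go through the GOT and,
   for the dynamic models, end with a call to __tls_get_addr.  */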
4497
4498 /* Legitimize PIC addresses. If the address is already position-independent,
4499 we return ORIG. Newly generated position-independent addresses go into a
4500 reg. This is REG if nonzero, otherwise we allocate register(s) as
4501 necessary. */
4502
4503 static rtx
4504 sparc_legitimize_pic_address (rtx orig, rtx reg)
4505 {
4506 bool gotdata_op = false;
4507
4508 if (GET_CODE (orig) == SYMBOL_REF
4509 /* See the comment in sparc_expand_move. */
4510 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4511 {
4512 rtx pic_ref, address;
4513 rtx_insn *insn;
4514
4515 if (reg == 0)
4516 {
4517 gcc_assert (can_create_pseudo_p ());
4518 reg = gen_reg_rtx (Pmode);
4519 }
4520
4521 if (flag_pic == 2)
4522 {
4523 /* If not during reload, allocate another temp reg here for loading
4524 in the address, so that these instructions can be optimized
4525 properly. */
4526 rtx temp_reg = (! can_create_pseudo_p ()
4527 ? reg : gen_reg_rtx (Pmode));
4528
4529 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4530 won't get confused into thinking that these two instructions
4531 are loading in the true address of the symbol. If in the
4532 future a PIC rtx exists, that should be used instead. */
4533 if (TARGET_ARCH64)
4534 {
4535 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4536 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4537 }
4538 else
4539 {
4540 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4541 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4542 }
4543 address = temp_reg;
4544 gotdata_op = true;
4545 }
4546 else
4547 address = orig;
4548
4549 crtl->uses_pic_offset_table = 1;
4550 if (gotdata_op)
4551 {
4552 if (TARGET_ARCH64)
4553 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4554 pic_offset_table_rtx,
4555 address, orig));
4556 else
4557 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4558 pic_offset_table_rtx,
4559 address, orig));
4560 }
4561 else
4562 {
4563 pic_ref
4564 = gen_const_mem (Pmode,
4565 gen_rtx_PLUS (Pmode,
4566 pic_offset_table_rtx, address));
4567 insn = emit_move_insn (reg, pic_ref);
4568 }
4569
4570 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4571 by the loop optimizer. */
4572 set_unique_reg_note (insn, REG_EQUAL, orig);
4573 return reg;
4574 }
4575 else if (GET_CODE (orig) == CONST)
4576 {
4577 rtx base, offset;
4578
4579 if (GET_CODE (XEXP (orig, 0)) == PLUS
4580 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4581 return orig;
4582
4583 if (reg == 0)
4584 {
4585 gcc_assert (can_create_pseudo_p ());
4586 reg = gen_reg_rtx (Pmode);
4587 }
4588
4589 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4590 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4591 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4592 base == reg ? NULL_RTX : reg);
4593
4594 if (GET_CODE (offset) == CONST_INT)
4595 {
4596 if (SMALL_INT (offset))
4597 return plus_constant (Pmode, base, INTVAL (offset));
4598 else if (can_create_pseudo_p ())
4599 offset = force_reg (Pmode, offset);
4600 else
4601 /* If we reach here, then something is seriously wrong. */
4602 gcc_unreachable ();
4603 }
4604 return gen_rtx_PLUS (Pmode, base, offset);
4605 }
4606 else if (GET_CODE (orig) == LABEL_REF)
4607 /* ??? We ought to be checking that the register is live instead, in case
4608 it is eliminated. */
4609 crtl->uses_pic_offset_table = 1;
4610
4611 return orig;
4612 }
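/* Rough sketch (an assumption, not verbatim compiler output): with -fPIC
   (flag_pic == 2) the code above first builds the GOT slot offset in a
   temporary with a high/lo_sum pair, then loads the symbol's address with
   something like

     ld  [%l7 + %tmp], %reg       ! %l7 holds the GOT pointer

   annotated (gotdata_op) so the linker may later relax the GOT access.  */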
4613
4614 /* Try machine-dependent ways of modifying an illegitimate address X
4615 to be legitimate. If we find one, return the new, valid address.
4616
4617 OLDX is the address as it was before break_out_memory_refs was called.
4618 In some cases it is useful to look at this to decide what needs to be done.
4619
4620 MODE is the mode of the operand pointed to by X.
4621
4622 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4623
4624 static rtx
4625 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4626 machine_mode mode)
4627 {
4628 rtx orig_x = x;
4629
4630 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4631 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4632 force_operand (XEXP (x, 0), NULL_RTX));
4633 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4634 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4635 force_operand (XEXP (x, 1), NULL_RTX));
4636 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4637 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4638 XEXP (x, 1));
4639 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4640 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4641 force_operand (XEXP (x, 1), NULL_RTX));
4642
4643 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4644 return x;
4645
4646 if (sparc_tls_referenced_p (x))
4647 x = sparc_legitimize_tls_address (x);
4648 else if (flag_pic)
4649 x = sparc_legitimize_pic_address (x, NULL_RTX);
4650 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4651 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4652 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4653 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4654 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4655 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4656 else if (GET_CODE (x) == SYMBOL_REF
4657 || GET_CODE (x) == CONST
4658 || GET_CODE (x) == LABEL_REF)
4659 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4660
4661 return x;
4662 }
4663
4664 /* Delegitimize an address that was legitimized by the above function. */
4665
4666 static rtx
4667 sparc_delegitimize_address (rtx x)
4668 {
4669 x = delegitimize_mem_from_attrs (x);
4670
4671 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4672 switch (XINT (XEXP (x, 1), 1))
4673 {
4674 case UNSPEC_MOVE_PIC:
4675 case UNSPEC_TLSLE:
4676 x = XVECEXP (XEXP (x, 1), 0, 0);
4677 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4678 break;
4679 default:
4680 break;
4681 }
4682
4683 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4684 if (GET_CODE (x) == MINUS
4685 && REG_P (XEXP (x, 0))
4686 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4687 && GET_CODE (XEXP (x, 1)) == LO_SUM
4688 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4689 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4690 {
4691 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4692 gcc_assert (GET_CODE (x) == LABEL_REF);
4693 }
4694
4695 return x;
4696 }
4697
4698 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4699 replace the input X, or the original X if no replacement is called for.
4700 The output parameter *WIN is 1 if the calling macro should goto WIN,
4701 0 if it should not.
4702
4703 For SPARC, we wish to handle addresses by splitting them into
4704 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4705 This cuts the number of extra insns by one.
4706
4707 Do nothing when generating PIC code and the address is a symbolic
4708 operand or requires a scratch register. */
4709
4710 rtx
4711 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4712 int opnum, int type,
4713 int ind_levels ATTRIBUTE_UNUSED, int *win)
4714 {
4715 /* Decompose SImode constants into HIGH+LO_SUM. */
4716 if (CONSTANT_P (x)
4717 && (mode != TFmode || TARGET_ARCH64)
4718 && GET_MODE (x) == SImode
4719 && GET_CODE (x) != LO_SUM
4720 && GET_CODE (x) != HIGH
4721 && sparc_cmodel <= CM_MEDLOW
4722 && !(flag_pic
4723 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4724 {
4725 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4726 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4727 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4728 opnum, (enum reload_type)type);
4729 *win = 1;
4730 return x;
4731 }
4732
4733 /* We have to recognize what we have already generated above. */
4734 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4735 {
4736 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4737 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4738 opnum, (enum reload_type)type);
4739 *win = 1;
4740 return x;
4741 }
4742
4743 *win = 0;
4744 return x;
4745 }
4746
4747 /* Return true if ADDR (a legitimate address expression)
4748 has an effect that depends on the machine mode it is used for.
4749
4750 In PIC mode,
4751
4752 (mem:HI [%l7+a])
4753
4754 is not equivalent to
4755
4756 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4757
4758 because [%l7+a+1] is interpreted as the address of (a+1). */
4759
4760
4761 static bool
4762 sparc_mode_dependent_address_p (const_rtx addr,
4763 addr_space_t as ATTRIBUTE_UNUSED)
4764 {
4765 if (flag_pic && GET_CODE (addr) == PLUS)
4766 {
4767 rtx op0 = XEXP (addr, 0);
4768 rtx op1 = XEXP (addr, 1);
4769 if (op0 == pic_offset_table_rtx
4770 && symbolic_operand (op1, VOIDmode))
4771 return true;
4772 }
4773
4774 return false;
4775 }
4776
4777 #ifdef HAVE_GAS_HIDDEN
4778 # define USE_HIDDEN_LINKONCE 1
4779 #else
4780 # define USE_HIDDEN_LINKONCE 0
4781 #endif
4782
4783 static void
4784 get_pc_thunk_name (char name[32], unsigned int regno)
4785 {
4786 const char *reg_name = reg_names[regno];
4787
4788 /* Skip the leading '%' as that cannot be used in a
4789 symbol name. */
4790 reg_name += 1;
4791
4792 if (USE_HIDDEN_LINKONCE)
4793 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4794 else
4795 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4796 }
4797
4798 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4799
4800 static rtx
4801 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4802 {
4803 int orig_flag_pic = flag_pic;
4804 rtx insn;
4805
4806 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4807 flag_pic = 0;
4808 if (TARGET_ARCH64)
4809 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4810 else
4811 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4812 flag_pic = orig_flag_pic;
4813
4814 return insn;
4815 }
4816
4817 /* Emit code to load the GOT register. */
4818
4819 void
4820 load_got_register (void)
4821 {
4822 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4823 if (!global_offset_table_rtx)
4824 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4825
4826 if (TARGET_VXWORKS_RTP)
4827 emit_insn (gen_vxworks_load_got ());
4828 else
4829 {
4830 /* The GOT symbol is subject to a PC-relative relocation so we need a
4831 helper function to add the PC value and thus get the final value. */
4832 if (!got_helper_rtx)
4833 {
4834 char name[32];
4835 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4836 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4837 }
4838
4839 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4840 got_helper_rtx,
4841 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4842 }
4843
4844 /* Need to emit this whether or not we obey regdecls,
4845 since setjmp/longjmp can cause the liveness info to be wrong.
4846 ??? In the case where we don't obey regdecls, this is not sufficient
4847 since we may not fall out the bottom. */
4848 emit_use (global_offset_table_rtx);
4849 }
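/* Illustrative expansion (assumed, not taken from this file): the
   load_pcrel_sym pattern emitted above typically becomes

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk is just "retl; add %o7, %l7, %l7"; the -4/+4 adjustments
   account for the address of the call instruction itself.  */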
4850
4851 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4852 address of the call target. */
4853
4854 void
4855 sparc_emit_call_insn (rtx pat, rtx addr)
4856 {
4857 rtx_insn *insn;
4858
4859 insn = emit_call_insn (pat);
4860
4861 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4862 if (TARGET_VXWORKS_RTP
4863 && flag_pic
4864 && GET_CODE (addr) == SYMBOL_REF
4865 && (SYMBOL_REF_DECL (addr)
4866 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4867 : !SYMBOL_REF_LOCAL_P (addr)))
4868 {
4869 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4870 crtl->uses_pic_offset_table = 1;
4871 }
4872 }
4873 \f
4874 /* Return 1 if RTX is a MEM which is known to be aligned to at
4875 least a DESIRED byte boundary. */
4876
4877 int
4878 mem_min_alignment (rtx mem, int desired)
4879 {
4880 rtx addr, base, offset;
4881
4882 /* If it's not a MEM we can't accept it. */
4883 if (GET_CODE (mem) != MEM)
4884 return 0;
4885
4886 /* Obviously... */
4887 if (!TARGET_UNALIGNED_DOUBLES
4888 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4889 return 1;
4890
4891 /* ??? The rest of the function predates MEM_ALIGN so
4892 there is probably a bit of redundancy. */
4893 addr = XEXP (mem, 0);
4894 base = offset = NULL_RTX;
4895 if (GET_CODE (addr) == PLUS)
4896 {
4897 if (GET_CODE (XEXP (addr, 0)) == REG)
4898 {
4899 base = XEXP (addr, 0);
4900
4901 /* What we are saying here is that if the base
4902 REG is aligned properly, the compiler will make
4903 sure any REG-based index upon it is aligned
4904 as well. */
4905 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4906 offset = XEXP (addr, 1);
4907 else
4908 offset = const0_rtx;
4909 }
4910 }
4911 else if (GET_CODE (addr) == REG)
4912 {
4913 base = addr;
4914 offset = const0_rtx;
4915 }
4916
4917 if (base != NULL_RTX)
4918 {
4919 int regno = REGNO (base);
4920
4921 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4922 {
4923 /* Check if the compiler has recorded some information
4924 about the alignment of the base REG. If reload has
4925 completed, we already matched with proper alignments.
4926 If not running global_alloc, reload might give us
4927 an unaligned pointer to the local stack, though. */
4928 if (((cfun != 0
4929 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4930 || (optimize && reload_completed))
4931 && (INTVAL (offset) & (desired - 1)) == 0)
4932 return 1;
4933 }
4934 else
4935 {
4936 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4937 return 1;
4938 }
4939 }
4940 else if (! TARGET_UNALIGNED_DOUBLES
4941 || CONSTANT_P (addr)
4942 || GET_CODE (addr) == LO_SUM)
4943 {
4944 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4945 is true, in which case we can only assume that an access is aligned if
4946 it is to a constant address, or the address involves a LO_SUM. */
4947 return 1;
4948 }
4949
4950 /* An obviously unaligned address. */
4951 return 0;
4952 }
4953
4954 \f
4955 /* Vectors to keep interesting information about registers where it can easily
4956 be accessed. We used to use the actual mode value as the bit number, but there
4957 are more than 32 modes now. Instead we use two tables: one indexed by
4958 hard register number, and one indexed by mode. */
4959
4960 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4961 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4962 mapped into one sparc_mode_class mode. */
4963
4964 enum sparc_mode_class {
4965 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4966 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4967 CC_MODE, CCFP_MODE
4968 };
4969
4970 /* Modes for single-word and smaller quantities. */
4971 #define S_MODES \
4972 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4973
4974 /* Modes for double-word and smaller quantities. */
4975 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4976
4977 /* Modes for quad-word and smaller quantities. */
4978 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4979
4980 /* Modes for 8-word and smaller quantities. */
4981 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4982
4983 /* Modes for single-float quantities. */
4984 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4985
4986 /* Modes for double-float and smaller quantities. */
4987 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4988
4989 /* Modes for quad-float and smaller quantities. */
4990 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4991
4992 /* Modes for quad-float pairs and smaller quantities. */
4993 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4994
4995 /* Modes for double-float only quantities. */
4996 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4997
4998 /* Modes for quad-float and double-float only quantities. */
4999 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5000
5001 /* Modes for quad-float pairs and double-float only quantities. */
5002 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5003
5004 /* Modes for condition codes. */
5005 #define CC_MODES (1 << (int) CC_MODE)
5006 #define CCFP_MODES (1 << (int) CCFP_MODE)
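/* Usage note (an assumption about code outside this excerpt): a mode is
   allowed in a hard register when its class bit is set in the register's
   mask, roughly (hard_regno_mode_classes[regno] & sparc_mode_class[mode]).  */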
5007
5008 /* Value is 1 if register/mode pair is acceptable on sparc.
5009
5010 The funny mixture of D and T modes is because integer operations
5011 do not specially operate on tetra quantities, so non-quad-aligned
5012 registers can hold quadword quantities (except %o4 and %i4 because
5013 they cross fixed registers).
5014
5015 ??? Note that, despite the settings, non-double-aligned parameter
5016 registers can hold double-word quantities in 32-bit mode. */
5017
5018 /* This points to either the 32-bit or the 64-bit version. */
5019 static const int *hard_regno_mode_classes;
5020
5021 static const int hard_32bit_mode_classes[] = {
5022 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5023 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5024 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5025 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5026
5027 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5028 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5029 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5030 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5031
5032 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5033 and none can hold SFmode/SImode values. */
5034 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5035 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5036 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5037 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5038
5039 /* %fcc[0123] */
5040 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5041
5042 /* %icc, %sfp, %gsr */
5043 CC_MODES, 0, D_MODES
5044 };
5045
5046 static const int hard_64bit_mode_classes[] = {
5047 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5048 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5049 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5050 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5051
5052 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5053 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5054 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5055 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5056
5057 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5058 and none can hold SFmode/SImode values. */
5059 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5060 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5061 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5062 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5063
5064 /* %fcc[0123] */
5065 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5066
5067 /* %icc, %sfp, %gsr */
5068 CC_MODES, 0, D_MODES
5069 };
5070
5071 static int sparc_mode_class [NUM_MACHINE_MODES];
5072
5073 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5074
5075 static void
5076 sparc_init_modes (void)
5077 {
5078 int i;
5079
5080 for (i = 0; i < NUM_MACHINE_MODES; i++)
5081 {
5082 machine_mode m = (machine_mode) i;
5083 unsigned int size = GET_MODE_SIZE (m);
5084
5085 switch (GET_MODE_CLASS (m))
5086 {
5087 case MODE_INT:
5088 case MODE_PARTIAL_INT:
5089 case MODE_COMPLEX_INT:
5090 if (size < 4)
5091 sparc_mode_class[i] = 1 << (int) H_MODE;
5092 else if (size == 4)
5093 sparc_mode_class[i] = 1 << (int) S_MODE;
5094 else if (size == 8)
5095 sparc_mode_class[i] = 1 << (int) D_MODE;
5096 else if (size == 16)
5097 sparc_mode_class[i] = 1 << (int) T_MODE;
5098 else if (size == 32)
5099 sparc_mode_class[i] = 1 << (int) O_MODE;
5100 else
5101 sparc_mode_class[i] = 0;
5102 break;
5103 case MODE_VECTOR_INT:
5104 if (size == 4)
5105 sparc_mode_class[i] = 1 << (int) SF_MODE;
5106 else if (size == 8)
5107 sparc_mode_class[i] = 1 << (int) DF_MODE;
5108 else
5109 sparc_mode_class[i] = 0;
5110 break;
5111 case MODE_FLOAT:
5112 case MODE_COMPLEX_FLOAT:
5113 if (size == 4)
5114 sparc_mode_class[i] = 1 << (int) SF_MODE;
5115 else if (size == 8)
5116 sparc_mode_class[i] = 1 << (int) DF_MODE;
5117 else if (size == 16)
5118 sparc_mode_class[i] = 1 << (int) TF_MODE;
5119 else if (size == 32)
5120 sparc_mode_class[i] = 1 << (int) OF_MODE;
5121 else
5122 sparc_mode_class[i] = 0;
5123 break;
5124 case MODE_CC:
5125 if (m == CCFPmode || m == CCFPEmode)
5126 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5127 else
5128 sparc_mode_class[i] = 1 << (int) CC_MODE;
5129 break;
5130 default:
5131 sparc_mode_class[i] = 0;
5132 break;
5133 }
5134 }
5135
5136 if (TARGET_ARCH64)
5137 hard_regno_mode_classes = hard_64bit_mode_classes;
5138 else
5139 hard_regno_mode_classes = hard_32bit_mode_classes;
5140
5141 /* Initialize the array used by REGNO_REG_CLASS. */
5142 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5143 {
5144 if (i < 16 && TARGET_V8PLUS)
5145 sparc_regno_reg_class[i] = I64_REGS;
5146 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5147 sparc_regno_reg_class[i] = GENERAL_REGS;
5148 else if (i < 64)
5149 sparc_regno_reg_class[i] = FP_REGS;
5150 else if (i < 96)
5151 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5152 else if (i < 100)
5153 sparc_regno_reg_class[i] = FPCC_REGS;
5154 else
5155 sparc_regno_reg_class[i] = NO_REGS;
5156 }
5157 }
5158 \f
5159 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5160
5161 static inline bool
5162 save_global_or_fp_reg_p (unsigned int regno,
5163 int leaf_function ATTRIBUTE_UNUSED)
5164 {
5165 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5166 }
5167
5168 /* Return whether the return address register (%i7) is needed. */
5169
5170 static inline bool
5171 return_addr_reg_needed_p (int leaf_function)
5172 {
5173 /* If it is live, for example because of __builtin_return_address (0). */
5174 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5175 return true;
5176
5177 /* Otherwise, it is needed as a save register if %o7 is clobbered. */
5178 if (!leaf_function
5179 /* Loading the GOT register clobbers %o7. */
5180 || crtl->uses_pic_offset_table
5181 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5182 return true;
5183
5184 return false;
5185 }
5186
5187 /* Return whether REGNO, a local or in register, must be saved/restored. */
5188
5189 static bool
5190 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5191 {
5192 /* General case: call-saved registers live at some point. */
5193 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5194 return true;
5195
5196 /* Frame pointer register (%fp) if needed. */
5197 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5198 return true;
5199
5200 /* Return address register (%i7) if needed. */
5201 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5202 return true;
5203
5204 /* GOT register (%l7) if needed. */
5205 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5206 return true;
5207
5208 /* If the function accesses prior frames, the frame pointer and the return
5209 address of the previous frame must be saved on the stack. */
5210 if (crtl->accesses_prior_frames
5211 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5212 return true;
5213
5214 return false;
5215 }
5216
5217 /* Compute the frame size required by the function. This function is called
5218 during the reload pass and also by sparc_expand_prologue. */
5219
5220 HOST_WIDE_INT
5221 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5222 {
5223 HOST_WIDE_INT frame_size, apparent_frame_size;
5224 int args_size, n_global_fp_regs = 0;
5225 bool save_local_in_regs_p = false;
5226 unsigned int i;
5227
5228 /* If the function allocates dynamic stack space, the dynamic offset is
5229 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5230 if (leaf_function && !cfun->calls_alloca)
5231 args_size = 0;
5232 else
5233 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5234
5235 /* Calculate space needed for global registers. */
5236 if (TARGET_ARCH64)
5237 {
5238 for (i = 0; i < 8; i++)
5239 if (save_global_or_fp_reg_p (i, 0))
5240 n_global_fp_regs += 2;
5241 }
5242 else
5243 {
5244 for (i = 0; i < 8; i += 2)
5245 if (save_global_or_fp_reg_p (i, 0)
5246 || save_global_or_fp_reg_p (i + 1, 0))
5247 n_global_fp_regs += 2;
5248 }
5249
5250 /* In the flat window model, find out which local and in registers need to
5251 be saved. We don't reserve space in the current frame for them as they
5252 will be spilled into the register window save area of the caller's frame.
5253 However, as soon as we use this register window save area, we must create
5254 that of the current frame to make it the live one. */
5255 if (TARGET_FLAT)
5256 for (i = 16; i < 32; i++)
5257 if (save_local_or_in_reg_p (i, leaf_function))
5258 {
5259 save_local_in_regs_p = true;
5260 break;
5261 }
5262
5263 /* Calculate space needed for FP registers. */
5264 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5265 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5266 n_global_fp_regs += 2;
5267
5268 if (size == 0
5269 && n_global_fp_regs == 0
5270 && args_size == 0
5271 && !save_local_in_regs_p)
5272 frame_size = apparent_frame_size = 0;
5273 else
5274 {
5275 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5276 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5277 apparent_frame_size += n_global_fp_regs * 4;
5278
5279 /* We need to add the size of the outgoing argument area. */
5280 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5281
5282 /* And that of the register window save area. */
5283 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5284
5285 /* Finally, bump to the appropriate alignment. */
5286 frame_size = SPARC_STACK_ALIGN (frame_size);
5287 }
5288
5289 /* Set up values for use in prologue and epilogue. */
5290 sparc_frame_size = frame_size;
5291 sparc_apparent_frame_size = apparent_frame_size;
5292 sparc_n_global_fp_regs = n_global_fp_regs;
5293 sparc_save_local_in_regs_p = save_local_in_regs_p;
5294
5295 return frame_size;
5296 }
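/* In short (descriptive note derived from the code above):

     frame_size = SPARC_STACK_ALIGN (locals rounded up to 8
                                     + 4 * n_global_fp_regs
                                     + outgoing args rounded up to 8
                                     + register window save area)

   and everything collapses to 0 when no component is needed.  */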
5297
5298 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5299
5300 int
5301 sparc_initial_elimination_offset (int to)
5302 {
5303 int offset;
5304
5305 if (to == STACK_POINTER_REGNUM)
5306 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5307 else
5308 offset = 0;
5309
5310 offset += SPARC_STACK_BIAS;
5311 return offset;
5312 }
5313
5314 /* Output any necessary .register pseudo-ops. */
5315
5316 void
5317 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5318 {
5319 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5320 int i;
5321
5322 if (TARGET_ARCH32)
5323 return;
5324
5325 /* Check if %g[2367] were used without
5326 .register being printed for them already. */
5327 for (i = 2; i < 8; i++)
5328 {
5329 if (df_regs_ever_live_p (i)
5330 && ! sparc_hard_reg_printed [i])
5331 {
5332 sparc_hard_reg_printed [i] = 1;
5333 /* %g7 is used as TLS base register, use #ignore
5334 for it instead of #scratch. */
5335 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5336 i == 7 ? "ignore" : "scratch");
5337 }
5338 if (i == 3) i = 5;
5339 }
5340 #endif
5341 }
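/* Example of the emitted directives (follows directly from the fprintf
   above), say for a function using %g2 and %g7:

     .register  %g2, #scratch
     .register  %g7, #ignore   */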
5342
5343 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5344
5345 #if PROBE_INTERVAL > 4096
5346 #error Cannot use indexed addressing mode for stack probing
5347 #endif
5348
5349 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5350 inclusive. These are offsets from the current stack pointer.
5351
5352 Note that we don't use the REG+REG addressing mode for the probes because
5353 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5354 so the advantages of having a single code path win here. */
5355
5356 static void
5357 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5358 {
5359 rtx g1 = gen_rtx_REG (Pmode, 1);
5360
5361 /* See if we have a constant small number of probes to generate. If so,
5362 that's the easy case. */
5363 if (size <= PROBE_INTERVAL)
5364 {
5365 emit_move_insn (g1, GEN_INT (first));
5366 emit_insn (gen_rtx_SET (g1,
5367 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5368 emit_stack_probe (plus_constant (Pmode, g1, -size));
5369 }
5370
5371 /* The run-time loop is made up of 9 insns in the generic case while the
5372 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5373 else if (size <= 4 * PROBE_INTERVAL)
5374 {
5375 HOST_WIDE_INT i;
5376
5377 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5378 emit_insn (gen_rtx_SET (g1,
5379 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5380 emit_stack_probe (g1);
5381
5382 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5383 it exceeds SIZE. If only two probes are needed, this will not
5384 generate any code. Then probe at FIRST + SIZE. */
5385 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5386 {
5387 emit_insn (gen_rtx_SET (g1,
5388 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5389 emit_stack_probe (g1);
5390 }
5391
5392 emit_stack_probe (plus_constant (Pmode, g1,
5393 (i - PROBE_INTERVAL) - size));
5394 }
5395
5396 /* Otherwise, do the same as above, but in a loop. Note that we must be
5397 extra careful with variables wrapping around because we might be at
5398 the very top (or the very bottom) of the address space and we have
5399 to be able to handle this case properly; in particular, we use an
5400 equality test for the loop condition. */
5401 else
5402 {
5403 HOST_WIDE_INT rounded_size;
5404 rtx g4 = gen_rtx_REG (Pmode, 4);
5405
5406 emit_move_insn (g1, GEN_INT (first));
5407
5408
5409 /* Step 1: round SIZE to the previous multiple of the interval. */
5410
5411 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5412 emit_move_insn (g4, GEN_INT (rounded_size));
5413
5414
5415 /* Step 2: compute initial and final value of the loop counter. */
5416
5417 /* TEST_ADDR = SP + FIRST. */
5418 emit_insn (gen_rtx_SET (g1,
5419 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5420
5421 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5422 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5423
5424
5425 /* Step 3: the loop
5426
5427 while (TEST_ADDR != LAST_ADDR)
5428 {
5429 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5430 probe at TEST_ADDR
5431 }
5432
5433 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5434 until it is equal to ROUNDED_SIZE. */
5435
5436 if (TARGET_ARCH64)
5437 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5438 else
5439 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5440
5441
5442 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5443 that SIZE is equal to ROUNDED_SIZE. */
5444
5445 if (size != rounded_size)
5446 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5447 }
5448
5449 /* Make sure nothing is scheduled before we are done. */
5450 emit_insn (gen_blockage ());
5451 }
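
/* Illustrative sketch, not part of the original commentary: assuming the
   usual PROBE_INTERVAL of 4096, a call such as

     sparc_emit_probe_stack_range (FIRST, 12288);

   takes the second branch above and emits probes at

     %sp - FIRST - 4096
     %sp - FIRST - 8192     (single iteration of the unrolled loop)
     %sp - FIRST - 12288    (the trailing probe at FIRST + SIZE)

   i.e. one probe per interval plus the final probe at FIRST + SIZE.  */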
5452
5453 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5454 absolute addresses. */
5455
5456 const char *
5457 output_probe_stack_range (rtx reg1, rtx reg2)
5458 {
5459 static int labelno = 0;
5460 char loop_lab[32];
5461 rtx xops[2];
5462
5463 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5464
5465 /* Loop. */
5466 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5467
5468 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5469 xops[0] = reg1;
5470 xops[1] = GEN_INT (-PROBE_INTERVAL);
5471 output_asm_insn ("add\t%0, %1, %0", xops);
5472
5473 /* Test if TEST_ADDR == LAST_ADDR. */
5474 xops[1] = reg2;
5475 output_asm_insn ("cmp\t%0, %1", xops);
5476
5477 /* Probe at TEST_ADDR and branch. */
5478 if (TARGET_ARCH64)
5479 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5480 else
5481 fputs ("\tbne\t", asm_out_file);
5482 assemble_name_raw (asm_out_file, loop_lab);
5483 fputc ('\n', asm_out_file);
5484 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5485 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5486
5487 return "";
5488 }
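
/* Illustrative sketch of the assembly emitted above for TARGET_ARCH64,
   assuming PROBE_INTERVAL is 4096, the operands are %g1/%g4 and the
   internal label comes out as .LPSRL0:

       .LPSRL0:
        add     %g1, -4096, %g1
        cmp     %g1, %g4
        bne,pt  %xcc, .LPSRL0
         st     %g0, [%g1+2047]    ! probe in the delay slot; 2047 is the
                                   ! 64-bit stack bias

   The store in the delay slot performs the probe, so each iteration both
   moves TEST_ADDR down by one interval and touches the newly exposed
   page.  */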
5489
5490 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5491 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5492 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5493 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5494 the action to be performed if it returns false. Return the new offset. */
5495
5496 typedef bool (*sorr_pred_t) (unsigned int, int);
5497 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5498
5499 static int
5500 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5501 int offset, int leaf_function, sorr_pred_t save_p,
5502 sorr_act_t action_true, sorr_act_t action_false)
5503 {
5504 unsigned int i;
5505 rtx mem;
5506 rtx_insn *insn;
5507
5508 if (TARGET_ARCH64 && high <= 32)
5509 {
5510 int fp_offset = -1;
5511
5512 for (i = low; i < high; i++)
5513 {
5514 if (save_p (i, leaf_function))
5515 {
5516 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5517 base, offset));
5518 if (action_true == SORR_SAVE)
5519 {
5520 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5521 RTX_FRAME_RELATED_P (insn) = 1;
5522 }
5523 else /* action_true == SORR_RESTORE */
5524 {
5525 /* The frame pointer must be restored last since its old
5526 value may be used as the base address for the frame. This
5527 is problematic in 64-bit mode only because of the lack
5528 of a double-word load instruction. */
5529 if (i == HARD_FRAME_POINTER_REGNUM)
5530 fp_offset = offset;
5531 else
5532 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5533 }
5534 offset += 8;
5535 }
5536 else if (action_false == SORR_ADVANCE)
5537 offset += 8;
5538 }
5539
5540 if (fp_offset >= 0)
5541 {
5542 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5543 emit_move_insn (hard_frame_pointer_rtx, mem);
5544 }
5545 }
5546 else
5547 {
5548 for (i = low; i < high; i += 2)
5549 {
5550 bool reg0 = save_p (i, leaf_function);
5551 bool reg1 = save_p (i + 1, leaf_function);
5552 machine_mode mode;
5553 int regno;
5554
5555 if (reg0 && reg1)
5556 {
5557 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5558 regno = i;
5559 }
5560 else if (reg0)
5561 {
5562 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5563 regno = i;
5564 }
5565 else if (reg1)
5566 {
5567 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5568 regno = i + 1;
5569 offset += 4;
5570 }
5571 else
5572 {
5573 if (action_false == SORR_ADVANCE)
5574 offset += 8;
5575 continue;
5576 }
5577
5578 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5579 if (action_true == SORR_SAVE)
5580 {
5581 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5582 RTX_FRAME_RELATED_P (insn) = 1;
5583 if (mode == DImode)
5584 {
5585 rtx set1, set2;
5586 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5587 offset));
5588 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5589 RTX_FRAME_RELATED_P (set1) = 1;
5590 mem
5591 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5592 offset + 4));
5593 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5594 RTX_FRAME_RELATED_P (set2) = 1;
5595 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5596 gen_rtx_PARALLEL (VOIDmode,
5597 gen_rtvec (2, set1, set2)));
5598 }
5599 }
5600 else /* action_true == SORR_RESTORE */
5601 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5602
5603 /* Bump and round down to double word
5604 in case we already bumped by 4. */
5605 offset = ROUND_DOWN (offset + 8, 8);
5606 }
5607 }
5608
5609 return offset;
5610 }
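
/* Illustrative sketch of the pairing logic above: if SAVE_P reports that
   both %f16 and %f17 must be saved, the loop emits a single DFmode store
   for the pair; if only %f17 must be saved, the offset is first bumped by
   4 and an SFmode store is emitted, after which the running offset is
   rounded so that the next pair starts on a double-word boundary.
   Integer pairs use DImode/SImode instead, and a DImode save is in
   addition described to the unwinder as two word-sized stores via
   REG_FRAME_RELATED_EXPR.  */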
5611
5612 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5613
5614 static rtx
5615 emit_adjust_base_to_offset (rtx base, int offset)
5616 {
5617 /* ??? This might be optimized a little as %g1 might already have a
5618 value close enough that a single add insn will do. */
5619 /* ??? Although, all of this is probably only a temporary fix because
5620 if %g1 can hold a function result, then sparc_expand_epilogue will
5621 lose (the result will be clobbered). */
5622 rtx new_base = gen_rtx_REG (Pmode, 1);
5623 emit_move_insn (new_base, GEN_INT (offset));
5624 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5625 return new_base;
5626 }
5627
5628 /* Emit code to save/restore call-saved global and FP registers. */
5629
5630 static void
5631 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5632 {
5633 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5634 {
5635 base = emit_adjust_base_to_offset (base, offset);
5636 offset = 0;
5637 }
5638
5639 offset
5640 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5641 save_global_or_fp_reg_p, action, SORR_NONE);
5642 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5643 save_global_or_fp_reg_p, action, SORR_NONE);
5644 }
5645
5646 /* Emit code to save/restore call-saved local and in registers. */
5647
5648 static void
5649 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5650 {
5651 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5652 {
5653 base = emit_adjust_base_to_offset (base, offset);
5654 offset = 0;
5655 }
5656
5657 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5658 save_local_or_in_reg_p, action, SORR_ADVANCE);
5659 }
5660
5661 /* Emit a window_save insn. */
5662
5663 static rtx_insn *
5664 emit_window_save (rtx increment)
5665 {
5666 rtx_insn *insn = emit_insn (gen_window_save (increment));
5667 RTX_FRAME_RELATED_P (insn) = 1;
5668
5669 /* The incoming return address (%o7) is saved in %i7. */
5670 add_reg_note (insn, REG_CFA_REGISTER,
5671 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5672 gen_rtx_REG (Pmode,
5673 INCOMING_RETURN_ADDR_REGNUM)));
5674
5675 /* The window save event. */
5676 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5677
5678 /* The CFA is %fp, the hard frame pointer. */
5679 add_reg_note (insn, REG_CFA_DEF_CFA,
5680 plus_constant (Pmode, hard_frame_pointer_rtx,
5681 INCOMING_FRAME_SP_OFFSET));
5682
5683 return insn;
5684 }
5685
5686 /* Generate an increment for the stack pointer. */
5687
5688 static rtx
5689 gen_stack_pointer_inc (rtx increment)
5690 {
5691 return gen_rtx_SET (stack_pointer_rtx,
5692 gen_rtx_PLUS (Pmode,
5693 stack_pointer_rtx,
5694 increment));
5695 }
5696
5697 /* Expand the function prologue. The prologue is responsible for reserving
5698 storage for the frame, saving the call-saved registers and loading the
5699 GOT register if needed. */
5700
5701 void
5702 sparc_expand_prologue (void)
5703 {
5704 HOST_WIDE_INT size;
5705 rtx_insn *insn;
5706
5707 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5708 on the final value of the flag means deferring the prologue/epilogue
5709 expansion until just before the second scheduling pass, which is too
5710 late to emit multiple epilogues or return insns.
5711
5712 Of course we are making the assumption that the value of the flag
5713 will not change between now and its final value. Of the three parts
5714 of the formula, only the last one can reasonably vary. Let's take a
5715 closer look, after assuming that the first two are set to true
5716 (otherwise the last value is effectively silenced).
5717
5718 If only_leaf_regs_used returns false, the global predicate will also
5719 be false so the actual frame size calculated below will be positive.
5720 As a consequence, the save_register_window insn will be emitted in
5721 the instruction stream; now this insn explicitly references %fp
5722 which is not a leaf register so only_leaf_regs_used will always
5723 return false subsequently.
5724
5725 If only_leaf_regs_used returns true, we hope that the subsequent
5726 optimization passes won't cause non-leaf registers to pop up. For
5727 example, the regrename pass has special provisions to not rename to
5728 non-leaf registers in a leaf function. */
5729 sparc_leaf_function_p
5730 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5731
5732 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5733
5734 if (flag_stack_usage_info)
5735 current_function_static_stack_size = size;
5736
5737 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5738 {
5739 if (crtl->is_leaf && !cfun->calls_alloca)
5740 {
5741 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5742 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5743 size - STACK_CHECK_PROTECT);
5744 }
5745 else if (size > 0)
5746 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5747 }
5748
5749 if (size == 0)
5750 ; /* do nothing. */
5751 else if (sparc_leaf_function_p)
5752 {
5753 rtx size_int_rtx = GEN_INT (-size);
5754
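/* The 4096/8192 thresholds below reflect the 13-bit signed immediate field
   (simm13, range -4096..4095) of the add instruction: one add can lower %sp
   by at most 4096, two adds by at most 8192, and larger frames need the
   constant materialized in a register first.  */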
5755 if (size <= 4096)
5756 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5757 else if (size <= 8192)
5758 {
5759 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5760 RTX_FRAME_RELATED_P (insn) = 1;
5761
5762 /* %sp is still the CFA register. */
5763 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5764 }
5765 else
5766 {
5767 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5768 emit_move_insn (size_rtx, size_int_rtx);
5769 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5770 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5771 gen_stack_pointer_inc (size_int_rtx));
5772 }
5773
5774 RTX_FRAME_RELATED_P (insn) = 1;
5775 }
5776 else
5777 {
5778 rtx size_int_rtx = GEN_INT (-size);
5779
5780 if (size <= 4096)
5781 emit_window_save (size_int_rtx);
5782 else if (size <= 8192)
5783 {
5784 emit_window_save (GEN_INT (-4096));
5785
5786 /* %sp is not the CFA register anymore. */
5787 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5788
5789 /* Make sure no %fp-based store is issued until after the frame is
5790 established. The offset between the frame pointer and the stack
5791 pointer is calculated relative to the value of the stack pointer
5792 at the end of the function prologue, and moving instructions that
5793 access the stack via the frame pointer between the instructions
5794 that decrement the stack pointer could result in accessing the
5795 register window save area, which is volatile. */
5796 emit_insn (gen_frame_blockage ());
5797 }
5798 else
5799 {
5800 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5801 emit_move_insn (size_rtx, size_int_rtx);
5802 emit_window_save (size_rtx);
5803 }
5804 }
5805
5806 if (sparc_leaf_function_p)
5807 {
5808 sparc_frame_base_reg = stack_pointer_rtx;
5809 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5810 }
5811 else
5812 {
5813 sparc_frame_base_reg = hard_frame_pointer_rtx;
5814 sparc_frame_base_offset = SPARC_STACK_BIAS;
5815 }
5816
5817 if (sparc_n_global_fp_regs > 0)
5818 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5819 sparc_frame_base_offset
5820 - sparc_apparent_frame_size,
5821 SORR_SAVE);
5822
5823 /* Load the GOT register if needed. */
5824 if (crtl->uses_pic_offset_table)
5825 load_got_register ();
5826
5827 /* Advertise that the data calculated just above are now valid. */
5828 sparc_prologue_data_valid_p = true;
5829 }
5830
5831 /* Expand the function prologue in the flat window model. The prologue is
5832 responsible for reserving storage for the frame, saving the call-saved
5833 registers and loading the GOT register if needed. */
5834
5835 void
5836 sparc_flat_expand_prologue (void)
5837 {
5838 HOST_WIDE_INT size;
5839 rtx_insn *insn;
5840
5841 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5842
5843 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5844
5845 if (flag_stack_usage_info)
5846 current_function_static_stack_size = size;
5847
5848 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5849 {
5850 if (crtl->is_leaf && !cfun->calls_alloca)
5851 {
5852 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5853 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5854 size - STACK_CHECK_PROTECT);
5855 }
5856 else if (size > 0)
5857 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5858 }
5859
5860 if (sparc_save_local_in_regs_p)
5861 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5862 SORR_SAVE);
5863
5864 if (size == 0)
5865 ; /* do nothing. */
5866 else
5867 {
5868 rtx size_int_rtx, size_rtx;
5869
5870 size_rtx = size_int_rtx = GEN_INT (-size);
5871
5872 /* We establish the frame (i.e. decrement the stack pointer) first, even
5873 if we use a frame pointer, because we cannot clobber any call-saved
5874 registers, including the frame pointer, if we haven't created a new
5875 register save area, for the sake of compatibility with the ABI. */
5876 if (size <= 4096)
5877 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5878 else if (size <= 8192 && !frame_pointer_needed)
5879 {
5880 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5881 RTX_FRAME_RELATED_P (insn) = 1;
5882 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5883 }
5884 else
5885 {
5886 size_rtx = gen_rtx_REG (Pmode, 1);
5887 emit_move_insn (size_rtx, size_int_rtx);
5888 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5889 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5890 gen_stack_pointer_inc (size_int_rtx));
5891 }
5892 RTX_FRAME_RELATED_P (insn) = 1;
5893
5894 /* Ensure nothing is scheduled until after the frame is established. */
5895 emit_insn (gen_blockage ());
5896
5897 if (frame_pointer_needed)
5898 {
5899 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5900 gen_rtx_MINUS (Pmode,
5901 stack_pointer_rtx,
5902 size_rtx)));
5903 RTX_FRAME_RELATED_P (insn) = 1;
5904
5905 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5906 gen_rtx_SET (hard_frame_pointer_rtx,
5907 plus_constant (Pmode, stack_pointer_rtx,
5908 size)));
5909 }
5910
5911 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5912 {
5913 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5914 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5915
5916 insn = emit_move_insn (i7, o7);
5917 RTX_FRAME_RELATED_P (insn) = 1;
5918
5919 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5920
5921 /* Prevent this instruction from ever being considered dead,
5922 even if this function has no epilogue. */
5923 emit_use (i7);
5924 }
5925 }
5926
5927 if (frame_pointer_needed)
5928 {
5929 sparc_frame_base_reg = hard_frame_pointer_rtx;
5930 sparc_frame_base_offset = SPARC_STACK_BIAS;
5931 }
5932 else
5933 {
5934 sparc_frame_base_reg = stack_pointer_rtx;
5935 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5936 }
5937
5938 if (sparc_n_global_fp_regs > 0)
5939 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5940 sparc_frame_base_offset
5941 - sparc_apparent_frame_size,
5942 SORR_SAVE);
5943
5944 /* Load the GOT register if needed. */
5945 if (crtl->uses_pic_offset_table)
5946 load_got_register ();
5947
5948 /* Advertise that the data calculated just above are now valid. */
5949 sparc_prologue_data_valid_p = true;
5950 }
5951
5952 /* This function generates the assembly code for function entry, which boils
5953 down to emitting the necessary .register directives. */
5954
5955 static void
5956 sparc_asm_function_prologue (FILE *file)
5957 {
5958 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5959 if (!TARGET_FLAT)
5960 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5961
5962 sparc_output_scratch_registers (file);
5963 }
5964
5965 /* Expand the function epilogue, either normal or part of a sibcall.
5966 We emit all the instructions except the return or the call. */
5967
5968 void
5969 sparc_expand_epilogue (bool for_eh)
5970 {
5971 HOST_WIDE_INT size = sparc_frame_size;
5972
5973 if (cfun->calls_alloca)
5974 emit_insn (gen_frame_blockage ());
5975
5976 if (sparc_n_global_fp_regs > 0)
5977 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5978 sparc_frame_base_offset
5979 - sparc_apparent_frame_size,
5980 SORR_RESTORE);
5981
5982 if (size == 0 || for_eh)
5983 ; /* do nothing. */
5984 else if (sparc_leaf_function_p)
5985 {
5986 if (size <= 4096)
5987 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5988 else if (size <= 8192)
5989 {
5990 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5991 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5992 }
5993 else
5994 {
5995 rtx reg = gen_rtx_REG (Pmode, 1);
5996 emit_move_insn (reg, GEN_INT (size));
5997 emit_insn (gen_stack_pointer_inc (reg));
5998 }
5999 }
6000 }
6001
6002 /* Expand the function epilogue in the flat window model, either normal or
6003 part of a sibcall. We emit all the instructions except the return or the call. */
6004
6005 void
6006 sparc_flat_expand_epilogue (bool for_eh)
6007 {
6008 HOST_WIDE_INT size = sparc_frame_size;
6009
6010 if (sparc_n_global_fp_regs > 0)
6011 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6012 sparc_frame_base_offset
6013 - sparc_apparent_frame_size,
6014 SORR_RESTORE);
6015
6016 /* If we have a frame pointer, we'll need both to restore it before the
6017 frame is destroyed and to use its current value in destroying the frame.
6018 Since we don't have an atomic way to do that in the flat window model,
6019 we save the current value into a temporary register (%g1). */
6020 if (frame_pointer_needed && !for_eh)
6021 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6022
6023 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6024 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6025 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6026
6027 if (sparc_save_local_in_regs_p)
6028 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6029 sparc_frame_base_offset,
6030 SORR_RESTORE);
6031
6032 if (size == 0 || for_eh)
6033 ; /* do nothing. */
6034 else if (frame_pointer_needed)
6035 {
6036 /* Make sure the frame is destroyed after everything else is done. */
6037 emit_insn (gen_blockage ());
6038
6039 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6040 }
6041 else
6042 {
6043 /* Likewise. */
6044 emit_insn (gen_blockage ());
6045
6046 if (size <= 4096)
6047 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6048 else if (size <= 8192)
6049 {
6050 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6051 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6052 }
6053 else
6054 {
6055 rtx reg = gen_rtx_REG (Pmode, 1);
6056 emit_move_insn (reg, GEN_INT (size));
6057 emit_insn (gen_stack_pointer_inc (reg));
6058 }
6059 }
6060 }
6061
6062 /* Return true if it is appropriate to emit `return' instructions in the
6063 body of a function. */
6064
6065 bool
6066 sparc_can_use_return_insn_p (void)
6067 {
6068 return sparc_prologue_data_valid_p
6069 && sparc_n_global_fp_regs == 0
6070 && TARGET_FLAT
6071 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6072 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6073 }
6074
6075 /* This function generates the assembly code for function exit. */
6076
6077 static void
6078 sparc_asm_function_epilogue (FILE *file)
6079 {
6080 /* If the last two instructions of a function are "call foo; dslot;"
6081 the return address might point to the first instruction in the next
6082 function and we have to output a dummy nop for the sake of sane
6083 backtraces in such cases. This is pointless for sibling calls since
6084 the return address is explicitly adjusted. */
6085
6086 rtx_insn *insn = get_last_insn ();
6087
6088 rtx last_real_insn = prev_real_insn (insn);
6089 if (last_real_insn
6090 && NONJUMP_INSN_P (last_real_insn)
6091 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6092 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6093
6094 if (last_real_insn
6095 && CALL_P (last_real_insn)
6096 && !SIBLING_CALL_P (last_real_insn))
6097 fputs("\tnop\n", file);
6098
6099 sparc_output_deferred_case_vectors ();
6100 }
6101
6102 /* Output a 'restore' instruction. */
6103
6104 static void
6105 output_restore (rtx pat)
6106 {
6107 rtx operands[3];
6108
6109 if (! pat)
6110 {
6111 fputs ("\t restore\n", asm_out_file);
6112 return;
6113 }
6114
6115 gcc_assert (GET_CODE (pat) == SET);
6116
6117 operands[0] = SET_DEST (pat);
6118 pat = SET_SRC (pat);
6119
6120 switch (GET_CODE (pat))
6121 {
6122 case PLUS:
6123 operands[1] = XEXP (pat, 0);
6124 operands[2] = XEXP (pat, 1);
6125 output_asm_insn (" restore %r1, %2, %Y0", operands);
6126 break;
6127 case LO_SUM:
6128 operands[1] = XEXP (pat, 0);
6129 operands[2] = XEXP (pat, 1);
6130 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6131 break;
6132 case ASHIFT:
6133 operands[1] = XEXP (pat, 0);
6134 gcc_assert (XEXP (pat, 1) == const1_rtx);
6135 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6136 break;
6137 default:
6138 operands[1] = pat;
6139 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6140 break;
6141 }
6142 }
6143
6144 /* Output a return. */
6145
6146 const char *
6147 output_return (rtx_insn *insn)
6148 {
6149 if (crtl->calls_eh_return)
6150 {
6151 /* If the function uses __builtin_eh_return, the eh_return
6152 machinery occupies the delay slot. */
6153 gcc_assert (!final_sequence);
6154
6155 if (flag_delayed_branch)
6156 {
6157 if (!TARGET_FLAT && TARGET_V9)
6158 fputs ("\treturn\t%i7+8\n", asm_out_file);
6159 else
6160 {
6161 if (!TARGET_FLAT)
6162 fputs ("\trestore\n", asm_out_file);
6163
6164 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6165 }
6166
6167 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6168 }
6169 else
6170 {
6171 if (!TARGET_FLAT)
6172 fputs ("\trestore\n", asm_out_file);
6173
6174 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6175 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6176 }
6177 }
6178 else if (sparc_leaf_function_p || TARGET_FLAT)
6179 {
6180 /* This is a leaf or flat function so we don't have to bother restoring
6181 the register window, which frees us from dealing with the convoluted
6182 semantics of restore/return. We simply output the jump to the
6183 return address and the insn in the delay slot (if any). */
6184
6185 return "jmp\t%%o7+%)%#";
6186 }
6187 else
6188 {
6189 /* This is a regular function so we have to restore the register window.
6190 We may have a pending insn for the delay slot, which will be either
6191 combined with the 'restore' instruction or put in the delay slot of
6192 the 'return' instruction. */
6193
6194 if (final_sequence)
6195 {
6196 rtx_insn *delay;
6197 rtx pat;
6198 int seen;
6199
6200 delay = NEXT_INSN (insn);
6201 gcc_assert (delay);
6202
6203 pat = PATTERN (delay);
6204
6205 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6206 {
6207 epilogue_renumber (&pat, 0);
6208 return "return\t%%i7+%)%#";
6209 }
6210 else
6211 {
6212 output_asm_insn ("jmp\t%%i7+%)", NULL);
6213
6214 /* We're going to output the insn in the delay slot manually.
6215 Make sure to output its source location first. */
6216 PATTERN (delay) = gen_blockage ();
6217 INSN_CODE (delay) = -1;
6218 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6219 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6220
6221 output_restore (pat);
6222 }
6223 }
6224 else
6225 {
6226 /* The delay slot is empty. */
6227 if (TARGET_V9)
6228 return "return\t%%i7+%)\n\t nop";
6229 else if (flag_delayed_branch)
6230 return "jmp\t%%i7+%)\n\t restore";
6231 else
6232 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6233 }
6234 }
6235
6236 return "";
6237 }
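
/* Illustrative sketch of the sequences emitted above, assuming the common
   return-offset value of 8 for the %) operand:

     leaf or flat function:              jmp     %o7+8
                                          <delay slot insn or nop>

     regular function, V9, empty slot:   return  %i7+8
                                          nop

     regular function, pre-V9 with
     delayed branches enabled:           jmp     %i7+8
                                          restore                       */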
6238
6239 /* Output a sibling call. */
6240
6241 const char *
6242 output_sibcall (rtx_insn *insn, rtx call_operand)
6243 {
6244 rtx operands[1];
6245
6246 gcc_assert (flag_delayed_branch);
6247
6248 operands[0] = call_operand;
6249
6250 if (sparc_leaf_function_p || TARGET_FLAT)
6251 {
6252 /* This is a leaf or flat function so we don't have to bother restoring
6253 the register window. We simply output the jump to the function and
6254 the insn in the delay slot (if any). */
6255
6256 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6257
6258 if (final_sequence)
6259 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6260 operands);
6261 else
6262 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6263 it into a branch if possible. */
6264 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6265 operands);
6266 }
6267 else
6268 {
6269 /* This is a regular function so we have to restore the register window.
6270 We may have a pending insn for the delay slot, which will be combined
6271 with the 'restore' instruction. */
6272
6273 output_asm_insn ("call\t%a0, 0", operands);
6274
6275 if (final_sequence)
6276 {
6277 rtx_insn *delay;
6278 rtx pat;
6279 int seen;
6280
6281 delay = NEXT_INSN (insn);
6282 gcc_assert (delay);
6283
6284 pat = PATTERN (delay);
6285
6286 /* We're going to output the insn in the delay slot manually.
6287 Make sure to output its source location first. */
6288 PATTERN (delay) = gen_blockage ();
6289 INSN_CODE (delay) = -1;
6290 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6291 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6292
6293 output_restore (pat);
6294 }
6295 else
6296 output_restore (NULL_RTX);
6297 }
6298
6299 return "";
6300 }
6301 \f
6302 /* Functions for handling argument passing.
6303
6304 For 32-bit, the first 6 args are normally in registers and the rest are
6305 pushed. Any arg that starts within the first 6 words is at least
6306 partially passed in a register unless its data type forbids it.
6307
6308 For 64-bit, the argument registers are laid out as an array of 16 elements
6309 and arguments are added sequentially. The first 6 int args and up to the
6310 first 16 fp args (depending on size) are passed in regs.
6311
6312 Slot Stack Integral Float Float in structure Double Long Double
6313 ---- ----- -------- ----- ------------------ ------ -----------
6314 15 [SP+248] %f31 %f30,%f31 %d30
6315 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6316 13 [SP+232] %f27 %f26,%f27 %d26
6317 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6318 11 [SP+216] %f23 %f22,%f23 %d22
6319 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6320 9 [SP+200] %f19 %f18,%f19 %d18
6321 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6322 7 [SP+184] %f15 %f14,%f15 %d14
6323 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6324 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6325 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6326 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6327 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6328 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6329 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6330
6331 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6332
6333 Integral arguments are always passed as 64-bit quantities appropriately
6334 extended.
6335
6336 Passing of floating point values is handled as follows.
6337 If a prototype is in scope:
6338 If the value is in a named argument (i.e. not a stdarg function or a
6339 value not part of the `...') then the value is passed in the appropriate
6340 fp reg.
6341 If the value is part of the `...' and is passed in one of the first 6
6342 slots then the value is passed in the appropriate int reg.
6343 If the value is part of the `...' and is not passed in one of the first 6
6344 slots then the value is passed in memory.
6345 If a prototype is not in scope:
6346 If the value is one of the first 6 arguments the value is passed in the
6347 appropriate integer reg and the appropriate fp reg.
6348 If the value is not one of the first 6 arguments the value is passed in
6349 the appropriate fp reg and in memory.
6350
6351
6352 Summary of the calling conventions implemented by GCC on the SPARC:
6353
6354 32-bit ABI:
6355 size argument return value
6356
6357 small integer <4 int. reg. int. reg.
6358 word 4 int. reg. int. reg.
6359 double word 8 int. reg. int. reg.
6360
6361 _Complex small integer <8 int. reg. int. reg.
6362 _Complex word 8 int. reg. int. reg.
6363 _Complex double word 16 memory int. reg.
6364
6365 vector integer <=8 int. reg. FP reg.
6366 vector integer >8 memory memory
6367
6368 float 4 int. reg. FP reg.
6369 double 8 int. reg. FP reg.
6370 long double 16 memory memory
6371
6372 _Complex float 8 memory FP reg.
6373 _Complex double 16 memory FP reg.
6374 _Complex long double 32 memory FP reg.
6375
6376 vector float any memory memory
6377
6378 aggregate any memory memory
6379
6380
6381
6382 64-bit ABI:
6383 size argument return value
6384
6385 small integer <8 int. reg. int. reg.
6386 word 8 int. reg. int. reg.
6387 double word 16 int. reg. int. reg.
6388
6389 _Complex small integer <16 int. reg. int. reg.
6390 _Complex word 16 int. reg. int. reg.
6391 _Complex double word 32 memory int. reg.
6392
6393 vector integer <=16 FP reg. FP reg.
6394 vector integer 16<s<=32 memory FP reg.
6395 vector integer >32 memory memory
6396
6397 float 4 FP reg. FP reg.
6398 double 8 FP reg. FP reg.
6399 long double 16 FP reg. FP reg.
6400
6401 _Complex float 8 FP reg. FP reg.
6402 _Complex double 16 FP reg. FP reg.
6403 _Complex long double 32 memory FP reg.
6404
6405 vector float <=16 FP reg. FP reg.
6406 vector float 16<s<=32 memory FP reg.
6407 vector float >32 memory memory
6408
6409 aggregate <=16 reg. reg.
6410 aggregate 16<s<=32 memory reg.
6411 aggregate >32 memory memory
6412
6413
6414
6415 Note #1: complex floating-point types follow the extended SPARC ABIs as
6416 implemented by the Sun compiler.
6417
6418 Note #2: integral vector types follow the scalar floating-point types
6419 conventions to match what is implemented by the Sun VIS SDK.
6420
6421 Note #3: floating-point vector types follow the aggregate types
6422 conventions. */
6423
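/* Illustrative sketch, not part of the original commentary: for a prototyped
   64-bit call such as

     void f (int a, double b, float c);

   a lands in slot 0 and is passed in %o0, b in slot 1 and %d2, and c in
   slot 2 and %f5 (a single-precision value is right-justified in its
   8-byte slot), the corresponding stack slots being [SP+128], [SP+136]
   and [SP+144] in the table above.  */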
6424
6425 /* Maximum number of int regs for args. */
6426 #define SPARC_INT_ARG_MAX 6
6427 /* Maximum number of fp regs for args. */
6428 #define SPARC_FP_ARG_MAX 16
6429 /* Number of words (partially) occupied for a given size in units. */
6430 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6431
6432 /* Handle the INIT_CUMULATIVE_ARGS macro.
6433 Initialize a variable CUM of type CUMULATIVE_ARGS
6434 for a call to a function whose data type is FNTYPE.
6435 For a library call, FNTYPE is 0. */
6436
6437 void
6438 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6439 {
6440 cum->words = 0;
6441 cum->prototype_p = fntype && prototype_p (fntype);
6442 cum->libcall_p = !fntype;
6443 }
6444
6445 /* Handle promotion of pointer and integer arguments. */
6446
6447 static machine_mode
6448 sparc_promote_function_mode (const_tree type, machine_mode mode,
6449 int *punsignedp, const_tree, int)
6450 {
6451 if (type && POINTER_TYPE_P (type))
6452 {
6453 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6454 return Pmode;
6455 }
6456
6457 /* Integral arguments are passed as full words, as per the ABI. */
6458 if (GET_MODE_CLASS (mode) == MODE_INT
6459 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6460 return word_mode;
6461
6462 return mode;
6463 }
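
/* Illustrative sketch: a 'short' argument (HImode) is narrower than a word,
   so the hook above promotes it to word_mode -- SImode for the 32-bit ABI,
   DImode for the 64-bit ABI -- while a pointer argument is promoted to
   Pmode with POINTERS_EXTEND_UNSIGNED selecting the extension.  */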
6464
6465 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6466
6467 static bool
6468 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6469 {
6470 return TARGET_ARCH64;
6471 }
6472
6473 /* Traverse the record TYPE recursively and call FUNC on its fields.
6474 NAMED is true if this is for a named parameter. DATA is passed
6475 to FUNC for each field. OFFSET is the starting position and
6476 PACKED is true if we are inside a packed record. */
6477
6478 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6479 static void
6480 traverse_record_type (const_tree type, bool named, T *data,
6481 HOST_WIDE_INT offset = 0, bool packed = false)
6482 {
6483 /* The ABI obviously doesn't specify how packed structures are passed.
6484 These are passed in integer regs if possible, otherwise memory. */
6485 if (!packed)
6486 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6487 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6488 {
6489 packed = true;
6490 break;
6491 }
6492
6493 /* Walk the real fields, but skip those with no size or a zero size.
6494 ??? Fields with variable offset are handled as having zero offset. */
6495 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6496 if (TREE_CODE (field) == FIELD_DECL)
6497 {
6498 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6499 continue;
6500
6501 HOST_WIDE_INT bitpos = offset;
6502 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6503 bitpos += int_bit_position (field);
6504
6505 tree field_type = TREE_TYPE (field);
6506 if (TREE_CODE (field_type) == RECORD_TYPE)
6507 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6508 packed);
6509 else
6510 {
6511 const bool fp_type
6512 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6513 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6514 data);
6515 }
6516 }
6517 }
6518
6519 /* Handle recursive register classifying for structure layout. */
6520
6521 typedef struct
6522 {
6523 bool fp_regs; /* true if field eligible to FP registers. */
6524 bool fp_regs_in_first_word; /* true if such field in first word. */
6525 } classify_data_t;
6526
6527 /* A subroutine of function_arg_slotno. Classify the field. */
6528
6529 inline void
6530 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6531 classify_data_t *data)
6532 {
6533 if (fp)
6534 {
6535 data->fp_regs = true;
6536 if (bitpos < BITS_PER_WORD)
6537 data->fp_regs_in_first_word = true;
6538 }
6539 }
6540
6541 /* Compute the slot number to pass an argument in.
6542 Return the slot number or -1 if passing on the stack.
6543
6544 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6545 the preceding args and about the function being called.
6546 MODE is the argument's machine mode.
6547 TYPE is the data type of the argument (as a tree).
6548 This is null for libcalls where that information may
6549 not be available.
6550 NAMED is nonzero if this argument is a named parameter
6551 (otherwise it is an extra parameter matching an ellipsis).
6552 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6553 *PREGNO records the register number to use if scalar type.
6554 *PPADDING records the amount of padding needed in words. */
6555
6556 static int
6557 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6558 const_tree type, bool named, bool incoming,
6559 int *pregno, int *ppadding)
6560 {
6561 int regbase = (incoming
6562 ? SPARC_INCOMING_INT_ARG_FIRST
6563 : SPARC_OUTGOING_INT_ARG_FIRST);
6564 int slotno = cum->words;
6565 enum mode_class mclass;
6566 int regno;
6567
6568 *ppadding = 0;
6569
6570 if (type && TREE_ADDRESSABLE (type))
6571 return -1;
6572
6573 if (TARGET_ARCH32
6574 && mode == BLKmode
6575 && type
6576 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6577 return -1;
6578
6579 /* For SPARC64, objects requiring 16-byte alignment get it. */
6580 if (TARGET_ARCH64
6581 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6582 && (slotno & 1) != 0)
6583 slotno++, *ppadding = 1;
6584
6585 mclass = GET_MODE_CLASS (mode);
6586 if (type && TREE_CODE (type) == VECTOR_TYPE)
6587 {
6588 /* Vector types deserve special treatment because they are
6589 polymorphic wrt their mode, depending upon whether VIS
6590 instructions are enabled. */
6591 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6592 {
6593 /* The SPARC port defines no floating-point vector modes. */
6594 gcc_assert (mode == BLKmode);
6595 }
6596 else
6597 {
6598 /* Integral vector types should either have a vector
6599 mode or an integral mode, because we are guaranteed
6600 by pass_by_reference that their size is not greater
6601 than 16 bytes and TImode is 16 bytes wide. */
6602 gcc_assert (mode != BLKmode);
6603
6604 /* Vector integers are handled like floats according to
6605 the Sun VIS SDK. */
6606 mclass = MODE_FLOAT;
6607 }
6608 }
6609
6610 switch (mclass)
6611 {
6612 case MODE_FLOAT:
6613 case MODE_COMPLEX_FLOAT:
6614 case MODE_VECTOR_INT:
6615 if (TARGET_ARCH64 && TARGET_FPU && named)
6616 {
6617 /* If all arg slots are filled, then must pass on stack. */
6618 if (slotno >= SPARC_FP_ARG_MAX)
6619 return -1;
6620
6621 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6622 /* Arguments filling only a single FP register are
6623 right-justified in the outer double FP register. */
6624 if (GET_MODE_SIZE (mode) <= 4)
6625 regno++;
6626 break;
6627 }
6628 /* fallthrough */
6629
6630 case MODE_INT:
6631 case MODE_COMPLEX_INT:
6632 /* If all arg slots are filled, then must pass on stack. */
6633 if (slotno >= SPARC_INT_ARG_MAX)
6634 return -1;
6635
6636 regno = regbase + slotno;
6637 break;
6638
6639 case MODE_RANDOM:
6640 if (mode == VOIDmode)
6641 /* MODE is VOIDmode when generating the actual call. */
6642 return -1;
6643
6644 gcc_assert (mode == BLKmode);
6645
6646 if (TARGET_ARCH32
6647 || !type
6648 || (TREE_CODE (type) != RECORD_TYPE
6649 && TREE_CODE (type) != VECTOR_TYPE))
6650 {
6651 /* If all arg slots are filled, then must pass on stack. */
6652 if (slotno >= SPARC_INT_ARG_MAX)
6653 return -1;
6654
6655 regno = regbase + slotno;
6656 }
6657 else /* TARGET_ARCH64 && type */
6658 {
6659 /* If all arg slots are filled, then must pass on stack. */
6660 if (slotno >= SPARC_FP_ARG_MAX)
6661 return -1;
6662
6663 if (TREE_CODE (type) == RECORD_TYPE)
6664 {
6665 classify_data_t data = { false, false };
6666 traverse_record_type<classify_data_t, classify_registers>
6667 (type, named, &data);
6668
6669 if (data.fp_regs)
6670 {
6671 /* If all FP slots are filled except for the last one and
6672 there is no FP field in the first word, then must pass
6673 on stack. */
6674 if (slotno >= SPARC_FP_ARG_MAX - 1
6675 && !data.fp_regs_in_first_word)
6676 return -1;
6677 }
6678 else
6679 {
6680 /* If all int slots are filled, then must pass on stack. */
6681 if (slotno >= SPARC_INT_ARG_MAX)
6682 return -1;
6683 }
6684 }
6685
6686 /* PREGNO isn't set since both int and FP regs can be used. */
6687 return slotno;
6688 }
6689 break;
6690
6691 default :
6692 gcc_unreachable ();
6693 }
6694
6695 *pregno = regno;
6696 return slotno;
6697 }
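
/* Illustrative sketch: a named double argument occupying slot 7 on
   TARGET_ARCH64 goes through the MODE_FLOAT case above and gets
   regno = SPARC_FP_ARG_FIRST + 7 * 2, i.e. %f14, which as a
   double-precision register is %d14 -- matching slot 7 in the table
   further up.  Conversely, once slotno reaches SPARC_INT_ARG_MAX in the
   MODE_INT case, the function returns -1 and the argument is passed on
   the stack.  */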
6698
6699 /* Handle recursive register counting/assigning for structure layout. */
6700
6701 typedef struct
6702 {
6703 int slotno; /* slot number of the argument. */
6704 int regbase; /* regno of the base register. */
6705 int intoffset; /* offset of the first pending integer field. */
6706 int nregs; /* number of words passed in registers. */
6707 bool stack; /* true if part of the argument is on the stack. */
6708 rtx ret; /* return expression being built. */
6709 } assign_data_t;
6710
6711 /* A subroutine of function_arg_record_value. Compute the number of integer
6712 registers to be assigned between PARMS->intoffset and BITPOS. Return
6713 true if at least one integer register is assigned or false otherwise. */
6714
6715 static bool
6716 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6717 {
6718 if (data->intoffset < 0)
6719 return false;
6720
6721 const int intoffset = data->intoffset;
6722 data->intoffset = -1;
6723
6724 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6725 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6726 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6727 int nregs = (endbit - startbit) / BITS_PER_WORD;
6728
6729 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6730 {
6731 nregs = SPARC_INT_ARG_MAX - this_slotno;
6732
6733 /* We need to pass this field (partly) on the stack. */
6734 data->stack = true;
6735 }
6736
6737 if (nregs <= 0)
6738 return false;
6739
6740 *pnregs = nregs;
6741 return true;
6742 }
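
/* Illustrative sketch: on TARGET_ARCH64 (BITS_PER_WORD == 64), with
   data->intoffset == 0 and BITPOS == 128 the pending integer span covers
   bits [0, 128) and thus two registers (nregs == 2).  If this_slotno were
   5, only SPARC_INT_ARG_MAX - 5 == 1 register would remain, so nregs is
   clamped to 1 and data->stack is set to record that the remainder of the
   field goes on the stack.  */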
6743
6744 /* A subroutine of function_arg_record_value. Compute the number and the mode
6745 of the FP registers to be assigned for FIELD. Return true if at least one
6746 FP register is assigned or false otherwise. */
6747
6748 static bool
6749 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6750 assign_data_t *data,
6751 int *pnregs, machine_mode *pmode)
6752 {
6753 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6754 machine_mode mode = DECL_MODE (field);
6755 int nregs, nslots;
6756
6757 /* Slots are counted as words while regs are counted as having the size of
6758 the (inner) mode. */
6759 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6760 {
6761 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6762 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6763 }
6764 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6765 {
6766 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6767 nregs = 2;
6768 }
6769 else
6770 nregs = 1;
6771
6772 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6773
6774 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6775 {
6776 nslots = SPARC_FP_ARG_MAX - this_slotno;
6777 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6778
6779 /* We need to pass this field (partly) on the stack. */
6780 data->stack = true;
6781
6782 if (nregs <= 0)
6783 return false;
6784 }
6785
6786 *pnregs = nregs;
6787 *pmode = mode;
6788 return true;
6789 }
6790
6791 /* A subroutine of function_arg_record_value. Count the number of registers
6792 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6793
6794 inline void
6795 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6796 assign_data_t *data)
6797 {
6798 if (fp)
6799 {
6800 int nregs;
6801 machine_mode mode;
6802
6803 if (compute_int_layout (bitpos, data, &nregs))
6804 data->nregs += nregs;
6805
6806 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6807 data->nregs += nregs;
6808 }
6809 else
6810 {
6811 if (data->intoffset < 0)
6812 data->intoffset = bitpos;
6813 }
6814 }
6815
6816 /* A subroutine of function_arg_record_value. Assign the bits of the
6817 structure between PARMS->intoffset and BITPOS to integer registers. */
6818
6819 static void
6820 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6821 {
6822 int intoffset = data->intoffset;
6823 machine_mode mode;
6824 int nregs;
6825
6826 if (!compute_int_layout (bitpos, data, &nregs))
6827 return;
6828
6829 /* If this is the trailing part of a word, only load that much into
6830 the register. Otherwise load the whole register. Note that in
6831 the latter case we may pick up unwanted bits. It's not a problem
6832 at the moment but we may wish to revisit this. */
6833 if (intoffset % BITS_PER_WORD != 0)
6834 mode = smallest_int_mode_for_size (BITS_PER_WORD
6835 - intoffset % BITS_PER_WORD);
6836 else
6837 mode = word_mode;
6838
6839 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6840 unsigned int regno = data->regbase + this_slotno;
6841 intoffset /= BITS_PER_UNIT;
6842
6843 do
6844 {
6845 rtx reg = gen_rtx_REG (mode, regno);
6846 XVECEXP (data->ret, 0, data->stack + data->nregs)
6847 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6848 data->nregs += 1;
6849 mode = word_mode;
6850 regno += 1;
6851 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6852 }
6853 while (--nregs > 0);
6854 }
6855
6856 /* A subroutine of function_arg_record_value. Assign FIELD at position
6857 BITPOS to FP registers. */
6858
6859 static void
6860 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6861 assign_data_t *data)
6862 {
6863 int nregs;
6864 machine_mode mode;
6865
6866 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6867 return;
6868
6869 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6870 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6871 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6872 regno++;
6873 int pos = bitpos / BITS_PER_UNIT;
6874
6875 do
6876 {
6877 rtx reg = gen_rtx_REG (mode, regno);
6878 XVECEXP (data->ret, 0, data->stack + data->nregs)
6879 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6880 data->nregs += 1;
6881 regno += GET_MODE_SIZE (mode) / 4;
6882 pos += GET_MODE_SIZE (mode);
6883 }
6884 while (--nregs > 0);
6885 }
6886
6887 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6888 the structure between PARMS->intoffset and BITPOS to registers. */
6889
6890 inline void
6891 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6892 assign_data_t *data)
6893 {
6894 if (fp)
6895 {
6896 assign_int_registers (bitpos, data);
6897
6898 assign_fp_registers (field, bitpos, data);
6899 }
6900 else
6901 {
6902 if (data->intoffset < 0)
6903 data->intoffset = bitpos;
6904 }
6905 }
6906
6907 /* Used by function_arg and sparc_function_value_1 to implement the complex
6908 conventions of the 64-bit ABI for passing and returning structures.
6909 Return an expression valid as a return value for the FUNCTION_ARG
6910 and TARGET_FUNCTION_VALUE.
6911
6912 TYPE is the data type of the argument (as a tree).
6913 This is null for libcalls where that information may
6914 not be available.
6915 MODE is the argument's machine mode.
6916 SLOTNO is the index number of the argument's slot in the parameter array.
6917 NAMED is true if this argument is a named parameter
6918 (otherwise it is an extra parameter matching an ellipsis).
6919 REGBASE is the regno of the base register for the parameter array. */
6920
6921 static rtx
6922 function_arg_record_value (const_tree type, machine_mode mode,
6923 int slotno, bool named, int regbase)
6924 {
6925 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6926 assign_data_t data;
6927 int nregs;
6928
6929 data.slotno = slotno;
6930 data.regbase = regbase;
6931
6932 /* Count how many registers we need. */
6933 data.nregs = 0;
6934 data.intoffset = 0;
6935 data.stack = false;
6936 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6937
6938 /* Take into account pending integer fields. */
6939 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6940 data.nregs += nregs;
6941
6942 /* Allocate the vector and handle some annoying special cases. */
6943 nregs = data.nregs;
6944
6945 if (nregs == 0)
6946 {
6947 /* ??? Empty structure has no value? Duh? */
6948 if (typesize <= 0)
6949 {
6950 /* Though there's nothing really to store, return a word register
6951 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6952 leads to breakage because there are zero bytes to load. */
6954 return gen_rtx_REG (mode, regbase);
6955 }
6956
6957 /* ??? C++ has structures with no fields, and yet a size. Give up
6958 for now and pass everything back in integer registers. */
6959 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6960 if (nregs + slotno > SPARC_INT_ARG_MAX)
6961 nregs = SPARC_INT_ARG_MAX - slotno;
6962 }
6963
6964 gcc_assert (nregs > 0);
6965
6966 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6967
6968 /* If at least one field must be passed on the stack, generate
6969 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6970 also be passed on the stack. We can't do much better because the
6971 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6972 of structures for which the fields passed exclusively in registers
6973 are not at the beginning of the structure. */
6974 if (data.stack)
6975 XVECEXP (data.ret, 0, 0)
6976 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6977
6978 /* Assign the registers. */
6979 data.nregs = 0;
6980 data.intoffset = 0;
6981 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6982
6983 /* Assign pending integer fields. */
6984 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6985
6986 gcc_assert (data.nregs == nregs);
6987
6988 return data.ret;
6989 }
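
/* Illustrative sketch: for

     struct s { int i; double d; };

   passed by value in slot 0 of a 64-bit call with TARGET_FPU, the counting
   pass finds one pending integer word and one FP register, and the
   PARALLEL built above is roughly

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
                (expr_list (reg:DF %f2) (const_int 8))])

   on the caller side: the first word travels in an integer register and
   the double, which starts in the second word, in %d2 -- consistent with
   the table further up.  */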
6990
6991 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6992 of the 64-bit ABI for passing and returning unions.
6993 Return an expression valid as a return value for the FUNCTION_ARG
6994 and TARGET_FUNCTION_VALUE.
6995
6996 SIZE is the size in bytes of the union.
6997 MODE is the argument's machine mode.
6998 REGNO is the hard register the union will be passed in. */
6999
7000 static rtx
7001 function_arg_union_value (int size, machine_mode mode, int slotno,
7002 int regno)
7003 {
7004 int nwords = CEIL_NWORDS (size), i;
7005 rtx regs;
7006
7007 /* See comment in previous function for empty structures. */
7008 if (nwords == 0)
7009 return gen_rtx_REG (mode, regno);
7010
7011 if (slotno == SPARC_INT_ARG_MAX - 1)
7012 nwords = 1;
7013
7014 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7015
7016 for (i = 0; i < nwords; i++)
7017 {
7018 /* Unions are passed left-justified. */
7019 XVECEXP (regs, 0, i)
7020 = gen_rtx_EXPR_LIST (VOIDmode,
7021 gen_rtx_REG (word_mode, regno),
7022 GEN_INT (UNITS_PER_WORD * i));
7023 regno++;
7024 }
7025
7026 return regs;
7027 }
7028
7029 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7030 for passing and returning BLKmode vectors.
7031 Return an expression valid as a return value for the FUNCTION_ARG
7032 and TARGET_FUNCTION_VALUE.
7033
7034 SIZE is the size in bytes of the vector.
7035 REGNO is the FP hard register the vector will be passed in. */
7036
7037 static rtx
7038 function_arg_vector_value (int size, int regno)
7039 {
7040 const int nregs = MAX (1, size / 8);
7041 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7042
7043 if (size < 8)
7044 XVECEXP (regs, 0, 0)
7045 = gen_rtx_EXPR_LIST (VOIDmode,
7046 gen_rtx_REG (SImode, regno),
7047 const0_rtx);
7048 else
7049 for (int i = 0; i < nregs; i++)
7050 XVECEXP (regs, 0, i)
7051 = gen_rtx_EXPR_LIST (VOIDmode,
7052 gen_rtx_REG (DImode, regno + 2*i),
7053 GEN_INT (i*8));
7054
7055 return regs;
7056 }
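
/* Illustrative sketch: a 16-byte BLKmode vector passed in slot 0 reaches
   this function with REGNO == SPARC_FP_ARG_FIRST and yields a PARALLEL of
   two DImode FP registers at offsets 0 and 8 (i.e. %d0 and %d2), whereas a
   4-byte vector yields a single SImode register at offset 0.  */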
7057
7058 /* Determine where to put an argument to a function.
7059 Value is zero to push the argument on the stack,
7060 or a hard register in which to store the argument.
7061
7062 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7063 the preceding args and about the function being called.
7064 MODE is the argument's machine mode.
7065 TYPE is the data type of the argument (as a tree).
7066 This is null for libcalls where that information may
7067 not be available.
7068 NAMED is true if this argument is a named parameter
7069 (otherwise it is an extra parameter matching an ellipsis).
7070 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7071 TARGET_FUNCTION_INCOMING_ARG. */
7072
7073 static rtx
7074 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7075 const_tree type, bool named, bool incoming)
7076 {
7077 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7078
7079 int regbase = (incoming
7080 ? SPARC_INCOMING_INT_ARG_FIRST
7081 : SPARC_OUTGOING_INT_ARG_FIRST);
7082 int slotno, regno, padding;
7083 enum mode_class mclass = GET_MODE_CLASS (mode);
7084
7085 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7086 &regno, &padding);
7087 if (slotno == -1)
7088 return 0;
7089
7090 /* Vector types deserve special treatment because they are polymorphic wrt
7091 their mode, depending upon whether VIS instructions are enabled. */
7092 if (type && TREE_CODE (type) == VECTOR_TYPE)
7093 {
7094 HOST_WIDE_INT size = int_size_in_bytes (type);
7095 gcc_assert ((TARGET_ARCH32 && size <= 8)
7096 || (TARGET_ARCH64 && size <= 16));
7097
7098 if (mode == BLKmode)
7099 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7100
7101 mclass = MODE_FLOAT;
7102 }
7103
7104 if (TARGET_ARCH32)
7105 return gen_rtx_REG (mode, regno);
7106
7107 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7108 and are promoted to registers if possible. */
7109 if (type && TREE_CODE (type) == RECORD_TYPE)
7110 {
7111 HOST_WIDE_INT size = int_size_in_bytes (type);
7112 gcc_assert (size <= 16);
7113
7114 return function_arg_record_value (type, mode, slotno, named, regbase);
7115 }
7116
7117 /* Unions up to 16 bytes in size are passed in integer registers. */
7118 else if (type && TREE_CODE (type) == UNION_TYPE)
7119 {
7120 HOST_WIDE_INT size = int_size_in_bytes (type);
7121 gcc_assert (size <= 16);
7122
7123 return function_arg_union_value (size, mode, slotno, regno);
7124 }
7125
7126 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7127 but also have the slot allocated for them.
7128 If no prototype is in scope fp values in register slots get passed
7129 in two places, either fp regs and int regs or fp regs and memory. */
7130 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7131 && SPARC_FP_REG_P (regno))
7132 {
7133 rtx reg = gen_rtx_REG (mode, regno);
7134 if (cum->prototype_p || cum->libcall_p)
7135 return reg;
7136 else
7137 {
7138 rtx v0, v1;
7139
7140 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7141 {
7142 int intreg;
7143
7144 /* On incoming, we don't need to know that the value
7145 is passed in %f0 and %i0, and it confuses other parts,
7146 causing needless spillage even in the simplest cases. */
7147 if (incoming)
7148 return reg;
7149
7150 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7151 + (regno - SPARC_FP_ARG_FIRST) / 2);
7152
7153 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7154 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7155 const0_rtx);
7156 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7157 }
7158 else
7159 {
7160 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7161 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7162 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7163 }
7164 }
7165 }
7166
7167 /* All other aggregate types are passed in an integer register in a mode
7168 corresponding to the size of the type. */
7169 else if (type && AGGREGATE_TYPE_P (type))
7170 {
7171 HOST_WIDE_INT size = int_size_in_bytes (type);
7172 gcc_assert (size <= 16);
7173
7174 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7175 }
7176
7177 return gen_rtx_REG (mode, regno);
7178 }
7179
7180 /* Handle the TARGET_FUNCTION_ARG target hook. */
7181
7182 static rtx
7183 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7184 const_tree type, bool named)
7185 {
7186 return sparc_function_arg_1 (cum, mode, type, named, false);
7187 }
7188
7189 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7190
7191 static rtx
7192 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7193 const_tree type, bool named)
7194 {
7195 return sparc_function_arg_1 (cum, mode, type, named, true);
7196 }
7197
7198 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7199
7200 static unsigned int
7201 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7202 {
7203 return ((TARGET_ARCH64
7204 && (GET_MODE_ALIGNMENT (mode) == 128
7205 || (type && TYPE_ALIGN (type) == 128)))
7206 ? 128
7207 : PARM_BOUNDARY);
7208 }
7209
7210 /* For an arg passed partly in registers and partly in memory,
7211 this is the number of bytes of registers used.
7212 For args passed entirely in registers or entirely in memory, zero.
7213
7214 Any arg that starts in the first 6 regs but won't entirely fit in them
7215 needs partial registers on v8. On v9, structures with integer
7216 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7217 values that begin in the last fp reg [where "last fp reg" varies with the
7218 mode] will be split between that reg and memory. */
7219
7220 static int
7221 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7222 tree type, bool named)
7223 {
7224 int slotno, regno, padding;
7225
7226 /* We pass false for incoming here; it doesn't matter. */
7227 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7228 false, &regno, &padding);
7229
7230 if (slotno == -1)
7231 return 0;
7232
7233 if (TARGET_ARCH32)
7234 {
7235 if ((slotno + (mode == BLKmode
7236 ? CEIL_NWORDS (int_size_in_bytes (type))
7237 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7238 > SPARC_INT_ARG_MAX)
7239 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7240 }
7241 else
7242 {
7243 /* We are guaranteed by pass_by_reference that the size of the
7244 argument is not greater than 16 bytes, so we only need to return
7245 one word if the argument is partially passed in registers. */
7246
7247 if (type && AGGREGATE_TYPE_P (type))
7248 {
7249 int size = int_size_in_bytes (type);
7250
7251 if (size > UNITS_PER_WORD
7252 && (slotno == SPARC_INT_ARG_MAX - 1
7253 || slotno == SPARC_FP_ARG_MAX - 1))
7254 return UNITS_PER_WORD;
7255 }
7256 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7257 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7258 && ! (TARGET_FPU && named)))
7259 {
7260 /* The complex types are passed as packed types. */
7261 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7262 && slotno == SPARC_INT_ARG_MAX - 1)
7263 return UNITS_PER_WORD;
7264 }
7265 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7266 {
7267 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7268 > SPARC_FP_ARG_MAX)
7269 return UNITS_PER_WORD;
7270 }
7271 }
7272
7273 return 0;
7274 }
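
/* For illustration only, a worked example of the partial-bytes rule above,
   assuming the usual six integer argument slots (%o0-%o5) on 32-bit: a
   DImode argument whose first word lands in slot 5 occupies %o5 plus one
   word of memory, so the hook reports (6 - 5) * UNITS_PER_WORD = 4 bytes
   passed in registers.  On 64-bit only the last register slot can ever be
   split, hence the single-word return values in the code above.  */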
7275
7276 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7277 Specify whether to pass the argument by reference. */
7278
7279 static bool
7280 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7281 machine_mode mode, const_tree type,
7282 bool named ATTRIBUTE_UNUSED)
7283 {
7284 if (TARGET_ARCH32)
7285 /* Original SPARC 32-bit ABI says that structures and unions,
7286 and quad-precision floats are passed by reference. For Pascal,
7287 also pass arrays by reference. All other base types are passed
7288 in registers.
7289
7290 Extended ABI (as implemented by the Sun compiler) says that all
7291 complex floats are passed by reference. Pass complex integers
7292 in registers up to 8 bytes. More generally, enforce the 2-word
7293 cap for passing arguments in registers.
7294
7295 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7296 integers are passed like floats of the same size, that is in
7297 registers up to 8 bytes. Pass all vector floats by reference
7298 like structure and unions. */
7299 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7300 || mode == SCmode
7301 /* Catch CDImode, TFmode, DCmode and TCmode. */
7302 || GET_MODE_SIZE (mode) > 8
7303 || (type
7304 && TREE_CODE (type) == VECTOR_TYPE
7305 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7306 else
7307 /* Original SPARC 64-bit ABI says that structures and unions
7308 smaller than 16 bytes are passed in registers, as well as
7309 all other base types.
7310
7311 Extended ABI (as implemented by the Sun compiler) says that
7312 complex floats are passed in registers up to 16 bytes. Pass
7313 all complex integers in registers up to 16 bytes. More generally,
7314 enforce the 2-word cap for passing arguments in registers.
7315
7316 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7317 integers are passed like floats of the same size, that is in
7318 registers (up to 16 bytes). Pass all vector floats like structure
7319 and unions. */
7320 return ((type
7321 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7322 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7323 /* Catch CTImode and TCmode. */
7324 || GET_MODE_SIZE (mode) > 16);
7325 }
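
/* For illustration only, some consequences of the rules spelled out above:
   in 32-bit mode a struct, a union, a 'long double' (TFmode), a
   '_Complex float' (SCmode) and any vector of floats are passed by
   reference, while 'long long' and '_Complex int' values of up to 8 bytes
   stay in registers.  In 64-bit mode only aggregates and vectors larger
   than 16 bytes, and scalar modes larger than 16 bytes (CTImode, TCmode),
   go by reference.  */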
7326
7327 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7328 Update the data in CUM to advance over an argument
7329 of mode MODE and data type TYPE.
7330 TYPE is null for libcalls where that information may not be available. */
7331
7332 static void
7333 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7334 const_tree type, bool named)
7335 {
7336 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7337 int regno, padding;
7338
7339 /* We pass false for incoming here; it doesn't matter. */
7340 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7341
7342 /* If the argument requires leading padding, add it. */
7343 cum->words += padding;
7344
7345 if (TARGET_ARCH32)
7346 cum->words += (mode == BLKmode
7347 ? CEIL_NWORDS (int_size_in_bytes (type))
7348 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7349 else
7350 {
7351 if (type && AGGREGATE_TYPE_P (type))
7352 {
7353 int size = int_size_in_bytes (type);
7354
7355 if (size <= 8)
7356 ++cum->words;
7357 else if (size <= 16)
7358 cum->words += 2;
7359 else /* passed by reference */
7360 ++cum->words;
7361 }
7362 else
7363 cum->words += (mode == BLKmode
7364 ? CEIL_NWORDS (int_size_in_bytes (type))
7365 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7366 }
7367 }
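
/* For illustration only, how CUM advances on 64-bit under the code above:
   an 8-byte struct consumes one slot, a 16-byte struct two slots, and a
   24-byte struct only one slot because it is passed by reference (the
   slot holds the pointer); a plain double likewise consumes a single
   slot.  */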
7368
7369 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7370 are always stored left shifted in their argument slot. */
7371
7372 static pad_direction
7373 sparc_function_arg_padding (machine_mode mode, const_tree type)
7374 {
7375 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7376 return PAD_UPWARD;
7377
7378 /* Fall back to the default. */
7379 return default_function_arg_padding (mode, type);
7380 }
7381
7382 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7383 Specify whether to return the return value in memory. */
7384
7385 static bool
7386 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7387 {
7388 if (TARGET_ARCH32)
7389 /* Original SPARC 32-bit ABI says that structures and unions,
7390 and quad-precision floats are returned in memory. All other
7391 base types are returned in registers.
7392
7393 Extended ABI (as implemented by the Sun compiler) says that
7394 all complex floats are returned in registers (8 FP registers
7395 at most for '_Complex long double'). Return all complex integers
7396 in registers (4 at most for '_Complex long long').
7397
7398 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7399 integers are returned like floats of the same size, that is in
7400 registers up to 8 bytes and in memory otherwise. Return all
7401 vector floats in memory like structure and unions; note that
7402 they always have BLKmode like the latter. */
7403 return (TYPE_MODE (type) == BLKmode
7404 || TYPE_MODE (type) == TFmode
7405 || (TREE_CODE (type) == VECTOR_TYPE
7406 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7407 else
7408 /* Original SPARC 64-bit ABI says that structures and unions
7409 smaller than 32 bytes are returned in registers, as well as
7410 all other base types.
7411
7412 Extended ABI (as implemented by the Sun compiler) says that all
7413 complex floats are returned in registers (8 FP registers at most
7414 for '_Complex long double'). Return all complex integers in
7415 registers (4 at most for '_Complex TItype').
7416
7417 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7418 integers are returned like floats of the same size, that is in
7419 registers. Return all vector floats like structure and unions;
7420 note that they always have BLKmode like the latter. */
7421 return (TYPE_MODE (type) == BLKmode
7422 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7423 }
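
/* For illustration only, the net effect of the rules above: in 32-bit
   mode every BLKmode aggregate and every 'long double' (TFmode) is
   returned in memory, while complex values stay in registers; in 64-bit
   mode only BLKmode aggregates larger than 32 bytes are returned in
   memory.  */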
7424
7425 /* Handle the TARGET_STRUCT_VALUE target hook.
7426 Return where to find the structure return value address. */
7427
7428 static rtx
7429 sparc_struct_value_rtx (tree fndecl, int incoming)
7430 {
7431 if (TARGET_ARCH64)
7432 return 0;
7433 else
7434 {
7435 rtx mem;
7436
7437 if (incoming)
7438 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7439 STRUCT_VALUE_OFFSET));
7440 else
7441 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7442 STRUCT_VALUE_OFFSET));
7443
7444 /* Only follow the SPARC ABI for fixed-size structure returns.
7445 Variable-size structure returns are handled per the normal
7446 procedures in GCC. This is enabled by -mstd-struct-return. */
7447 if (incoming == 2
7448 && sparc_std_struct_return
7449 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7450 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7451 {
7452 /* We must check and adjust the return address, as it is optional
7453 as to whether the return object is really provided. */
7454 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7455 rtx scratch = gen_reg_rtx (SImode);
7456 rtx_code_label *endlab = gen_label_rtx ();
7457
7458 /* Calculate the return object size. */
7459 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7460 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7461 /* Construct a temporary return value. */
7462 rtx temp_val
7463 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7464
7465 /* Implement SPARC 32-bit psABI callee return struct checking:
7466
7467 Fetch the instruction where we will return to and see if
7468 it's an unimp instruction (the most significant 10 bits
7469 will be zero). */
7470 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7471 plus_constant (Pmode,
7472 ret_reg, 8)));
7473 /* Assume the size is valid and pre-adjust. */
7474 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7475 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7476 0, endlab);
7477 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7478 /* Write the address of the memory pointed to by temp_val into
7479 the memory pointed to by mem. */
7480 emit_move_insn (mem, XEXP (temp_val, 0));
7481 emit_label (endlab);
7482 }
7483
7484 return mem;
7485 }
7486 }
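
/* For illustration only, the check emitted above corresponds to the
   32-bit psABI convention in which a caller expecting a structure return
   may follow the call's delay slot with an 'unimp <size>' word, e.g.

     call   foo
      nop
     unimp  8        ! caller expects an 8-byte struct return

   The sequence above fetches the word at the return address + 8 and
   compares it with the expected size (an unimp word has its top 10 bits
   clear, so the whole word equals the size).  If they match, the
   pre-adjusted return address makes the function return past the unimp;
   otherwise the adjustment is undone and the address of a local
   temporary is substituted for the return slot, since no caller-provided
   object exists.  */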
7487
7488 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7489 For v9, function return values are subject to the same rules as arguments,
7490 except that up to 32 bytes may be returned in registers. */
7491
7492 static rtx
7493 sparc_function_value_1 (const_tree type, machine_mode mode,
7494 bool outgoing)
7495 {
7496 /* Beware that the two values are swapped here wrt function_arg. */
7497 int regbase = (outgoing
7498 ? SPARC_INCOMING_INT_ARG_FIRST
7499 : SPARC_OUTGOING_INT_ARG_FIRST);
7500 enum mode_class mclass = GET_MODE_CLASS (mode);
7501 int regno;
7502
7503 /* Vector types deserve special treatment because they are polymorphic wrt
7504 their mode, depending upon whether VIS instructions are enabled. */
7505 if (type && TREE_CODE (type) == VECTOR_TYPE)
7506 {
7507 HOST_WIDE_INT size = int_size_in_bytes (type);
7508 gcc_assert ((TARGET_ARCH32 && size <= 8)
7509 || (TARGET_ARCH64 && size <= 32));
7510
7511 if (mode == BLKmode)
7512 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7513
7514 mclass = MODE_FLOAT;
7515 }
7516
7517 if (TARGET_ARCH64 && type)
7518 {
7519 /* Structures up to 32 bytes in size are returned in registers. */
7520 if (TREE_CODE (type) == RECORD_TYPE)
7521 {
7522 HOST_WIDE_INT size = int_size_in_bytes (type);
7523 gcc_assert (size <= 32);
7524
7525 return function_arg_record_value (type, mode, 0, 1, regbase);
7526 }
7527
7528 /* Unions up to 32 bytes in size are returned in integer registers. */
7529 else if (TREE_CODE (type) == UNION_TYPE)
7530 {
7531 HOST_WIDE_INT size = int_size_in_bytes (type);
7532 gcc_assert (size <= 32);
7533
7534 return function_arg_union_value (size, mode, 0, regbase);
7535 }
7536
7537 /* Objects that require it are returned in FP registers. */
7538 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7539 ;
7540
7541 /* All other aggregate types are returned in an integer register in a
7542 mode corresponding to the size of the type. */
7543 else if (AGGREGATE_TYPE_P (type))
7544 {
7545 /* All other aggregate types are passed in an integer register
7546 in a mode corresponding to the size of the type. */
7547 HOST_WIDE_INT size = int_size_in_bytes (type);
7548 gcc_assert (size <= 32);
7549
7550 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7551
7552 /* ??? We probably should have made the same ABI change in
7553 3.4.0 as the one we made for unions. The latter was
7554 required by the SCD though, while the former is not
7555 specified, so we favored compatibility and efficiency.
7556
7557 Now we're stuck for aggregates larger than 16 bytes,
7558 because OImode vanished in the meantime. Let's not
7559 try to be unduly clever, and simply follow the ABI
7560 for unions in that case. */
7561 if (mode == BLKmode)
7562 return function_arg_union_value (size, mode, 0, regbase);
7563 else
7564 mclass = MODE_INT;
7565 }
7566
7567 /* We should only have pointer and integer types at this point. This
7568 must match sparc_promote_function_mode. */
7569 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7570 mode = word_mode;
7571 }
7572
7573 /* We should only have pointer and integer types at this point, except with
7574 -freg-struct-return. This must match sparc_promote_function_mode. */
7575 else if (TARGET_ARCH32
7576 && !(type && AGGREGATE_TYPE_P (type))
7577 && mclass == MODE_INT
7578 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7579 mode = word_mode;
7580
7581 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7582 regno = SPARC_FP_ARG_FIRST;
7583 else
7584 regno = regbase;
7585
7586 return gen_rtx_REG (mode, regno);
7587 }
7588
7589 /* Handle TARGET_FUNCTION_VALUE.
7590 On the SPARC, the value is found in the first "output" register, but the
7591 called function leaves it in the first "input" register. */
7592
7593 static rtx
7594 sparc_function_value (const_tree valtype,
7595 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7596 bool outgoing)
7597 {
7598 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7599 }
7600
7601 /* Handle TARGET_LIBCALL_VALUE. */
7602
7603 static rtx
7604 sparc_libcall_value (machine_mode mode,
7605 const_rtx fun ATTRIBUTE_UNUSED)
7606 {
7607 return sparc_function_value_1 (NULL_TREE, mode, false);
7608 }
7609
7610 /* Handle FUNCTION_VALUE_REGNO_P.
7611 On the SPARC, the first "output" reg is used for integer values, and the
7612 first floating point register is used for floating point values. */
7613
7614 static bool
7615 sparc_function_value_regno_p (const unsigned int regno)
7616 {
7617 return (regno == 8 || (TARGET_FPU && regno == 32));
7618 }
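
/* In this port's hard register numbering, register 8 is %o0 and register
   32 is %f0, so the test above simply accepts the first integer output
   register and, when an FPU is available, the first FP register.  */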
7619
7620 /* Do what is necessary for `va_start'. We look at the current function
7621 to determine if stdarg or varargs is used and return the address of
7622 the first unnamed parameter. */
7623
7624 static rtx
7625 sparc_builtin_saveregs (void)
7626 {
7627 int first_reg = crtl->args.info.words;
7628 rtx address;
7629 int regno;
7630
7631 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7632 emit_move_insn (gen_rtx_MEM (word_mode,
7633 gen_rtx_PLUS (Pmode,
7634 frame_pointer_rtx,
7635 GEN_INT (FIRST_PARM_OFFSET (0)
7636 + (UNITS_PER_WORD
7637 * regno)))),
7638 gen_rtx_REG (word_mode,
7639 SPARC_INCOMING_INT_ARG_FIRST + regno));
7640
7641 address = gen_rtx_PLUS (Pmode,
7642 frame_pointer_rtx,
7643 GEN_INT (FIRST_PARM_OFFSET (0)
7644 + UNITS_PER_WORD * first_reg));
7645
7646 return address;
7647 }
7648
7649 /* Implement `va_start' for stdarg. */
7650
7651 static void
7652 sparc_va_start (tree valist, rtx nextarg)
7653 {
7654 nextarg = expand_builtin_saveregs ();
7655 std_expand_builtin_va_start (valist, nextarg);
7656 }
7657
7658 /* Implement `va_arg' for stdarg. */
7659
7660 static tree
7661 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7662 gimple_seq *post_p)
7663 {
7664 HOST_WIDE_INT size, rsize, align;
7665 tree addr, incr;
7666 bool indirect;
7667 tree ptrtype = build_pointer_type (type);
7668
7669 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7670 {
7671 indirect = true;
7672 size = rsize = UNITS_PER_WORD;
7673 align = 0;
7674 }
7675 else
7676 {
7677 indirect = false;
7678 size = int_size_in_bytes (type);
7679 rsize = ROUND_UP (size, UNITS_PER_WORD);
7680 align = 0;
7681
7682 if (TARGET_ARCH64)
7683 {
7684 /* For SPARC64, objects requiring 16-byte alignment get it. */
7685 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7686 align = 2 * UNITS_PER_WORD;
7687
7688 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7689 are left-justified in their slots. */
7690 if (AGGREGATE_TYPE_P (type))
7691 {
7692 if (size == 0)
7693 size = rsize = UNITS_PER_WORD;
7694 else
7695 size = rsize;
7696 }
7697 }
7698 }
7699
7700 incr = valist;
7701 if (align)
7702 {
7703 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7704 incr = fold_convert (sizetype, incr);
7705 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7706 size_int (-align));
7707 incr = fold_convert (ptr_type_node, incr);
7708 }
7709
7710 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7711 addr = incr;
7712
7713 if (BYTES_BIG_ENDIAN && size < rsize)
7714 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7715
7716 if (indirect)
7717 {
7718 addr = fold_convert (build_pointer_type (ptrtype), addr);
7719 addr = build_va_arg_indirect_ref (addr);
7720 }
7721
7722 /* If the address isn't aligned properly for the type, we need a temporary.
7723 FIXME: This is inefficient, usually we can do this in registers. */
7724 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7725 {
7726 tree tmp = create_tmp_var (type, "va_arg_tmp");
7727 tree dest_addr = build_fold_addr_expr (tmp);
7728 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7729 3, dest_addr, addr, size_int (rsize));
7730 TREE_ADDRESSABLE (tmp) = 1;
7731 gimplify_and_add (copy, pre_p);
7732 addr = dest_addr;
7733 }
7734
7735 else
7736 addr = fold_convert (ptrtype, addr);
7737
7738 incr = fold_build_pointer_plus_hwi (incr, rsize);
7739 gimplify_assign (valist, incr, post_p);
7740
7741 return build_va_arg_indirect_ref (addr);
7742 }
7743 \f
7744 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7745 Specify whether the vector mode is supported by the hardware. */
7746
7747 static bool
7748 sparc_vector_mode_supported_p (machine_mode mode)
7749 {
7750 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7751 }
7752 \f
7753 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7754
7755 static machine_mode
7756 sparc_preferred_simd_mode (scalar_mode mode)
7757 {
7758 if (TARGET_VIS)
7759 switch (mode)
7760 {
7761 case E_SImode:
7762 return V2SImode;
7763 case E_HImode:
7764 return V4HImode;
7765 case E_QImode:
7766 return V8QImode;
7767
7768 default:;
7769 }
7770
7771 return word_mode;
7772 }
7773 \f
7774 /* Return the string to output an unconditional branch to LABEL, which is
7775 the operand number of the label.
7776
7777 DEST is the destination insn (i.e. the label), INSN is the source. */
7778
7779 const char *
7780 output_ubranch (rtx dest, rtx_insn *insn)
7781 {
7782 static char string[64];
7783 bool v9_form = false;
7784 int delta;
7785 char *p;
7786
7787 /* Even if we are trying to use cbcond for this, evaluate
7788 whether we can use V9 branches as our backup plan. */
7789
7790 delta = 5000000;
7791 if (INSN_ADDRESSES_SET_P ())
7792 delta = (INSN_ADDRESSES (INSN_UID (dest))
7793 - INSN_ADDRESSES (INSN_UID (insn)));
7794
7795 /* Leave some instructions for "slop". */
7796 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7797 v9_form = true;
7798
7799 if (TARGET_CBCOND)
7800 {
7801 bool emit_nop = emit_cbcond_nop (insn);
7802 bool far = false;
7803 const char *rval;
7804
7805 if (delta < -500 || delta > 500)
7806 far = true;
7807
7808 if (far)
7809 {
7810 if (v9_form)
7811 rval = "ba,a,pt\t%%xcc, %l0";
7812 else
7813 rval = "b,a\t%l0";
7814 }
7815 else
7816 {
7817 if (emit_nop)
7818 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7819 else
7820 rval = "cwbe\t%%g0, %%g0, %l0";
7821 }
7822 return rval;
7823 }
7824
7825 if (v9_form)
7826 strcpy (string, "ba%*,pt\t%%xcc, ");
7827 else
7828 strcpy (string, "b%*\t");
7829
7830 p = strchr (string, '\0');
7831 *p++ = '%';
7832 *p++ = 'l';
7833 *p++ = '0';
7834 *p++ = '%';
7835 *p++ = '(';
7836 *p = '\0';
7837
7838 return string;
7839 }
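
/* For illustration only, after operand substitution the strings built
   above expand to forms such as

     cwbe    %g0, %g0, .L5    ! cbcond, always-taken compare-and-branch
     ba,a,pt %xcc, .L5        ! far fallback when cbcond range is exceeded
     ba,pt   %xcc, .L5        ! V9 unconditional branch
     b       .L5              ! pre-V9 branch

   where the %* and %( punctuation codes in the non-cbcond templates are
   resolved later by sparc_print_operand into an annul flag or an
   explicit nop for the delay slot.  */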
7840
7841 /* Return the string to output a conditional branch to LABEL, which is
7842 the operand number of the label. OP is the conditional expression.
7843 XEXP (OP, 0) is assumed to be a condition code register (integer or
7844 floating point) and its mode specifies what kind of comparison we made.
7845
7846 DEST is the destination insn (i.e. the label), INSN is the source.
7847
7848 REVERSED is nonzero if we should reverse the sense of the comparison.
7849
7850 ANNUL is nonzero if we should generate an annulling branch. */
7851
7852 const char *
7853 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7854 rtx_insn *insn)
7855 {
7856 static char string[64];
7857 enum rtx_code code = GET_CODE (op);
7858 rtx cc_reg = XEXP (op, 0);
7859 machine_mode mode = GET_MODE (cc_reg);
7860 const char *labelno, *branch;
7861 int spaces = 8, far;
7862 char *p;
7863
7864 /* v9 branches are limited to +-1MB. If it is too far away,
7865 change
7866
7867 bne,pt %xcc, .LC30
7868
7869 to
7870
7871 be,pn %xcc, .+12
7872 nop
7873 ba .LC30
7874
7875 and
7876
7877 fbne,a,pn %fcc2, .LC29
7878
7879 to
7880
7881 fbe,pt %fcc2, .+16
7882 nop
7883 ba .LC29 */
7884
7885 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7886 if (reversed ^ far)
7887 {
7888 /* Reversal of FP compares takes care -- an ordered compare
7889 becomes an unordered compare and vice versa. */
7890 if (mode == CCFPmode || mode == CCFPEmode)
7891 code = reverse_condition_maybe_unordered (code);
7892 else
7893 code = reverse_condition (code);
7894 }
7895
7896 /* Start by writing the branch condition. */
7897 if (mode == CCFPmode || mode == CCFPEmode)
7898 {
7899 switch (code)
7900 {
7901 case NE:
7902 branch = "fbne";
7903 break;
7904 case EQ:
7905 branch = "fbe";
7906 break;
7907 case GE:
7908 branch = "fbge";
7909 break;
7910 case GT:
7911 branch = "fbg";
7912 break;
7913 case LE:
7914 branch = "fble";
7915 break;
7916 case LT:
7917 branch = "fbl";
7918 break;
7919 case UNORDERED:
7920 branch = "fbu";
7921 break;
7922 case ORDERED:
7923 branch = "fbo";
7924 break;
7925 case UNGT:
7926 branch = "fbug";
7927 break;
7928 case UNLT:
7929 branch = "fbul";
7930 break;
7931 case UNEQ:
7932 branch = "fbue";
7933 break;
7934 case UNGE:
7935 branch = "fbuge";
7936 break;
7937 case UNLE:
7938 branch = "fbule";
7939 break;
7940 case LTGT:
7941 branch = "fblg";
7942 break;
7943 default:
7944 gcc_unreachable ();
7945 }
7946
7947 /* ??? !v9: FP branches cannot be preceded by another floating point
7948 insn. Because there is currently no concept of pre-delay slots,
7949 we can fix this only by always emitting a nop before a floating
7950 point branch. */
7951
7952 string[0] = '\0';
7953 if (! TARGET_V9)
7954 strcpy (string, "nop\n\t");
7955 strcat (string, branch);
7956 }
7957 else
7958 {
7959 switch (code)
7960 {
7961 case NE:
7962 if (mode == CCVmode || mode == CCXVmode)
7963 branch = "bvs";
7964 else
7965 branch = "bne";
7966 break;
7967 case EQ:
7968 if (mode == CCVmode || mode == CCXVmode)
7969 branch = "bvc";
7970 else
7971 branch = "be";
7972 break;
7973 case GE:
7974 if (mode == CCNZmode || mode == CCXNZmode)
7975 branch = "bpos";
7976 else
7977 branch = "bge";
7978 break;
7979 case GT:
7980 branch = "bg";
7981 break;
7982 case LE:
7983 branch = "ble";
7984 break;
7985 case LT:
7986 if (mode == CCNZmode || mode == CCXNZmode)
7987 branch = "bneg";
7988 else
7989 branch = "bl";
7990 break;
7991 case GEU:
7992 branch = "bgeu";
7993 break;
7994 case GTU:
7995 branch = "bgu";
7996 break;
7997 case LEU:
7998 branch = "bleu";
7999 break;
8000 case LTU:
8001 branch = "blu";
8002 break;
8003 default:
8004 gcc_unreachable ();
8005 }
8006 strcpy (string, branch);
8007 }
8008 spaces -= strlen (branch);
8009 p = strchr (string, '\0');
8010
8011 /* Now add the annulling, the label, and a possible nop. */
8012 if (annul && ! far)
8013 {
8014 strcpy (p, ",a");
8015 p += 2;
8016 spaces -= 2;
8017 }
8018
8019 if (TARGET_V9)
8020 {
8021 rtx note;
8022 int v8 = 0;
8023
8024 if (! far && insn && INSN_ADDRESSES_SET_P ())
8025 {
8026 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8027 - INSN_ADDRESSES (INSN_UID (insn)));
8028 /* Leave some instructions for "slop". */
8029 if (delta < -260000 || delta >= 260000)
8030 v8 = 1;
8031 }
8032
8033 switch (mode)
8034 {
8035 case E_CCmode:
8036 case E_CCNZmode:
8037 case E_CCCmode:
8038 case E_CCVmode:
8039 labelno = "%%icc, ";
8040 if (v8)
8041 labelno = "";
8042 break;
8043 case E_CCXmode:
8044 case E_CCXNZmode:
8045 case E_CCXCmode:
8046 case E_CCXVmode:
8047 labelno = "%%xcc, ";
8048 gcc_assert (!v8);
8049 break;
8050 case E_CCFPmode:
8051 case E_CCFPEmode:
8052 {
8053 static char v9_fcc_labelno[] = "%%fccX, ";
8054 /* Set the char indicating the number of the fcc reg to use. */
8055 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8056 labelno = v9_fcc_labelno;
8057 if (v8)
8058 {
8059 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8060 labelno = "";
8061 }
8062 }
8063 break;
8064 default:
8065 gcc_unreachable ();
8066 }
8067
8068 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8069 {
8070 strcpy (p,
8071 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8072 >= profile_probability::even ()) ^ far)
8073 ? ",pt" : ",pn");
8074 p += 3;
8075 spaces -= 3;
8076 }
8077 }
8078 else
8079 labelno = "";
8080
8081 if (spaces > 0)
8082 *p++ = '\t';
8083 else
8084 *p++ = ' ';
8085 strcpy (p, labelno);
8086 p = strchr (p, '\0');
8087 if (far)
8088 {
8089 strcpy (p, ".+12\n\t nop\n\tb\t");
8090 /* Skip the next insn if requested or
8091 if we know that it will be a nop. */
8092 if (annul || ! final_sequence)
8093 p[3] = '6';
8094 p += 14;
8095 }
8096 *p++ = '%';
8097 *p++ = 'l';
8098 *p++ = label + '0';
8099 *p++ = '%';
8100 *p++ = '#';
8101 *p = '\0';
8102
8103 return string;
8104 }
8105
8106 /* Emit a library call comparison between floating point X and Y.
8107 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8108 Return the new operator to be used in the comparison sequence.
8109
8110 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8111 values as arguments instead of the TFmode registers themselves,
8112 that's why we cannot call emit_float_lib_cmp. */
8113
8114 rtx
8115 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8116 {
8117 const char *qpfunc;
8118 rtx slot0, slot1, result, tem, tem2, libfunc;
8119 machine_mode mode;
8120 enum rtx_code new_comparison;
8121
8122 switch (comparison)
8123 {
8124 case EQ:
8125 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8126 break;
8127
8128 case NE:
8129 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8130 break;
8131
8132 case GT:
8133 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8134 break;
8135
8136 case GE:
8137 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8138 break;
8139
8140 case LT:
8141 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8142 break;
8143
8144 case LE:
8145 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8146 break;
8147
8148 case ORDERED:
8149 case UNORDERED:
8150 case UNGT:
8151 case UNLT:
8152 case UNEQ:
8153 case UNGE:
8154 case UNLE:
8155 case LTGT:
8156 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8157 break;
8158
8159 default:
8160 gcc_unreachable ();
8161 }
8162
8163 if (TARGET_ARCH64)
8164 {
8165 if (MEM_P (x))
8166 {
8167 tree expr = MEM_EXPR (x);
8168 if (expr)
8169 mark_addressable (expr);
8170 slot0 = x;
8171 }
8172 else
8173 {
8174 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8175 emit_move_insn (slot0, x);
8176 }
8177
8178 if (MEM_P (y))
8179 {
8180 tree expr = MEM_EXPR (y);
8181 if (expr)
8182 mark_addressable (expr);
8183 slot1 = y;
8184 }
8185 else
8186 {
8187 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8188 emit_move_insn (slot1, y);
8189 }
8190
8191 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8192 emit_library_call (libfunc, LCT_NORMAL,
8193 DImode,
8194 XEXP (slot0, 0), Pmode,
8195 XEXP (slot1, 0), Pmode);
8196 mode = DImode;
8197 }
8198 else
8199 {
8200 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8201 emit_library_call (libfunc, LCT_NORMAL,
8202 SImode,
8203 x, TFmode, y, TFmode);
8204 mode = SImode;
8205 }
8206
8207
8208 /* Immediately move the result of the libcall into a pseudo
8209 register so reload doesn't clobber the value if it needs
8210 the return register for a spill reg. */
8211 result = gen_reg_rtx (mode);
8212 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8213
8214 switch (comparison)
8215 {
8216 default:
8217 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8218 case ORDERED:
8219 case UNORDERED:
8220 new_comparison = (comparison == UNORDERED ? EQ : NE);
8221 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8222 case UNGT:
8223 case UNGE:
8224 new_comparison = (comparison == UNGT ? GT : NE);
8225 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8226 case UNLE:
8227 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8228 case UNLT:
8229 tem = gen_reg_rtx (mode);
8230 if (TARGET_ARCH32)
8231 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8232 else
8233 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8234 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8235 case UNEQ:
8236 case LTGT:
8237 tem = gen_reg_rtx (mode);
8238 if (TARGET_ARCH32)
8239 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8240 else
8241 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8242 tem2 = gen_reg_rtx (mode);
8243 if (TARGET_ARCH32)
8244 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8245 else
8246 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8247 new_comparison = (comparison == UNEQ ? EQ : NE);
8248 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8249 }
8250
8251 gcc_unreachable ();
8252 }
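
/* The sequences above assume the conventional _Q_cmp/_Qp_cmp result
   encoding 0 = equal, 1 = less, 2 = greater, 3 = unordered.  For
   example, UNLT tests the low bit of the result, which is set exactly
   for 'less' (1) and 'unordered' (3), and UNEQ computes
   ((result + 1) & 2) == 0, which holds only for 'equal' (0) and
   'unordered' (3).  */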
8253
8254 /* Generate an unsigned DImode to FP conversion. This is the same code
8255 optabs would emit if we didn't have TFmode patterns. */
8256
8257 void
8258 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8259 {
8260 rtx i0, i1, f0, in, out;
8261
8262 out = operands[0];
8263 in = force_reg (DImode, operands[1]);
8264 rtx_code_label *neglab = gen_label_rtx ();
8265 rtx_code_label *donelab = gen_label_rtx ();
8266 i0 = gen_reg_rtx (DImode);
8267 i1 = gen_reg_rtx (DImode);
8268 f0 = gen_reg_rtx (mode);
8269
8270 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8271
8272 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8273 emit_jump_insn (gen_jump (donelab));
8274 emit_barrier ();
8275
8276 emit_label (neglab);
8277
8278 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8279 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8280 emit_insn (gen_iordi3 (i0, i0, i1));
8281 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8282 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8283
8284 emit_label (donelab);
8285 }
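
/* The negative path above is the classic unsigned-to-float trick: a
   DImode value with its top bit set cannot be converted directly as a
   signed number, so it is halved with a logical shift, the shifted-out
   low bit is OR-ed back in as a sticky bit to preserve rounding, the
   half is converted, and the result is doubled.  E.g. for 2^63 + 1 the
   code converts 2^62 + 1 (the sticky bit keeps the value distinguishable
   from exactly 2^62) and then adds it to itself.  */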
8286
8287 /* Generate an FP to unsigned DImode conversion. This is the same code
8288 optabs would emit if we didn't have TFmode patterns. */
8289
8290 void
8291 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8292 {
8293 rtx i0, i1, f0, in, out, limit;
8294
8295 out = operands[0];
8296 in = force_reg (mode, operands[1]);
8297 rtx_code_label *neglab = gen_label_rtx ();
8298 rtx_code_label *donelab = gen_label_rtx ();
8299 i0 = gen_reg_rtx (DImode);
8300 i1 = gen_reg_rtx (DImode);
8301 limit = gen_reg_rtx (mode);
8302 f0 = gen_reg_rtx (mode);
8303
8304 emit_move_insn (limit,
8305 const_double_from_real_value (
8306 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8307 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8308
8309 emit_insn (gen_rtx_SET (out,
8310 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8311 emit_jump_insn (gen_jump (donelab));
8312 emit_barrier ();
8313
8314 emit_label (neglab);
8315
8316 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8317 emit_insn (gen_rtx_SET (i0,
8318 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8319 emit_insn (gen_movdi (i1, const1_rtx));
8320 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8321 emit_insn (gen_xordi3 (out, i0, i1));
8322
8323 emit_label (donelab);
8324 }
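
/* Conversely, the path above for values >= 2^63 (the 'limit' constant
   9223372036854775808.0) subtracts 2^63 before the signed conversion and
   then flips the sign bit of the integer result with the final XOR,
   which is equivalent to adding 2^63 back without overflowing the signed
   fix operation.  */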
8325
8326 /* Return the string to output a compare and branch instruction to DEST.
8327 DEST is the destination insn (i.e. the label), INSN is the source,
8328 and OP is the conditional expression. */
8329
8330 const char *
8331 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8332 {
8333 machine_mode mode = GET_MODE (XEXP (op, 0));
8334 enum rtx_code code = GET_CODE (op);
8335 const char *cond_str, *tmpl;
8336 int far, emit_nop, len;
8337 static char string[64];
8338 char size_char;
8339
8340 /* Compare and Branch is limited to +-2KB. If it is too far away,
8341 change
8342
8343 cxbne X, Y, .LC30
8344
8345 to
8346
8347 cxbe X, Y, .+16
8348 nop
8349 ba,pt %xcc, .LC30
8350 nop */
8351
8352 len = get_attr_length (insn);
8353
8354 far = len == 4;
8355 emit_nop = len == 2;
8356
8357 if (far)
8358 code = reverse_condition (code);
8359
8360 size_char = ((mode == SImode) ? 'w' : 'x');
8361
8362 switch (code)
8363 {
8364 case NE:
8365 cond_str = "ne";
8366 break;
8367
8368 case EQ:
8369 cond_str = "e";
8370 break;
8371
8372 case GE:
8373 cond_str = "ge";
8374 break;
8375
8376 case GT:
8377 cond_str = "g";
8378 break;
8379
8380 case LE:
8381 cond_str = "le";
8382 break;
8383
8384 case LT:
8385 cond_str = "l";
8386 break;
8387
8388 case GEU:
8389 cond_str = "cc";
8390 break;
8391
8392 case GTU:
8393 cond_str = "gu";
8394 break;
8395
8396 case LEU:
8397 cond_str = "leu";
8398 break;
8399
8400 case LTU:
8401 cond_str = "cs";
8402 break;
8403
8404 default:
8405 gcc_unreachable ();
8406 }
8407
8408 if (far)
8409 {
8410 int veryfar = 1, delta;
8411
8412 if (INSN_ADDRESSES_SET_P ())
8413 {
8414 delta = (INSN_ADDRESSES (INSN_UID (dest))
8415 - INSN_ADDRESSES (INSN_UID (insn)));
8416 /* Leave some instructions for "slop". */
8417 if (delta >= -260000 && delta < 260000)
8418 veryfar = 0;
8419 }
8420
8421 if (veryfar)
8422 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8423 else
8424 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8425 }
8426 else
8427 {
8428 if (emit_nop)
8429 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8430 else
8431 tmpl = "c%cb%s\t%%1, %%2, %%3";
8432 }
8433
8434 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8435
8436 return string;
8437 }
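
/* For illustration only, with %1 = %o0, %2 = %o1 and label .L7 the
   templates above expand to, e.g.,

     cwbne %o0, %o1, .L7

   for an in-range SImode comparison, or, when the label is beyond the
   +-2KB cbcond range, to the reversed test followed by an ordinary
   branch:

     cwbe  %o0, %o1, .+16
     nop
     ba,pt %xcc, .L7
     nop  */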
8438
8439 /* Return the string to output a conditional branch to LABEL, testing
8440 register REG. LABEL is the operand number of the label; REG is the
8441 operand number of the reg. OP is the conditional expression. The mode
8442 of REG says what kind of comparison we made.
8443
8444 DEST is the destination insn (i.e. the label), INSN is the source.
8445
8446 REVERSED is nonzero if we should reverse the sense of the comparison.
8447
8448 ANNUL is nonzero if we should generate an annulling branch. */
8449
8450 const char *
8451 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8452 int annul, rtx_insn *insn)
8453 {
8454 static char string[64];
8455 enum rtx_code code = GET_CODE (op);
8456 machine_mode mode = GET_MODE (XEXP (op, 0));
8457 rtx note;
8458 int far;
8459 char *p;
8460
8461 /* Branches on register are limited to +-128KB. If it is too far away,
8462 change
8463
8464 brnz,pt %g1, .LC30
8465
8466 to
8467
8468 brz,pn %g1, .+12
8469 nop
8470 ba,pt %xcc, .LC30
8471
8472 and
8473
8474 brgez,a,pn %o1, .LC29
8475
8476 to
8477
8478 brlz,pt %o1, .+16
8479 nop
8480 ba,pt %xcc, .LC29 */
8481
8482 far = get_attr_length (insn) >= 3;
8483
8484 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8485 if (reversed ^ far)
8486 code = reverse_condition (code);
8487
8488 /* Only 64-bit versions of these instructions exist. */
8489 gcc_assert (mode == DImode);
8490
8491 /* Start by writing the branch condition. */
8492
8493 switch (code)
8494 {
8495 case NE:
8496 strcpy (string, "brnz");
8497 break;
8498
8499 case EQ:
8500 strcpy (string, "brz");
8501 break;
8502
8503 case GE:
8504 strcpy (string, "brgez");
8505 break;
8506
8507 case LT:
8508 strcpy (string, "brlz");
8509 break;
8510
8511 case LE:
8512 strcpy (string, "brlez");
8513 break;
8514
8515 case GT:
8516 strcpy (string, "brgz");
8517 break;
8518
8519 default:
8520 gcc_unreachable ();
8521 }
8522
8523 p = strchr (string, '\0');
8524
8525 /* Now add the annulling, reg, label, and nop. */
8526 if (annul && ! far)
8527 {
8528 strcpy (p, ",a");
8529 p += 2;
8530 }
8531
8532 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8533 {
8534 strcpy (p,
8535 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8536 >= profile_probability::even ()) ^ far)
8537 ? ",pt" : ",pn");
8538 p += 3;
8539 }
8540
8541 *p = p < string + 8 ? '\t' : ' ';
8542 p++;
8543 *p++ = '%';
8544 *p++ = '0' + reg;
8545 *p++ = ',';
8546 *p++ = ' ';
8547 if (far)
8548 {
8549 int veryfar = 1, delta;
8550
8551 if (INSN_ADDRESSES_SET_P ())
8552 {
8553 delta = (INSN_ADDRESSES (INSN_UID (dest))
8554 - INSN_ADDRESSES (INSN_UID (insn)));
8555 /* Leave some instructions for "slop". */
8556 if (delta >= -260000 && delta < 260000)
8557 veryfar = 0;
8558 }
8559
8560 strcpy (p, ".+12\n\t nop\n\t");
8561 /* Skip the next insn if requested or
8562 if we know that it will be a nop. */
8563 if (annul || ! final_sequence)
8564 p[3] = '6';
8565 p += 12;
8566 if (veryfar)
8567 {
8568 strcpy (p, "b\t");
8569 p += 2;
8570 }
8571 else
8572 {
8573 strcpy (p, "ba,pt\t%%xcc, ");
8574 p += 13;
8575 }
8576 }
8577 *p++ = '%';
8578 *p++ = 'l';
8579 *p++ = '0' + label;
8580 *p++ = '%';
8581 *p++ = '#';
8582 *p = '\0';
8583
8584 return string;
8585 }
8586
8587 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8588 Such instructions cannot be used in the delay slot of a return insn on v9.
8589 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8590 */
8591
8592 static int
8593 epilogue_renumber (register rtx *where, int test)
8594 {
8595 register const char *fmt;
8596 register int i;
8597 register enum rtx_code code;
8598
8599 if (*where == 0)
8600 return 0;
8601
8602 code = GET_CODE (*where);
8603
8604 switch (code)
8605 {
8606 case REG:
8607 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8608 return 1;
8609 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8610 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8611 /* fallthrough */
8612 case SCRATCH:
8613 case CC0:
8614 case PC:
8615 case CONST_INT:
8616 case CONST_WIDE_INT:
8617 case CONST_DOUBLE:
8618 return 0;
8619
8620 /* Do not replace the frame pointer with the stack pointer because
8621 it can cause the delayed instruction to load below the stack.
8622 This occurs when instructions like:
8623
8624 (set (reg/i:SI 24 %i0)
8625 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8626 (const_int -20 [0xffffffec])) 0))
8627
8628 are in the return delay slot. */
8629 case PLUS:
8630 if (GET_CODE (XEXP (*where, 0)) == REG
8631 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8632 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8633 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8634 return 1;
8635 break;
8636
8637 case MEM:
8638 if (SPARC_STACK_BIAS
8639 && GET_CODE (XEXP (*where, 0)) == REG
8640 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8641 return 1;
8642 break;
8643
8644 default:
8645 break;
8646 }
8647
8648 fmt = GET_RTX_FORMAT (code);
8649
8650 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8651 {
8652 if (fmt[i] == 'E')
8653 {
8654 register int j;
8655 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8656 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8657 return 1;
8658 }
8659 else if (fmt[i] == 'e'
8660 && epilogue_renumber (&(XEXP (*where, i)), test))
8661 return 1;
8662 }
8663 return 0;
8664 }
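
/* The renaming above reflects the rationale given in the comment at the
   top of this function: the delay slot of a V9 'return' executes with
   the caller's register window already in effect, so an instruction
   moved there must not touch this function's %l or %o registers, and any
   %i register it does use is really the corresponding %o register of the
   restored window, hence the OUTGOING_REGNO substitution.  */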
8665 \f
8666 /* Leaf functions and non-leaf functions have different needs. */
8667
8668 static const int
8669 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8670
8671 static const int
8672 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8673
8674 static const int *const reg_alloc_orders[] = {
8675 reg_leaf_alloc_order,
8676 reg_nonleaf_alloc_order};
8677
8678 void
8679 order_regs_for_local_alloc (void)
8680 {
8681 static int last_order_nonleaf = 1;
8682
8683 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8684 {
8685 last_order_nonleaf = !last_order_nonleaf;
8686 memcpy ((char *) reg_alloc_order,
8687 (const char *) reg_alloc_orders[last_order_nonleaf],
8688 FIRST_PSEUDO_REGISTER * sizeof (int));
8689 }
8690 }
8691 \f
8692 /* Return 1 if REG and MEM are legitimate enough to allow the various
8693 MEM<-->REG splits to be run. */
8694
8695 int
8696 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8697 {
8698 /* Punt if we are here by mistake. */
8699 gcc_assert (reload_completed);
8700
8701 /* We must have an offsettable memory reference. */
8702 if (!offsettable_memref_p (mem))
8703 return 0;
8704
8705 /* If we have legitimate args for ldd/std, we do not want
8706 the split to happen. */
8707 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8708 return 0;
8709
8710 /* Success. */
8711 return 1;
8712 }
8713
8714 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8715
8716 void
8717 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8718 {
8719 rtx high_part = gen_highpart (mode, dest);
8720 rtx low_part = gen_lowpart (mode, dest);
8721 rtx word0 = adjust_address (src, mode, 0);
8722 rtx word1 = adjust_address (src, mode, 4);
8723
8724 if (reg_overlap_mentioned_p (high_part, word1))
8725 {
8726 emit_move_insn_1 (low_part, word1);
8727 emit_move_insn_1 (high_part, word0);
8728 }
8729 else
8730 {
8731 emit_move_insn_1 (high_part, word0);
8732 emit_move_insn_1 (low_part, word1);
8733 }
8734 }
8735
8736 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8737
8738 void
8739 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8740 {
8741 rtx word0 = adjust_address (dest, mode, 0);
8742 rtx word1 = adjust_address (dest, mode, 4);
8743 rtx high_part = gen_highpart (mode, src);
8744 rtx low_part = gen_lowpart (mode, src);
8745
8746 emit_move_insn_1 (word0, high_part);
8747 emit_move_insn_1 (word1, low_part);
8748 }
8749
8750 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8751
8752 int
8753 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8754 {
8755 /* Punt if we are here by mistake. */
8756 gcc_assert (reload_completed);
8757
8758 if (GET_CODE (reg1) == SUBREG)
8759 reg1 = SUBREG_REG (reg1);
8760 if (GET_CODE (reg1) != REG)
8761 return 0;
8762 const int regno1 = REGNO (reg1);
8763
8764 if (GET_CODE (reg2) == SUBREG)
8765 reg2 = SUBREG_REG (reg2);
8766 if (GET_CODE (reg2) != REG)
8767 return 0;
8768 const int regno2 = REGNO (reg2);
8769
8770 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8771 return 1;
8772
8773 if (TARGET_VIS3)
8774 {
8775 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8776 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8777 return 1;
8778 }
8779
8780 return 0;
8781 }
8782
8783 /* Split a REG <--> REG move into a pair of moves in MODE. */
8784
8785 void
8786 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8787 {
8788 rtx dest1 = gen_highpart (mode, dest);
8789 rtx dest2 = gen_lowpart (mode, dest);
8790 rtx src1 = gen_highpart (mode, src);
8791 rtx src2 = gen_lowpart (mode, src);
8792
8793 /* Now emit using the real source and destination we found, swapping
8794 the order if we detect overlap. */
8795 if (reg_overlap_mentioned_p (dest1, src2))
8796 {
8797 emit_move_insn_1 (dest2, src2);
8798 emit_move_insn_1 (dest1, src1);
8799 }
8800 else
8801 {
8802 emit_move_insn_1 (dest1, src1);
8803 emit_move_insn_1 (dest2, src2);
8804 }
8805 }
8806
8807 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8808 This makes them candidates for using ldd and std insns.
8809
8810 Note reg1 and reg2 *must* be hard registers. */
8811
8812 int
8813 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8814 {
8815 /* We might have been passed a SUBREG. */
8816 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8817 return 0;
8818
8819 if (REGNO (reg1) % 2 != 0)
8820 return 0;
8821
8822 /* Integer ldd is deprecated in SPARC V9. */
8823 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8824 return 0;
8825
8826 return (REGNO (reg1) == REGNO (reg2) - 1);
8827 }
8828
8829 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8830 an ldd or std insn.
8831
8832 This can only happen when addr1 and addr2, the addresses in mem1
8833 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8834 addr1 must also be aligned on a 64-bit boundary.
8835
8836 Also, if dependent_reg_rtx is not null, it should not be used to
8837 compute the address for mem1, i.e. we cannot optimize a sequence
8838 like:
8839 ld [%o0], %o0
8840 ld [%o0 + 4], %o1
8841 to
8842 ldd [%o0], %o0
8843 nor:
8844 ld [%g3 + 4], %g3
8845 ld [%g3], %g2
8846 to
8847 ldd [%g3], %g2
8848
8849 But, note that the transformation from:
8850 ld [%g2 + 4], %g3
8851 ld [%g2], %g2
8852 to
8853 ldd [%g2], %g2
8854 is perfectly fine. Thus, the peephole2 patterns always pass us
8855 the destination register of the first load, never the second one.
8856
8857 For stores we don't have a similar problem, so dependent_reg_rtx is
8858 NULL_RTX. */
8859
8860 int
8861 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8862 {
8863 rtx addr1, addr2;
8864 unsigned int reg1;
8865 HOST_WIDE_INT offset1;
8866
8867 /* The mems cannot be volatile. */
8868 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8869 return 0;
8870
8871 /* MEM1 should be aligned on a 64-bit boundary. */
8872 if (MEM_ALIGN (mem1) < 64)
8873 return 0;
8874
8875 addr1 = XEXP (mem1, 0);
8876 addr2 = XEXP (mem2, 0);
8877
8878 /* Extract a register number and offset (if used) from the first addr. */
8879 if (GET_CODE (addr1) == PLUS)
8880 {
8881 /* If not a REG, return zero. */
8882 if (GET_CODE (XEXP (addr1, 0)) != REG)
8883 return 0;
8884 else
8885 {
8886 reg1 = REGNO (XEXP (addr1, 0));
8887 /* The offset must be constant! */
8888 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8889 return 0;
8890 offset1 = INTVAL (XEXP (addr1, 1));
8891 }
8892 }
8893 else if (GET_CODE (addr1) != REG)
8894 return 0;
8895 else
8896 {
8897 reg1 = REGNO (addr1);
8898 /* This was a simple (mem (reg)) expression. Offset is 0. */
8899 offset1 = 0;
8900 }
8901
8902 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8903 if (GET_CODE (addr2) != PLUS)
8904 return 0;
8905
8906 if (GET_CODE (XEXP (addr2, 0)) != REG
8907 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8908 return 0;
8909
8910 if (reg1 != REGNO (XEXP (addr2, 0)))
8911 return 0;
8912
8913 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8914 return 0;
8915
8916 /* The first offset must be evenly divisible by 8 to ensure the
8917 address is 64-bit aligned. */
8918 if (offset1 % 8 != 0)
8919 return 0;
8920
8921 /* The offset for the second addr must be 4 more than the first addr. */
8922 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8923 return 0;
8924
8925 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8926 instructions. */
8927 return 1;
8928 }
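
/* For illustration only, the checks above accept a pair such as

     ld  [%o0], %o2
     ld  [%o0 + 4], %o3

   (with %o0 known to be 8-byte aligned) for fusion into 'ldd [%o0], %o2',
   but reject [%o0 + 4] / [%o0 + 8] because the first offset is not a
   multiple of 8, and reject the pair when the first load overwrites the
   base register (the dependent_reg_rtx case described above).  */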
8929
8930 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8931
8932 rtx
8933 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8934 {
8935 rtx x = widen_memory_access (mem1, mode, 0);
8936 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8937 return x;
8938 }
8939
8940 /* Return 1 if reg is a pseudo, or is the first register in
8941 a hard register pair. This makes it suitable for use in
8942 ldd and std insns. */
8943
8944 int
8945 register_ok_for_ldd (rtx reg)
8946 {
8947 /* We might have been passed a SUBREG. */
8948 if (!REG_P (reg))
8949 return 0;
8950
8951 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8952 return (REGNO (reg) % 2 == 0);
8953
8954 return 1;
8955 }
8956
8957 /* Return 1 if OP, a MEM, has an address which is known to be
8958 aligned to an 8-byte boundary. */
8959
8960 int
8961 memory_ok_for_ldd (rtx op)
8962 {
8963 /* In 64-bit mode, we assume that the address is word-aligned. */
8964 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8965 return 0;
8966
8967 if (! can_create_pseudo_p ()
8968 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8969 return 0;
8970
8971 return 1;
8972 }
8973 \f
8974 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8975
8976 static bool
8977 sparc_print_operand_punct_valid_p (unsigned char code)
8978 {
8979 if (code == '#'
8980 || code == '*'
8981 || code == '('
8982 || code == ')'
8983 || code == '_'
8984 || code == '&')
8985 return true;
8986
8987 return false;
8988 }
8989
8990 /* Implement TARGET_PRINT_OPERAND.
8991 Print operand X (an rtx) in assembler syntax to file FILE.
8992 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8993 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8994
8995 static void
8996 sparc_print_operand (FILE *file, rtx x, int code)
8997 {
8998 const char *s;
8999
9000 switch (code)
9001 {
9002 case '#':
9003 /* Output an insn in a delay slot. */
9004 if (final_sequence)
9005 sparc_indent_opcode = 1;
9006 else
9007 fputs ("\n\t nop", file);
9008 return;
9009 case '*':
9010 /* Output an annul flag if there's nothing for the delay slot and we
9011 are optimizing. This is always used with '(' below.
9012 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9013 this is a dbx bug. So, we only do this when optimizing.
9014 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9015 Always emit a nop in case the next instruction is a branch. */
9016 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9017 fputs (",a", file);
9018 return;
9019 case '(':
9020 /* Output a 'nop' if there's nothing for the delay slot and we are
9021 not optimizing. This is always used with '*' above. */
9022 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9023 fputs ("\n\t nop", file);
9024 else if (final_sequence)
9025 sparc_indent_opcode = 1;
9026 return;
9027 case ')':
9028 /* Output the right displacement from the saved PC on function return.
9029 The caller may have placed an "unimp" insn immediately after the call
9030 so we have to account for it. This insn is used in the 32-bit ABI
9031 when calling a function that returns a non zero-sized structure. The
9032 64-bit ABI doesn't have it. Be careful to have this test be the same
9033 as that for the call. The exception is when sparc_std_struct_return
9034 is enabled, the psABI is followed exactly and the adjustment is made
9035 by the code in sparc_struct_value_rtx. The call emitted is the same
9036 when sparc_std_struct_return is enabled. */
9037 if (!TARGET_ARCH64
9038 && cfun->returns_struct
9039 && !sparc_std_struct_return
9040 && DECL_SIZE (DECL_RESULT (current_function_decl))
9041 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9042 == INTEGER_CST
9043 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9044 fputs ("12", file);
9045 else
9046 fputc ('8', file);
9047 return;
9048 case '_':
9049 /* Output the Embedded Medium/Anywhere code model base register. */
9050 fputs (EMBMEDANY_BASE_REG, file);
9051 return;
9052 case '&':
9053 /* Print some local dynamic TLS name. */
9054 if (const char *name = get_some_local_dynamic_name ())
9055 assemble_name (file, name);
9056 else
9057 output_operand_lossage ("'%%&' used without any "
9058 "local dynamic TLS references");
9059 return;
9060
9061 case 'Y':
9062 /* Adjust the operand to take into account a RESTORE operation. */
9063 if (GET_CODE (x) == CONST_INT)
9064 break;
9065 else if (GET_CODE (x) != REG)
9066 output_operand_lossage ("invalid %%Y operand");
9067 else if (REGNO (x) < 8)
9068 fputs (reg_names[REGNO (x)], file);
9069 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9070 fputs (reg_names[REGNO (x)-16], file);
9071 else
9072 output_operand_lossage ("invalid %%Y operand");
9073 return;
9074 case 'L':
9075 /* Print out the low order register name of a register pair. */
9076 if (WORDS_BIG_ENDIAN)
9077 fputs (reg_names[REGNO (x)+1], file);
9078 else
9079 fputs (reg_names[REGNO (x)], file);
9080 return;
9081 case 'H':
9082 /* Print out the high order register name of a register pair. */
9083 if (WORDS_BIG_ENDIAN)
9084 fputs (reg_names[REGNO (x)], file);
9085 else
9086 fputs (reg_names[REGNO (x)+1], file);
9087 return;
9088 case 'R':
9089 /* Print out the second register name of a register pair or quad.
9090 I.e., R (%o0) => %o1. */
9091 fputs (reg_names[REGNO (x)+1], file);
9092 return;
9093 case 'S':
9094 /* Print out the third register name of a register quad.
9095 I.e., S (%o0) => %o2. */
9096 fputs (reg_names[REGNO (x)+2], file);
9097 return;
9098 case 'T':
9099 /* Print out the fourth register name of a register quad.
9100 I.e., T (%o0) => %o3. */
9101 fputs (reg_names[REGNO (x)+3], file);
9102 return;
9103 case 'x':
9104 /* Print a condition code register. */
9105 if (REGNO (x) == SPARC_ICC_REG)
9106 {
9107 switch (GET_MODE (x))
9108 {
9109 case E_CCmode:
9110 case E_CCNZmode:
9111 case E_CCCmode:
9112 case E_CCVmode:
9113 s = "%icc";
9114 break;
9115 case E_CCXmode:
9116 case E_CCXNZmode:
9117 case E_CCXCmode:
9118 case E_CCXVmode:
9119 s = "%xcc";
9120 break;
9121 default:
9122 gcc_unreachable ();
9123 }
9124 fputs (s, file);
9125 }
9126 else
9127 /* %fccN register */
9128 fputs (reg_names[REGNO (x)], file);
9129 return;
9130 case 'm':
9131 /* Print the operand's address only. */
9132 output_address (GET_MODE (x), XEXP (x, 0));
9133 return;
9134 case 'r':
9135 /* In this case we need a register. Use %g0 if the
9136 operand is const0_rtx. */
9137 if (x == const0_rtx
9138 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9139 {
9140 fputs ("%g0", file);
9141 return;
9142 }
9143 else
9144 break;
9145
9146 case 'A':
9147 switch (GET_CODE (x))
9148 {
9149 case IOR:
9150 s = "or";
9151 break;
9152 case AND:
9153 s = "and";
9154 break;
9155 case XOR:
9156 s = "xor";
9157 break;
9158 default:
9159 output_operand_lossage ("invalid %%A operand");
9160 s = "";
9161 break;
9162 }
9163 fputs (s, file);
9164 return;
9165
9166 case 'B':
9167 switch (GET_CODE (x))
9168 {
9169 case IOR:
9170 s = "orn";
9171 break;
9172 case AND:
9173 s = "andn";
9174 break;
9175 case XOR:
9176 s = "xnor";
9177 break;
9178 default:
9179 output_operand_lossage ("invalid %%B operand");
9180 s = "";
9181 break;
9182 }
9183 fputs (s, file);
9184 return;
9185
9186 /* This is used by the conditional move instructions. */
9187 case 'C':
9188 {
9189 machine_mode mode = GET_MODE (XEXP (x, 0));
9190 switch (GET_CODE (x))
9191 {
9192 case NE:
9193 if (mode == CCVmode || mode == CCXVmode)
9194 s = "vs";
9195 else
9196 s = "ne";
9197 break;
9198 case EQ:
9199 if (mode == CCVmode || mode == CCXVmode)
9200 s = "vc";
9201 else
9202 s = "e";
9203 break;
9204 case GE:
9205 if (mode == CCNZmode || mode == CCXNZmode)
9206 s = "pos";
9207 else
9208 s = "ge";
9209 break;
9210 case GT:
9211 s = "g";
9212 break;
9213 case LE:
9214 s = "le";
9215 break;
9216 case LT:
9217 if (mode == CCNZmode || mode == CCXNZmode)
9218 s = "neg";
9219 else
9220 s = "l";
9221 break;
9222 case GEU:
9223 s = "geu";
9224 break;
9225 case GTU:
9226 s = "gu";
9227 break;
9228 case LEU:
9229 s = "leu";
9230 break;
9231 case LTU:
9232 s = "lu";
9233 break;
9234 case LTGT:
9235 s = "lg";
9236 break;
9237 case UNORDERED:
9238 s = "u";
9239 break;
9240 case ORDERED:
9241 s = "o";
9242 break;
9243 case UNLT:
9244 s = "ul";
9245 break;
9246 case UNLE:
9247 s = "ule";
9248 break;
9249 case UNGT:
9250 s = "ug";
9251 break;
9252 case UNGE:
9253 s = "uge";
9254 break;
9255 case UNEQ:
9256 s = "ue";
9257 break;
9258 default:
9259 output_operand_lossage ("invalid %%C operand");
9260 s = "";
9261 break;
9262 }
9263 fputs (s, file);
9264 return;
9265 }
9266
9267 /* These are used by the movr instruction pattern. */
9268 case 'D':
9269 {
9270 switch (GET_CODE (x))
9271 {
9272 case NE:
9273 s = "ne";
9274 break;
9275 case EQ:
9276 s = "e";
9277 break;
9278 case GE:
9279 s = "gez";
9280 break;
9281 case LT:
9282 s = "lz";
9283 break;
9284 case LE:
9285 s = "lez";
9286 break;
9287 case GT:
9288 s = "gz";
9289 break;
9290 default:
9291 output_operand_lossage ("invalid %%D operand");
9292 s = "";
9293 break;
9294 }
9295 fputs (s, file);
9296 return;
9297 }
9298
9299 case 'b':
9300 {
9301 /* Print a sign-extended character. */
9302 int i = trunc_int_for_mode (INTVAL (x), QImode);
9303 fprintf (file, "%d", i);
9304 return;
9305 }
9306
9307 case 'f':
9308 /* Operand must be a MEM; write its address. */
9309 if (GET_CODE (x) != MEM)
9310 output_operand_lossage ("invalid %%f operand");
9311 output_address (GET_MODE (x), XEXP (x, 0));
9312 return;
9313
9314 case 's':
9315 {
9316 /* Print a sign-extended 32-bit value. */
9317 HOST_WIDE_INT i;
9318 if (GET_CODE(x) == CONST_INT)
9319 i = INTVAL (x);
9320 else
9321 {
9322 output_operand_lossage ("invalid %%s operand");
9323 return;
9324 }
9325 i = trunc_int_for_mode (i, SImode);
9326 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9327 return;
9328 }
9329
9330 case 0:
9331 /* Do nothing special. */
9332 break;
9333
9334 default:
9335 /* Undocumented flag. */
9336 output_operand_lossage ("invalid operand output code");
9337 }
9338
9339 if (GET_CODE (x) == REG)
9340 fputs (reg_names[REGNO (x)], file);
9341 else if (GET_CODE (x) == MEM)
9342 {
9343 fputc ('[', file);
9344 /* Poor Sun assembler doesn't understand absolute addressing. */
9345 if (CONSTANT_P (XEXP (x, 0)))
9346 fputs ("%g0+", file);
9347 output_address (GET_MODE (x), XEXP (x, 0));
9348 fputc (']', file);
9349 }
9350 else if (GET_CODE (x) == HIGH)
9351 {
9352 fputs ("%hi(", file);
9353 output_addr_const (file, XEXP (x, 0));
9354 fputc (')', file);
9355 }
9356 else if (GET_CODE (x) == LO_SUM)
9357 {
9358 sparc_print_operand (file, XEXP (x, 0), 0);
9359 if (TARGET_CM_MEDMID)
9360 fputs ("+%l44(", file);
9361 else
9362 fputs ("+%lo(", file);
9363 output_addr_const (file, XEXP (x, 1));
9364 fputc (')', file);
9365 }
9366 else if (GET_CODE (x) == CONST_DOUBLE)
9367 output_operand_lossage ("floating-point constant not a valid immediate operand");
9368 else
9369 output_addr_const (file, x);
9370 }
9371
9372 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9373
9374 static void
9375 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9376 {
9377 register rtx base, index = 0;
9378 int offset = 0;
9379 register rtx addr = x;
9380
9381 if (REG_P (addr))
9382 fputs (reg_names[REGNO (addr)], file);
9383 else if (GET_CODE (addr) == PLUS)
9384 {
9385 if (CONST_INT_P (XEXP (addr, 0)))
9386 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9387 else if (CONST_INT_P (XEXP (addr, 1)))
9388 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9389 else
9390 base = XEXP (addr, 0), index = XEXP (addr, 1);
9391 if (GET_CODE (base) == LO_SUM)
9392 {
9393 gcc_assert (USE_AS_OFFSETABLE_LO10
9394 && TARGET_ARCH64
9395 && ! TARGET_CM_MEDMID);
9396 output_operand (XEXP (base, 0), 0);
9397 fputs ("+%lo(", file);
9398 output_address (VOIDmode, XEXP (base, 1));
9399 fprintf (file, ")+%d", offset);
9400 }
9401 else
9402 {
9403 fputs (reg_names[REGNO (base)], file);
9404 if (index == 0)
9405 fprintf (file, "%+d", offset);
9406 else if (REG_P (index))
9407 fprintf (file, "+%s", reg_names[REGNO (index)]);
9408 else if (GET_CODE (index) == SYMBOL_REF
9409 || GET_CODE (index) == LABEL_REF
9410 || GET_CODE (index) == CONST)
9411 fputc ('+', file), output_addr_const (file, index);
9412 else gcc_unreachable ();
9413 }
9414 }
9415 else if (GET_CODE (addr) == MINUS
9416 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9417 {
9418 output_addr_const (file, XEXP (addr, 0));
9419 fputs ("-(", file);
9420 output_addr_const (file, XEXP (addr, 1));
9421 fputs ("-.)", file);
9422 }
9423 else if (GET_CODE (addr) == LO_SUM)
9424 {
9425 output_operand (XEXP (addr, 0), 0);
9426 if (TARGET_CM_MEDMID)
9427 fputs ("+%l44(", file);
9428 else
9429 fputs ("+%lo(", file);
9430 output_address (VOIDmode, XEXP (addr, 1));
9431 fputc (')', file);
9432 }
9433 else if (flag_pic
9434 && GET_CODE (addr) == CONST
9435 && GET_CODE (XEXP (addr, 0)) == MINUS
9436 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9437 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9438 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9439 {
9440 addr = XEXP (addr, 0);
9441 output_addr_const (file, XEXP (addr, 0));
9442 /* Group the args of the second CONST in parentheses. */
9443 fputs ("-(", file);
9444 /* Skip past the second CONST--it does nothing for us. */
9445 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9446 /* Close the parenthesis. */
9447 fputc (')', file);
9448 }
9449 else
9450 {
9451 output_addr_const (file, addr);
9452 }
9453 }
9454 \f
9455 /* Target hook for assembling integer objects. The sparc version has
9456 special handling for aligned DI-mode objects. */
9457
9458 static bool
9459 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9460 {
9461 /* ??? We only output .xword's for symbols and only then in environments
9462 where the assembler can handle them. */
9463 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9464 {
9465 if (TARGET_V9)
9466 {
9467 assemble_integer_with_op ("\t.xword\t", x);
9468 return true;
9469 }
9470 else
9471 {
9472 assemble_aligned_integer (4, const0_rtx);
9473 assemble_aligned_integer (4, x);
9474 return true;
9475 }
9476 }
9477 return default_assemble_integer (x, size, aligned_p);
9478 }
9479 \f
9480 /* Return the value of a code used in the .proc pseudo-op that says
9481 what kind of result this function returns. For non-C types, we pick
9482 the closest C type. */
9483
9484 #ifndef SHORT_TYPE_SIZE
9485 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9486 #endif
9487
9488 #ifndef INT_TYPE_SIZE
9489 #define INT_TYPE_SIZE BITS_PER_WORD
9490 #endif
9491
9492 #ifndef LONG_TYPE_SIZE
9493 #define LONG_TYPE_SIZE BITS_PER_WORD
9494 #endif
9495
9496 #ifndef LONG_LONG_TYPE_SIZE
9497 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9498 #endif
9499
9500 #ifndef FLOAT_TYPE_SIZE
9501 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9502 #endif
9503
9504 #ifndef DOUBLE_TYPE_SIZE
9505 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9506 #endif
9507
9508 #ifndef LONG_DOUBLE_TYPE_SIZE
9509 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9510 #endif
9511
9512 unsigned long
9513 sparc_type_code (register tree type)
9514 {
9515 register unsigned long qualifiers = 0;
9516 register unsigned shift;
9517
9518 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9519 setting more, since some assemblers will give an error for this. Also,
9520 we must be careful to avoid shifts of 32 bits or more to avoid getting
9521 unpredictable results. */
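/* As a worked illustration of this encoding: for the C type "int **",
   the loop below records two POINTER_TYPE levels, (1 << 6) and (1 << 8),
   and then reaches the INTEGER_TYPE, which supplies the base code 4
   (assuming int is signed with INT_TYPE_SIZE precision), for a final
   value of 0x144.  */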
9522
9523 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9524 {
9525 switch (TREE_CODE (type))
9526 {
9527 case ERROR_MARK:
9528 return qualifiers;
9529
9530 case ARRAY_TYPE:
9531 qualifiers |= (3 << shift);
9532 break;
9533
9534 case FUNCTION_TYPE:
9535 case METHOD_TYPE:
9536 qualifiers |= (2 << shift);
9537 break;
9538
9539 case POINTER_TYPE:
9540 case REFERENCE_TYPE:
9541 case OFFSET_TYPE:
9542 qualifiers |= (1 << shift);
9543 break;
9544
9545 case RECORD_TYPE:
9546 return (qualifiers | 8);
9547
9548 case UNION_TYPE:
9549 case QUAL_UNION_TYPE:
9550 return (qualifiers | 9);
9551
9552 case ENUMERAL_TYPE:
9553 return (qualifiers | 10);
9554
9555 case VOID_TYPE:
9556 return (qualifiers | 16);
9557
9558 case INTEGER_TYPE:
9559 /* If this is a range type, consider it to be the underlying
9560 type. */
9561 if (TREE_TYPE (type) != 0)
9562 break;
9563
9564 /* Carefully distinguish all the standard types of C,
9565 without messing up if the language is not C. We do this by
9566 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9567 look at both the names and the above fields, but that's redundant.
9568 Any type whose size is between two C types will be considered
9569 to be the wider of the two types. Also, we do not have a
9570 special code to use for "long long", so anything wider than
9571 long is treated the same. Note that we can't distinguish
9572 between "int" and "long" in this code if they are the same
9573 size, but that's fine, since neither can the assembler. */
9574
9575 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9576 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9577
9578 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9579 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9580
9581 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9582 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9583
9584 else
9585 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9586
9587 case REAL_TYPE:
9588 /* If this is a range type, consider it to be the underlying
9589 type. */
9590 if (TREE_TYPE (type) != 0)
9591 break;
9592
9593 /* Carefully distinguish all the standard types of C,
9594 without messing up if the language is not C. */
9595
9596 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9597 return (qualifiers | 6);
9598
9599 else
9600 return (qualifiers | 7);
9601
9602 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9603 /* ??? We need to distinguish between double and float complex types,
9604 but I don't know how yet because I can't reach this code from
9605 existing front-ends. */
9606 return (qualifiers | 7); /* Who knows? */
9607
9608 case VECTOR_TYPE:
9609 case BOOLEAN_TYPE: /* Boolean truth value type. */
9610 case LANG_TYPE:
9611 case NULLPTR_TYPE:
9612 return qualifiers;
9613
9614 default:
9615 gcc_unreachable (); /* Not a type! */
9616 }
9617 }
9618
9619 return qualifiers;
9620 }
9621 \f
9622 /* Nested function support. */
9623
9624 /* Emit RTL insns to initialize the variable parts of a trampoline.
9625 FNADDR is an RTX for the address of the function's pure code.
9626 CXT is an RTX for the static chain value for the function.
9627
9628 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9629 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9630 (to store insns). This is a bit excessive. Perhaps a different
9631 mechanism would be better here.
9632
9633 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9634
9635 static void
9636 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9637 {
9638 /* SPARC 32-bit trampoline:
9639
9640 sethi %hi(fn), %g1
9641 sethi %hi(static), %g2
9642 jmp %g1+%lo(fn)
9643 or %g2, %lo(static), %g2
9644
9645 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9646 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9647 */
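/* The SImode constants used below are exactly these four instructions
   with zeroed immediate fields: 0x03000000 is "sethi %hi(0), %g1",
   0x05000000 is "sethi %hi(0), %g2", 0x81c06000 is "jmp %g1+%lo(0)" and
   0x8410a000 is "or %g2, %lo(0), %g2"; the address pieces computed at
   run time are OR'ed into them before being stored.  */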
9648
9649 emit_move_insn
9650 (adjust_address (m_tramp, SImode, 0),
9651 expand_binop (SImode, ior_optab,
9652 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9653 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9654 NULL_RTX, 1, OPTAB_DIRECT));
9655
9656 emit_move_insn
9657 (adjust_address (m_tramp, SImode, 4),
9658 expand_binop (SImode, ior_optab,
9659 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9660 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9661 NULL_RTX, 1, OPTAB_DIRECT));
9662
9663 emit_move_insn
9664 (adjust_address (m_tramp, SImode, 8),
9665 expand_binop (SImode, ior_optab,
9666 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9667 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9668 NULL_RTX, 1, OPTAB_DIRECT));
9669
9670 emit_move_insn
9671 (adjust_address (m_tramp, SImode, 12),
9672 expand_binop (SImode, ior_optab,
9673 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9674 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9675 NULL_RTX, 1, OPTAB_DIRECT));
9676
9677 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9678 aligned on a 16 byte boundary so one flush clears it all. */
9679 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9680 if (sparc_cpu != PROCESSOR_ULTRASPARC
9681 && sparc_cpu != PROCESSOR_ULTRASPARC3
9682 && sparc_cpu != PROCESSOR_NIAGARA
9683 && sparc_cpu != PROCESSOR_NIAGARA2
9684 && sparc_cpu != PROCESSOR_NIAGARA3
9685 && sparc_cpu != PROCESSOR_NIAGARA4
9686 && sparc_cpu != PROCESSOR_NIAGARA7
9687 && sparc_cpu != PROCESSOR_M8)
9688 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9689
9690 /* Call __enable_execute_stack after writing onto the stack to make sure
9691 the stack address is accessible. */
9692 #ifdef HAVE_ENABLE_EXECUTE_STACK
9693 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9694 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9695 #endif
9696
9697 }
9698
9699 /* The 64-bit version is simpler because it makes more sense to load the
9700 values as "immediate" data out of the trampoline. It's also easier since
9701 we can read the PC without clobbering a register. */
9702
9703 static void
9704 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9705 {
9706 /* SPARC 64-bit trampoline:
9707
9708 rd %pc, %g1
9709 ldx [%g1+24], %g5
9710 jmp %g5
9711 ldx [%g1+16], %g5
9712 +16 bytes data
9713 */
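/* The first four SImode constants stored below encode the instructions
   in the template above: 0x83414000 is "rd %pc, %g1", 0xca586018 is
   "ldx [%g1+24], %g5", 0x81c14000 is "jmp %g5" and 0xca586010 is
   "ldx [%g1+16], %g5"; the static chain and function address follow as
   the 16 bytes of data.  */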
9714
9715 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9716 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9717 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9718 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9719 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9720 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9721 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9722 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9723 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9724 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9725 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9726
9727 if (sparc_cpu != PROCESSOR_ULTRASPARC
9728 && sparc_cpu != PROCESSOR_ULTRASPARC3
9729 && sparc_cpu != PROCESSOR_NIAGARA
9730 && sparc_cpu != PROCESSOR_NIAGARA2
9731 && sparc_cpu != PROCESSOR_NIAGARA3
9732 && sparc_cpu != PROCESSOR_NIAGARA4
9733 && sparc_cpu != PROCESSOR_NIAGARA7
9734 && sparc_cpu != PROCESSOR_M8)
9735 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9736
9737 /* Call __enable_execute_stack after writing onto the stack to make sure
9738 the stack address is accessible. */
9739 #ifdef HAVE_ENABLE_EXECUTE_STACK
9740 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9741 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9742 #endif
9743 }
9744
9745 /* Worker for TARGET_TRAMPOLINE_INIT. */
9746
9747 static void
9748 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9749 {
9750 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9751 cxt = force_reg (Pmode, cxt);
9752 if (TARGET_ARCH64)
9753 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9754 else
9755 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9756 }
9757 \f
9758 /* Adjust the cost of a scheduling dependency. Return the new cost of
9759 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9760
9761 static int
9762 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9763 int cost)
9764 {
9765 enum attr_type insn_type;
9766
9767 if (recog_memoized (insn) < 0)
9768 return cost;
9769
9770 insn_type = get_attr_type (insn);
9771
9772 if (dep_type == 0)
9773 {
9774 /* Data dependency; DEP_INSN writes a register that INSN reads some
9775 cycles later. */
9776
9777 /* If a load, then the dependence must be on the memory address;
9778 add an extra "cycle". Note that the cost could be two cycles
9779 if the reg was written late in an instruction group; we cannot tell
9780 here. */
9781 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9782 return cost + 3;
9783
9784 /* Get the delay only if the address of the store is the dependence. */
9785 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9786 {
9787 rtx pat = PATTERN(insn);
9788 rtx dep_pat = PATTERN (dep_insn);
9789
9790 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9791 return cost; /* This should not happen! */
9792
9793 /* The dependency between the two instructions was on the data that
9794 is being stored. Assume that this implies that the address of the
9795 store is not dependent. */
9796 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9797 return cost;
9798
9799 return cost + 3; /* An approximation. */
9800 }
9801
9802 /* A shift instruction cannot receive its data from an instruction
9803 in the same cycle; add a one cycle penalty. */
9804 if (insn_type == TYPE_SHIFT)
9805 return cost + 3; /* Split before cascade into shift. */
9806 }
9807 else
9808 {
9809 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9810 INSN writes some cycles later. */
9811
9812 /* These are only significant for the fpu unit; writing a fp reg before
9813 the fpu has finished with it stalls the processor. */
9814
9815 /* Reusing an integer register causes no problems. */
9816 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9817 return 0;
9818 }
9819
9820 return cost;
9821 }
9822
9823 static int
9824 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9825 int cost)
9826 {
9827 enum attr_type insn_type, dep_type;
9828 rtx pat = PATTERN(insn);
9829 rtx dep_pat = PATTERN (dep_insn);
9830
9831 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9832 return cost;
9833
9834 insn_type = get_attr_type (insn);
9835 dep_type = get_attr_type (dep_insn);
9836
9837 switch (dtype)
9838 {
9839 case 0:
9840 /* Data dependency; DEP_INSN writes a register that INSN reads some
9841 cycles later. */
9842
9843 switch (insn_type)
9844 {
9845 case TYPE_STORE:
9846 case TYPE_FPSTORE:
9847 /* Get the delay iff the address of the store is the dependence. */
9848 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9849 return cost;
9850
9851 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9852 return cost;
9853 return cost + 3;
9854
9855 case TYPE_LOAD:
9856 case TYPE_SLOAD:
9857 case TYPE_FPLOAD:
9858 /* If a load, then the dependence must be on the memory address. If
9859 the addresses aren't equal, then it might be a false dependency. */
9860 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9861 {
9862 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9863 || GET_CODE (SET_DEST (dep_pat)) != MEM
9864 || GET_CODE (SET_SRC (pat)) != MEM
9865 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9866 XEXP (SET_SRC (pat), 0)))
9867 return cost + 2;
9868
9869 return cost + 8;
9870 }
9871 break;
9872
9873 case TYPE_BRANCH:
9874 /* Compare to branch latency is 0. There is no benefit from
9875 separating compare and branch. */
9876 if (dep_type == TYPE_COMPARE)
9877 return 0;
9878 /* Floating point compare to branch latency is less than
9879 compare to conditional move. */
9880 if (dep_type == TYPE_FPCMP)
9881 return cost - 1;
9882 break;
9883 default:
9884 break;
9885 }
9886 break;
9887
9888 case REG_DEP_ANTI:
9889 /* Anti-dependencies only penalize the fpu unit. */
9890 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9891 return 0;
9892 break;
9893
9894 default:
9895 break;
9896 }
9897
9898 return cost;
9899 }
9900
9901 static int
9902 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9903 unsigned int)
9904 {
9905 switch (sparc_cpu)
9906 {
9907 case PROCESSOR_SUPERSPARC:
9908 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9909 break;
9910 case PROCESSOR_HYPERSPARC:
9911 case PROCESSOR_SPARCLITE86X:
9912 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9913 break;
9914 default:
9915 break;
9916 }
9917 return cost;
9918 }
9919
9920 static void
9921 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9922 int sched_verbose ATTRIBUTE_UNUSED,
9923 int max_ready ATTRIBUTE_UNUSED)
9924 {}
9925
9926 static int
9927 sparc_use_sched_lookahead (void)
9928 {
9929 if (sparc_cpu == PROCESSOR_NIAGARA
9930 || sparc_cpu == PROCESSOR_NIAGARA2
9931 || sparc_cpu == PROCESSOR_NIAGARA3)
9932 return 0;
9933 if (sparc_cpu == PROCESSOR_NIAGARA4
9934 || sparc_cpu == PROCESSOR_NIAGARA7
9935 || sparc_cpu == PROCESSOR_M8)
9936 return 2;
9937 if (sparc_cpu == PROCESSOR_ULTRASPARC
9938 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9939 return 4;
9940 if ((1 << sparc_cpu) &
9941 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9942 (1 << PROCESSOR_SPARCLITE86X)))
9943 return 3;
9944 return 0;
9945 }
9946
9947 static int
9948 sparc_issue_rate (void)
9949 {
9950 switch (sparc_cpu)
9951 {
9952 case PROCESSOR_NIAGARA:
9953 case PROCESSOR_NIAGARA2:
9954 case PROCESSOR_NIAGARA3:
9955 default:
9956 return 1;
9957 case PROCESSOR_NIAGARA4:
9958 case PROCESSOR_NIAGARA7:
9959 case PROCESSOR_V9:
9960 /* Assume V9 processors are capable of at least dual-issue. */
9961 return 2;
9962 case PROCESSOR_SUPERSPARC:
9963 return 3;
9964 case PROCESSOR_HYPERSPARC:
9965 case PROCESSOR_SPARCLITE86X:
9966 return 2;
9967 case PROCESSOR_ULTRASPARC:
9968 case PROCESSOR_ULTRASPARC3:
9969 case PROCESSOR_M8:
9970 return 4;
9971 }
9972 }
9973
9974 static int
9975 set_extends (rtx_insn *insn)
9976 {
9977 register rtx pat = PATTERN (insn);
9978
9979 switch (GET_CODE (SET_SRC (pat)))
9980 {
9981 /* Load and some shift instructions zero extend. */
9982 case MEM:
9983 case ZERO_EXTEND:
9984 /* sethi clears the high bits */
9985 case HIGH:
9986 /* LO_SUM is used with sethi. sethi cleared the high
9987 bits and the values used with lo_sum are positive */
9988 case LO_SUM:
9989 /* Store flag stores 0 or 1 */
9990 case LT: case LTU:
9991 case GT: case GTU:
9992 case LE: case LEU:
9993 case GE: case GEU:
9994 case EQ:
9995 case NE:
9996 return 1;
9997 case AND:
9998 {
9999 rtx op0 = XEXP (SET_SRC (pat), 0);
10000 rtx op1 = XEXP (SET_SRC (pat), 1);
10001 if (GET_CODE (op1) == CONST_INT)
10002 return INTVAL (op1) >= 0;
10003 if (GET_CODE (op0) != REG)
10004 return 0;
10005 if (sparc_check_64 (op0, insn) == 1)
10006 return 1;
10007 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10008 }
10009 case IOR:
10010 case XOR:
10011 {
10012 rtx op0 = XEXP (SET_SRC (pat), 0);
10013 rtx op1 = XEXP (SET_SRC (pat), 1);
10014 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10015 return 0;
10016 if (GET_CODE (op1) == CONST_INT)
10017 return INTVAL (op1) >= 0;
10018 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10019 }
10020 case LSHIFTRT:
10021 return GET_MODE (SET_SRC (pat)) == SImode;
10022 /* Positive integers leave the high bits zero. */
10023 case CONST_INT:
10024 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10025 case ASHIFTRT:
10026 case SIGN_EXTEND:
10027 return - (GET_MODE (SET_SRC (pat)) == SImode);
10028 case REG:
10029 return sparc_check_64 (SET_SRC (pat), insn);
10030 default:
10031 return 0;
10032 }
10033 }
10034
10035 /* We _ought_ to have only one kind per function, but... */
10036 static GTY(()) rtx sparc_addr_diff_list;
10037 static GTY(()) rtx sparc_addr_list;
10038
10039 void
10040 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10041 {
10042 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10043 if (diff)
10044 sparc_addr_diff_list
10045 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10046 else
10047 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10048 }
10049
10050 static void
10051 sparc_output_addr_vec (rtx vec)
10052 {
10053 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10054 int idx, vlen = XVECLEN (body, 0);
10055
10056 #ifdef ASM_OUTPUT_ADDR_VEC_START
10057 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10058 #endif
10059
10060 #ifdef ASM_OUTPUT_CASE_LABEL
10061 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10062 NEXT_INSN (lab));
10063 #else
10064 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10065 #endif
10066
10067 for (idx = 0; idx < vlen; idx++)
10068 {
10069 ASM_OUTPUT_ADDR_VEC_ELT
10070 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10071 }
10072
10073 #ifdef ASM_OUTPUT_ADDR_VEC_END
10074 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10075 #endif
10076 }
10077
10078 static void
10079 sparc_output_addr_diff_vec (rtx vec)
10080 {
10081 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10082 rtx base = XEXP (XEXP (body, 0), 0);
10083 int idx, vlen = XVECLEN (body, 1);
10084
10085 #ifdef ASM_OUTPUT_ADDR_VEC_START
10086 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10087 #endif
10088
10089 #ifdef ASM_OUTPUT_CASE_LABEL
10090 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10091 NEXT_INSN (lab));
10092 #else
10093 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10094 #endif
10095
10096 for (idx = 0; idx < vlen; idx++)
10097 {
10098 ASM_OUTPUT_ADDR_DIFF_ELT
10099 (asm_out_file,
10100 body,
10101 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10102 CODE_LABEL_NUMBER (base));
10103 }
10104
10105 #ifdef ASM_OUTPUT_ADDR_VEC_END
10106 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10107 #endif
10108 }
10109
10110 static void
10111 sparc_output_deferred_case_vectors (void)
10112 {
10113 rtx t;
10114 int align;
10115
10116 if (sparc_addr_list == NULL_RTX
10117 && sparc_addr_diff_list == NULL_RTX)
10118 return;
10119
10120 /* Align to cache line in the function's code section. */
10121 switch_to_section (current_function_section ());
10122
10123 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10124 if (align > 0)
10125 ASM_OUTPUT_ALIGN (asm_out_file, align);
10126
10127 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10128 sparc_output_addr_vec (XEXP (t, 0));
10129 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10130 sparc_output_addr_diff_vec (XEXP (t, 0));
10131
10132 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10133 }
10134
10135 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10136 unknown. Return 1 if the high bits are zero, -1 if the register is
10137 sign extended. */
10138 int
10139 sparc_check_64 (rtx x, rtx_insn *insn)
10140 {
10141 /* If a register is set only once it is safe to ignore insns this
10142 code does not know how to handle. The loop will either recognize
10143 the single set and return the correct value or fail to recognize
10144 it and return 0. */
10145 int set_once = 0;
10146 rtx y = x;
10147
10148 gcc_assert (GET_CODE (x) == REG);
10149
10150 if (GET_MODE (x) == DImode)
10151 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10152
10153 if (flag_expensive_optimizations
10154 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10155 set_once = 1;
10156
10157 if (insn == 0)
10158 {
10159 if (set_once)
10160 insn = get_last_insn_anywhere ();
10161 else
10162 return 0;
10163 }
10164
10165 while ((insn = PREV_INSN (insn)))
10166 {
10167 switch (GET_CODE (insn))
10168 {
10169 case JUMP_INSN:
10170 case NOTE:
10171 break;
10172 case CODE_LABEL:
10173 case CALL_INSN:
10174 default:
10175 if (! set_once)
10176 return 0;
10177 break;
10178 case INSN:
10179 {
10180 rtx pat = PATTERN (insn);
10181 if (GET_CODE (pat) != SET)
10182 return 0;
10183 if (rtx_equal_p (x, SET_DEST (pat)))
10184 return set_extends (insn);
10185 if (y && rtx_equal_p (y, SET_DEST (pat)))
10186 return set_extends (insn);
10187 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10188 return 0;
10189 }
10190 }
10191 }
10192 return 0;
10193 }
10194
10195 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10196 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
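/* Since the 64-bit value is held in a register pair in V8+ mode, the
   sequence emitted below first assembles it into the 64-bit scratch
   register (sllx of the high word, or of the low word), applies OPCODE
   to the scratch, and then splits the 64-bit result back into the
   %H0/%L0 pair with srlx.  */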
10197
10198 const char *
10199 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10200 {
10201 static char asm_code[60];
10202
10203 /* The scratch register is only required when the destination
10204 register is not a 64-bit global or out register. */
10205 if (which_alternative != 2)
10206 operands[3] = operands[0];
10207
10208 /* We can only shift by constants <= 63. */
10209 if (GET_CODE (operands[2]) == CONST_INT)
10210 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10211
10212 if (GET_CODE (operands[1]) == CONST_INT)
10213 {
10214 output_asm_insn ("mov\t%1, %3", operands);
10215 }
10216 else
10217 {
10218 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10219 if (sparc_check_64 (operands[1], insn) <= 0)
10220 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10221 output_asm_insn ("or\t%L1, %3, %3", operands);
10222 }
10223
10224 strcpy (asm_code, opcode);
10225
10226 if (which_alternative != 2)
10227 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10228 else
10229 return
10230 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10231 }
10232 \f
10233 /* Output rtl to increment the profiler label LABELNO
10234 for profiling a function entry. */
10235
10236 void
10237 sparc_profile_hook (int labelno)
10238 {
10239 char buf[32];
10240 rtx lab, fun;
10241
10242 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10243 if (NO_PROFILE_COUNTERS)
10244 {
10245 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10246 }
10247 else
10248 {
10249 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10250 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10251 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10252 }
10253 }
10254 \f
10255 #ifdef TARGET_SOLARIS
10256 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10257
10258 static void
10259 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10260 tree decl ATTRIBUTE_UNUSED)
10261 {
10262 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10263 {
10264 solaris_elf_asm_comdat_section (name, flags, decl);
10265 return;
10266 }
10267
10268 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10269
10270 if (!(flags & SECTION_DEBUG))
10271 fputs (",#alloc", asm_out_file);
10272 if (flags & SECTION_WRITE)
10273 fputs (",#write", asm_out_file);
10274 if (flags & SECTION_TLS)
10275 fputs (",#tls", asm_out_file);
10276 if (flags & SECTION_CODE)
10277 fputs (",#execinstr", asm_out_file);
10278
10279 if (flags & SECTION_NOTYPE)
10280 ;
10281 else if (flags & SECTION_BSS)
10282 fputs (",#nobits", asm_out_file);
10283 else
10284 fputs (",#progbits", asm_out_file);
10285
10286 fputc ('\n', asm_out_file);
10287 }
10288 #endif /* TARGET_SOLARIS */
10289
10290 /* We do not allow indirect calls to be optimized into sibling calls.
10291
10292 We cannot use sibling calls when delayed branches are disabled
10293 because they will likely require the call delay slot to be filled.
10294
10295 Also, on SPARC 32-bit we cannot emit a sibling call when the
10296 current function returns a structure. This is because the "unimp
10297 after call" convention would cause the callee to return to the
10298 wrong place. The generic code already disallows cases where the
10299 function being called returns a structure.
10300
10301 It may seem strange how this last case could occur. Usually there
10302 is code after the call which jumps to epilogue code which dumps the
10303 return value into the struct return area. That ought to invalidate
10304 the sibling call right? Well, in the C++ case we can end up passing
10305 the pointer to the struct return area to a constructor (which returns
10306 void) and then nothing else happens. Such a sibling call would look
10307 valid without the added check here.
10308
10309 VxWorks PIC PLT entries require the global pointer to be initialized
10310 on entry. We therefore can't emit sibling calls to them. */
10311 static bool
10312 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10313 {
10314 return (decl
10315 && flag_delayed_branch
10316 && (TARGET_ARCH64 || ! cfun->returns_struct)
10317 && !(TARGET_VXWORKS_RTP
10318 && flag_pic
10319 && !targetm.binds_local_p (decl)));
10320 }
10321 \f
10322 /* libfunc renaming. */
10323
10324 static void
10325 sparc_init_libfuncs (void)
10326 {
10327 if (TARGET_ARCH32)
10328 {
10329 /* Use the subroutines that Sun's library provides for integer
10330 multiply and divide. The `*' prevents an underscore from
10331 being prepended by the compiler. .umul is a little faster
10332 than .mul. */
10333 set_optab_libfunc (smul_optab, SImode, "*.umul");
10334 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10335 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10336 set_optab_libfunc (smod_optab, SImode, "*.rem");
10337 set_optab_libfunc (umod_optab, SImode, "*.urem");
10338
10339 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10340 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10341 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10342 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10343 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10344 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10345
10346 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10347 is because with soft-float, the SFmode and DFmode sqrt
10348 instructions will be absent, and the compiler will notice and
10349 try to use the TFmode sqrt instruction for calls to the
10350 builtin function sqrt, but this fails. */
10351 if (TARGET_FPU)
10352 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10353
10354 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10355 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10356 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10357 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10358 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10359 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10360
10361 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10362 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10363 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10364 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10365
10366 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10367 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10368 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10369 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10370
10371 if (DITF_CONVERSION_LIBFUNCS)
10372 {
10373 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10374 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10375 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10376 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10377 }
10378
10379 if (SUN_CONVERSION_LIBFUNCS)
10380 {
10381 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10382 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10383 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10384 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10385 }
10386 }
10387 if (TARGET_ARCH64)
10388 {
10389 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10390 do not exist in the library. Make sure the compiler does not
10391 emit calls to them by accident. (It should always use the
10392 hardware instructions.) */
10393 set_optab_libfunc (smul_optab, SImode, 0);
10394 set_optab_libfunc (sdiv_optab, SImode, 0);
10395 set_optab_libfunc (udiv_optab, SImode, 0);
10396 set_optab_libfunc (smod_optab, SImode, 0);
10397 set_optab_libfunc (umod_optab, SImode, 0);
10398
10399 if (SUN_INTEGER_MULTIPLY_64)
10400 {
10401 set_optab_libfunc (smul_optab, DImode, "__mul64");
10402 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10403 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10404 set_optab_libfunc (smod_optab, DImode, "__rem64");
10405 set_optab_libfunc (umod_optab, DImode, "__urem64");
10406 }
10407
10408 if (SUN_CONVERSION_LIBFUNCS)
10409 {
10410 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10411 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10412 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10413 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10414 }
10415 }
10416 }
10417 \f
10418 /* SPARC builtins. */
10419 enum sparc_builtins
10420 {
10421 /* FPU builtins. */
10422 SPARC_BUILTIN_LDFSR,
10423 SPARC_BUILTIN_STFSR,
10424
10425 /* VIS 1.0 builtins. */
10426 SPARC_BUILTIN_FPACK16,
10427 SPARC_BUILTIN_FPACK32,
10428 SPARC_BUILTIN_FPACKFIX,
10429 SPARC_BUILTIN_FEXPAND,
10430 SPARC_BUILTIN_FPMERGE,
10431 SPARC_BUILTIN_FMUL8X16,
10432 SPARC_BUILTIN_FMUL8X16AU,
10433 SPARC_BUILTIN_FMUL8X16AL,
10434 SPARC_BUILTIN_FMUL8SUX16,
10435 SPARC_BUILTIN_FMUL8ULX16,
10436 SPARC_BUILTIN_FMULD8SUX16,
10437 SPARC_BUILTIN_FMULD8ULX16,
10438 SPARC_BUILTIN_FALIGNDATAV4HI,
10439 SPARC_BUILTIN_FALIGNDATAV8QI,
10440 SPARC_BUILTIN_FALIGNDATAV2SI,
10441 SPARC_BUILTIN_FALIGNDATADI,
10442 SPARC_BUILTIN_WRGSR,
10443 SPARC_BUILTIN_RDGSR,
10444 SPARC_BUILTIN_ALIGNADDR,
10445 SPARC_BUILTIN_ALIGNADDRL,
10446 SPARC_BUILTIN_PDIST,
10447 SPARC_BUILTIN_EDGE8,
10448 SPARC_BUILTIN_EDGE8L,
10449 SPARC_BUILTIN_EDGE16,
10450 SPARC_BUILTIN_EDGE16L,
10451 SPARC_BUILTIN_EDGE32,
10452 SPARC_BUILTIN_EDGE32L,
10453 SPARC_BUILTIN_FCMPLE16,
10454 SPARC_BUILTIN_FCMPLE32,
10455 SPARC_BUILTIN_FCMPNE16,
10456 SPARC_BUILTIN_FCMPNE32,
10457 SPARC_BUILTIN_FCMPGT16,
10458 SPARC_BUILTIN_FCMPGT32,
10459 SPARC_BUILTIN_FCMPEQ16,
10460 SPARC_BUILTIN_FCMPEQ32,
10461 SPARC_BUILTIN_FPADD16,
10462 SPARC_BUILTIN_FPADD16S,
10463 SPARC_BUILTIN_FPADD32,
10464 SPARC_BUILTIN_FPADD32S,
10465 SPARC_BUILTIN_FPSUB16,
10466 SPARC_BUILTIN_FPSUB16S,
10467 SPARC_BUILTIN_FPSUB32,
10468 SPARC_BUILTIN_FPSUB32S,
10469 SPARC_BUILTIN_ARRAY8,
10470 SPARC_BUILTIN_ARRAY16,
10471 SPARC_BUILTIN_ARRAY32,
10472
10473 /* VIS 2.0 builtins. */
10474 SPARC_BUILTIN_EDGE8N,
10475 SPARC_BUILTIN_EDGE8LN,
10476 SPARC_BUILTIN_EDGE16N,
10477 SPARC_BUILTIN_EDGE16LN,
10478 SPARC_BUILTIN_EDGE32N,
10479 SPARC_BUILTIN_EDGE32LN,
10480 SPARC_BUILTIN_BMASK,
10481 SPARC_BUILTIN_BSHUFFLEV4HI,
10482 SPARC_BUILTIN_BSHUFFLEV8QI,
10483 SPARC_BUILTIN_BSHUFFLEV2SI,
10484 SPARC_BUILTIN_BSHUFFLEDI,
10485
10486 /* VIS 3.0 builtins. */
10487 SPARC_BUILTIN_CMASK8,
10488 SPARC_BUILTIN_CMASK16,
10489 SPARC_BUILTIN_CMASK32,
10490 SPARC_BUILTIN_FCHKSM16,
10491 SPARC_BUILTIN_FSLL16,
10492 SPARC_BUILTIN_FSLAS16,
10493 SPARC_BUILTIN_FSRL16,
10494 SPARC_BUILTIN_FSRA16,
10495 SPARC_BUILTIN_FSLL32,
10496 SPARC_BUILTIN_FSLAS32,
10497 SPARC_BUILTIN_FSRL32,
10498 SPARC_BUILTIN_FSRA32,
10499 SPARC_BUILTIN_PDISTN,
10500 SPARC_BUILTIN_FMEAN16,
10501 SPARC_BUILTIN_FPADD64,
10502 SPARC_BUILTIN_FPSUB64,
10503 SPARC_BUILTIN_FPADDS16,
10504 SPARC_BUILTIN_FPADDS16S,
10505 SPARC_BUILTIN_FPSUBS16,
10506 SPARC_BUILTIN_FPSUBS16S,
10507 SPARC_BUILTIN_FPADDS32,
10508 SPARC_BUILTIN_FPADDS32S,
10509 SPARC_BUILTIN_FPSUBS32,
10510 SPARC_BUILTIN_FPSUBS32S,
10511 SPARC_BUILTIN_FUCMPLE8,
10512 SPARC_BUILTIN_FUCMPNE8,
10513 SPARC_BUILTIN_FUCMPGT8,
10514 SPARC_BUILTIN_FUCMPEQ8,
10515 SPARC_BUILTIN_FHADDS,
10516 SPARC_BUILTIN_FHADDD,
10517 SPARC_BUILTIN_FHSUBS,
10518 SPARC_BUILTIN_FHSUBD,
10519 SPARC_BUILTIN_FNHADDS,
10520 SPARC_BUILTIN_FNHADDD,
10521 SPARC_BUILTIN_UMULXHI,
10522 SPARC_BUILTIN_XMULX,
10523 SPARC_BUILTIN_XMULXHI,
10524
10525 /* VIS 4.0 builtins. */
10526 SPARC_BUILTIN_FPADD8,
10527 SPARC_BUILTIN_FPADDS8,
10528 SPARC_BUILTIN_FPADDUS8,
10529 SPARC_BUILTIN_FPADDUS16,
10530 SPARC_BUILTIN_FPCMPLE8,
10531 SPARC_BUILTIN_FPCMPGT8,
10532 SPARC_BUILTIN_FPCMPULE16,
10533 SPARC_BUILTIN_FPCMPUGT16,
10534 SPARC_BUILTIN_FPCMPULE32,
10535 SPARC_BUILTIN_FPCMPUGT32,
10536 SPARC_BUILTIN_FPMAX8,
10537 SPARC_BUILTIN_FPMAX16,
10538 SPARC_BUILTIN_FPMAX32,
10539 SPARC_BUILTIN_FPMAXU8,
10540 SPARC_BUILTIN_FPMAXU16,
10541 SPARC_BUILTIN_FPMAXU32,
10542 SPARC_BUILTIN_FPMIN8,
10543 SPARC_BUILTIN_FPMIN16,
10544 SPARC_BUILTIN_FPMIN32,
10545 SPARC_BUILTIN_FPMINU8,
10546 SPARC_BUILTIN_FPMINU16,
10547 SPARC_BUILTIN_FPMINU32,
10548 SPARC_BUILTIN_FPSUB8,
10549 SPARC_BUILTIN_FPSUBS8,
10550 SPARC_BUILTIN_FPSUBUS8,
10551 SPARC_BUILTIN_FPSUBUS16,
10552
10553 /* VIS 4.0B builtins. */
10554
10555 /* Note that all the DICTUNPACK* entries should be kept
10556 contiguous. */
10557 SPARC_BUILTIN_FIRST_DICTUNPACK,
10558 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10559 SPARC_BUILTIN_DICTUNPACK16,
10560 SPARC_BUILTIN_DICTUNPACK32,
10561 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10562
10563 /* Note that all the FPCMP*SHL entries should be kept
10564 contiguous. */
10565 SPARC_BUILTIN_FIRST_FPCMPSHL,
10566 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10567 SPARC_BUILTIN_FPCMPGT8SHL,
10568 SPARC_BUILTIN_FPCMPEQ8SHL,
10569 SPARC_BUILTIN_FPCMPNE8SHL,
10570 SPARC_BUILTIN_FPCMPLE16SHL,
10571 SPARC_BUILTIN_FPCMPGT16SHL,
10572 SPARC_BUILTIN_FPCMPEQ16SHL,
10573 SPARC_BUILTIN_FPCMPNE16SHL,
10574 SPARC_BUILTIN_FPCMPLE32SHL,
10575 SPARC_BUILTIN_FPCMPGT32SHL,
10576 SPARC_BUILTIN_FPCMPEQ32SHL,
10577 SPARC_BUILTIN_FPCMPNE32SHL,
10578 SPARC_BUILTIN_FPCMPULE8SHL,
10579 SPARC_BUILTIN_FPCMPUGT8SHL,
10580 SPARC_BUILTIN_FPCMPULE16SHL,
10581 SPARC_BUILTIN_FPCMPUGT16SHL,
10582 SPARC_BUILTIN_FPCMPULE32SHL,
10583 SPARC_BUILTIN_FPCMPUGT32SHL,
10584 SPARC_BUILTIN_FPCMPDE8SHL,
10585 SPARC_BUILTIN_FPCMPDE16SHL,
10586 SPARC_BUILTIN_FPCMPDE32SHL,
10587 SPARC_BUILTIN_FPCMPUR8SHL,
10588 SPARC_BUILTIN_FPCMPUR16SHL,
10589 SPARC_BUILTIN_FPCMPUR32SHL,
10590 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10591
10592 SPARC_BUILTIN_MAX
10593 };
10594
10595 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10596 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10597
10598 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10599 The instruction should require a constant operand of some sort. The
10600 function prints an error if OPVAL is not valid. */
10601
10602 static int
10603 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10604 {
10605 if (GET_CODE (opval) != CONST_INT)
10606 {
10607 error ("%qs expects a constant argument", insn_data[icode].name);
10608 return false;
10609 }
10610
10611 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10612 {
10613 error ("constant argument out of range for %qs", insn_data[icode].name);
10614 return false;
10615 }
10616 return true;
10617 }
10618
10619 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10620 function decl or NULL_TREE if the builtin was not added. */
10621
10622 static tree
10623 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10624 tree type)
10625 {
10626 tree t
10627 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10628
10629 if (t)
10630 {
10631 sparc_builtins[code] = t;
10632 sparc_builtins_icode[code] = icode;
10633 }
10634
10635 return t;
10636 }
10637
10638 /* Likewise, but also marks the function as "const". */
10639
10640 static tree
10641 def_builtin_const (const char *name, enum insn_code icode,
10642 enum sparc_builtins code, tree type)
10643 {
10644 tree t = def_builtin (name, icode, code, type);
10645
10646 if (t)
10647 TREE_READONLY (t) = 1;
10648
10649 return t;
10650 }
10651
10652 /* Implement the TARGET_INIT_BUILTINS target hook.
10653 Create builtin functions for special SPARC instructions. */
10654
10655 static void
10656 sparc_init_builtins (void)
10657 {
10658 if (TARGET_FPU)
10659 sparc_fpu_init_builtins ();
10660
10661 if (TARGET_VIS)
10662 sparc_vis_init_builtins ();
10663 }
10664
10665 /* Create builtin functions for FPU instructions. */
10666
10667 static void
10668 sparc_fpu_init_builtins (void)
10669 {
10670 tree ftype
10671 = build_function_type_list (void_type_node,
10672 build_pointer_type (unsigned_type_node), 0);
10673 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10674 SPARC_BUILTIN_LDFSR, ftype);
10675 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10676 SPARC_BUILTIN_STFSR, ftype);
10677 }
10678
10679 /* Create builtin functions for VIS instructions. */
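/* These builtins are directly callable from user code compiled with -mvis,
   with the operands passed as GCC vector types.  A minimal illustrative
   use of one of them:

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi add16 (v4hi a, v4hi b) { return __builtin_vis_fpadd16 (a, b); }

   which should expand to a single fpadd16 instruction.  */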
10680
10681 static void
10682 sparc_vis_init_builtins (void)
10683 {
10684 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10685 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10686 tree v4hi = build_vector_type (intHI_type_node, 4);
10687 tree v2hi = build_vector_type (intHI_type_node, 2);
10688 tree v2si = build_vector_type (intSI_type_node, 2);
10689 tree v1si = build_vector_type (intSI_type_node, 1);
10690
10691 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10692 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10693 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10694 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10695 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10696 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10697 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10698 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10699 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10700 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10701 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10702 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10703 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10704 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10705 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10706 v8qi, v8qi,
10707 intDI_type_node, 0);
10708 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10709 v8qi, v8qi, 0);
10710 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10711 v8qi, v8qi, 0);
10712 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10713 intSI_type_node, 0);
10714 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10715 intSI_type_node, 0);
10716 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10717 intDI_type_node, 0);
10718 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10719 intDI_type_node,
10720 intDI_type_node, 0);
10721 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10722 intSI_type_node,
10723 intSI_type_node, 0);
10724 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10725 ptr_type_node,
10726 intSI_type_node, 0);
10727 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10728 ptr_type_node,
10729 intDI_type_node, 0);
10730 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10731 ptr_type_node,
10732 ptr_type_node, 0);
10733 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10734 ptr_type_node,
10735 ptr_type_node, 0);
10736 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10737 v4hi, v4hi, 0);
10738 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10739 v2si, v2si, 0);
10740 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10741 v4hi, v4hi, 0);
10742 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10743 v2si, v2si, 0);
10744 tree void_ftype_di = build_function_type_list (void_type_node,
10745 intDI_type_node, 0);
10746 tree di_ftype_void = build_function_type_list (intDI_type_node,
10747 void_type_node, 0);
10748 tree void_ftype_si = build_function_type_list (void_type_node,
10749 intSI_type_node, 0);
10750 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10751 float_type_node,
10752 float_type_node, 0);
10753 tree df_ftype_df_df = build_function_type_list (double_type_node,
10754 double_type_node,
10755 double_type_node, 0);
10756
10757 /* Packing and expanding vectors. */
10758 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10759 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10760 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10761 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10762 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10763 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10764 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10765 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10766 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10767 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10768
10769 /* Multiplications. */
10770 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10771 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10772 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10773 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10774 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10775 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10776 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10777 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10778 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10779 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10780 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10781 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10782 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10783 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10784
10785 /* Data aligning. */
10786 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10787 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10788 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10789 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10790 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10791 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10792 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10793 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10794
10795 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10796 SPARC_BUILTIN_WRGSR, void_ftype_di);
10797 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10798 SPARC_BUILTIN_RDGSR, di_ftype_void);
10799
10800 if (TARGET_ARCH64)
10801 {
10802 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10803 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10804 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10805 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10806 }
10807 else
10808 {
10809 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10810 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10811 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10812 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10813 }
10814
10815 /* Pixel distance. */
10816 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10817 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10818
10819 /* Edge handling. */
10820 if (TARGET_ARCH64)
10821 {
10822 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10823 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10824 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10825 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10826 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10827 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10828 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10829 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10830 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10831 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10832 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10833 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10834 }
10835 else
10836 {
10837 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10838 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10839 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10840 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10841 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10842 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10843 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10844 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10845 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10846 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10847 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10848 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10849 }
10850
10851 /* Pixel compare. */
10852 if (TARGET_ARCH64)
10853 {
10854 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10855 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10856 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10857 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10858 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10859 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10860 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10861 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10862 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10863 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10864 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10865 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10866 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10867 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10868 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10869 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10870 }
10871 else
10872 {
10873 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10874 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10875 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10876 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10877 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10878 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10879 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10880 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10881 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10882 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10883 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10884 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10885 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10886 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10887 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10888 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10889 }
10890
10891 /* Addition and subtraction. */
10892 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10893 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10894 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10895 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10896 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10897 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10898 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10899 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10900 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10901 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10902 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10903 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10904 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10905 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10906 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10907 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10908
10909 /* Three-dimensional array addressing. */
10910 if (TARGET_ARCH64)
10911 {
10912 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10913 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10914 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10915 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10916 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10917 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10918 }
10919 else
10920 {
10921 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10922 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10923 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10924 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10925 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10926 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10927 }
10928
10929 if (TARGET_VIS2)
10930 {
10931 /* Edge handling. */
10932 if (TARGET_ARCH64)
10933 {
10934 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10935 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10936 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10937 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10938 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10939 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10940 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10941 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10942 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10943 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10944 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10945 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10946 }
10947 else
10948 {
10949 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10950 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10951 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10952 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10953 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10954 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10955 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10956 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10957 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10958 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10959 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10960 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10961 }
10962
10963 /* Byte mask and shuffle. */
10964 if (TARGET_ARCH64)
10965 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10966 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10967 else
10968 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10969 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10970 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10971 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10972 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10973 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10974 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10975 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10976 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10977 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10978 }
10979
10980 if (TARGET_VIS3)
10981 {
10982 if (TARGET_ARCH64)
10983 {
10984 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10985 SPARC_BUILTIN_CMASK8, void_ftype_di);
10986 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10987 SPARC_BUILTIN_CMASK16, void_ftype_di);
10988 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10989 SPARC_BUILTIN_CMASK32, void_ftype_di);
10990 }
10991 else
10992 {
10993 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10994 SPARC_BUILTIN_CMASK8, void_ftype_si);
10995 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10996 SPARC_BUILTIN_CMASK16, void_ftype_si);
10997 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10998 SPARC_BUILTIN_CMASK32, void_ftype_si);
10999 }
11000
11001 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11002 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11003
11004 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11005 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11006 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11007 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11008 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11009 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11010 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11011 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11012 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11013 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11014 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11015 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11016 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11017 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11018 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11019 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11020
11021 if (TARGET_ARCH64)
11022 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11023 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11024 else
11025 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11026 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11027
11028 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11029 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11030 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11031 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11032 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11033 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11034
11035 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11036 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11037 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11038 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11039 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11040 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11041 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11042 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11043 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11044 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11045 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11046 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11047 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11048 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11049 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11050 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11051
11052 if (TARGET_ARCH64)
11053 {
11054 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11055 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11056 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11057 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11058 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11059 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11060 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11061 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11062 }
11063 else
11064 {
11065 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11066 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11067 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11068 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11069 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11070 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11071 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11072 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11073 }
11074
11075 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11076 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11077 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11078 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11079 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11080 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11081 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11082 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11083 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11084 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11085 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11086 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11087
11088 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11089 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11090 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11091 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11092 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11093 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11094 }
11095
11096 if (TARGET_VIS4)
11097 {
11098 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11099 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11100 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11101 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11102 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11103 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11104 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11105 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11106
11107
11108 if (TARGET_ARCH64)
11109 {
11110 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11111 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11112 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11113 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11114 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11115 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11116 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11117 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11118 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11119 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11120 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11121 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11122 }
11123 else
11124 {
11125 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11126 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11127 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11128 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11129 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11130 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11131 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11132 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11133 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11134 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11135 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11136 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11137 }
11138
11139 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11140 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11141 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11142 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11143 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11144 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11145 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11146 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11147 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11148 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11149 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11150 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11151 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11152 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11153 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11154 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11155 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11156 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11157 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11158 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11159 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11160 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11161 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11162 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11163 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11164 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11165 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11166 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11167 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11168 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11169 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11170 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11171 }
11172
11173 if (TARGET_VIS4B)
11174 {
11175 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11176 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11177 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11178 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11179 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11180 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11181
11182 if (TARGET_ARCH64)
11183 {
11184 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11185 v8qi, v8qi,
11186 intSI_type_node, 0);
11187 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11188 v4hi, v4hi,
11189 intSI_type_node, 0);
11190 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11191 v2si, v2si,
11192 intSI_type_node, 0);
11193
11194 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11195 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11196 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11197 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11198 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11199 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11200 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11201 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11202
11203 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11204 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11205 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11206 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11207 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11208 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11209 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11210 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11211
11212 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11213 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11214 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11215 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11216 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11217 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11218 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11219 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11220
11221
11222 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11223 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11224 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11225 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11226
11227 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11228 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11229 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11230 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11231
11232 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11233 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11234 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11235 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11236
11237 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11238 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11239 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11240 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11241 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11242 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11243
11244 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11245 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11246 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11247 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11248 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11249 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11250
11251 }
11252 else
11253 {
11254 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11255 v8qi, v8qi,
11256 intSI_type_node, 0);
11257 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11258 v4hi, v4hi,
11259 intSI_type_node, 0);
11260 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11261 v2si, v2si,
11262 intSI_type_node, 0);
11263
11264 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11265 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11266 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11267 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11268 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11269 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11270 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11271 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11272
11273 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11274 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11275 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11276 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11277 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11278 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11279 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11280 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11281
11282 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11283 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11284 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11285 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11286 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11287 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11288 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11289 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11290
11291
11292 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11293 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11294 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11295 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11296
11297 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11298 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11299 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11300 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11301
11302 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11303 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11304 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11305 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11306
11307 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11308 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11309 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11310 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11311 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11312 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11313
11314 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11315 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11316 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11317 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11318 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11319 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11320 }
11321 }
11322 }
11323
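/* As an illustrative sketch of how the builtins registered above are used
   from application code compiled with the matching -mvis* option: a VIS3
   saturating add on an 8-byte vector of shorts (V4HImode) goes through
   __builtin_vis_fpadds16.  The typedef and function below are hypothetical
   and only serve as an example.  */
#if 0
typedef short vec16 __attribute__ ((vector_size (8)));

vec16
example_saturating_add (vec16 a, vec16 b)
{
  /* SPARC_BUILTIN_FPADDS16, expanded via CODE_FOR_ssaddv4hi3.  */
  return __builtin_vis_fpadds16 (a, b);
}
#endif
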
11324 /* Implement TARGET_BUILTIN_DECL hook. */
11325
11326 static tree
11327 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11328 {
11329 if (code >= SPARC_BUILTIN_MAX)
11330 return error_mark_node;
11331
11332 return sparc_builtins[code];
11333 }
11334
11335 /* Implement TARGET_EXPAND_BUILTIN hook. */
11336
11337 static rtx
11338 sparc_expand_builtin (tree exp, rtx target,
11339 rtx subtarget ATTRIBUTE_UNUSED,
11340 machine_mode tmode ATTRIBUTE_UNUSED,
11341 int ignore ATTRIBUTE_UNUSED)
11342 {
11343 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11344 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11345 enum insn_code icode = sparc_builtins_icode[code];
11346 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11347 call_expr_arg_iterator iter;
11348 int arg_count = 0;
11349 rtx pat, op[4];
11350 tree arg;
11351
11352 if (nonvoid)
11353 {
11354 machine_mode tmode = insn_data[icode].operand[0].mode;
11355 if (!target
11356 || GET_MODE (target) != tmode
11357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11358 op[0] = gen_reg_rtx (tmode);
11359 else
11360 op[0] = target;
11361 }
11362
11363 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11364 {
11365 const struct insn_operand_data *insn_op;
11366 int idx;
11367
11368 if (arg == error_mark_node)
11369 return NULL_RTX;
11370
11371 arg_count++;
11372 idx = arg_count - !nonvoid;
11373 insn_op = &insn_data[icode].operand[idx];
11374 op[arg_count] = expand_normal (arg);
11375
11376 /* Some of the builtins require constant arguments. We check
11377 for this here. */
11378 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11379 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11380 && arg_count == 3)
11381 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11382 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11383 && arg_count == 2))
11384 {
11385 if (!check_constant_argument (icode, idx, op[arg_count]))
11386 return const0_rtx;
11387 }
11388
11389 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11390 {
11391 if (!address_operand (op[arg_count], SImode))
11392 {
11393 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11394 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11395 }
11396 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11397 }
11398
11399 else if (insn_op->mode == V1DImode
11400 && GET_MODE (op[arg_count]) == DImode)
11401 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11402
11403 else if (insn_op->mode == V1SImode
11404 && GET_MODE (op[arg_count]) == SImode)
11405 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11406
11407 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11408 insn_op->mode))
11409 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11410 }
11411
11412 switch (arg_count)
11413 {
11414 case 0:
11415 pat = GEN_FCN (icode) (op[0]);
11416 break;
11417 case 1:
11418 if (nonvoid)
11419 pat = GEN_FCN (icode) (op[0], op[1]);
11420 else
11421 pat = GEN_FCN (icode) (op[1]);
11422 break;
11423 case 2:
11424 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11425 break;
11426 case 3:
11427 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11428 break;
11429 default:
11430 gcc_unreachable ();
11431 }
11432
11433 if (!pat)
11434 return NULL_RTX;
11435
11436 emit_insn (pat);
11437
11438 return (nonvoid ? op[0] : const0_rtx);
11439 }
11440
11441 /* Return the upper 16 bits of the 8x16 multiplication. */
11442
11443 static int
11444 sparc_vis_mul8x16 (int e8, int e16)
11445 {
11446 return (e8 * e16 + 128) / 256;
11447 }
11448
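/* For example, with e8 = 100 and e16 = 1000, the product 100000 becomes
   (100000 + 128) / 256 = 391: the 24-bit product is reduced to its upper
   16 bits, the +128 term rounding the result before the truncating
   division.  */
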
11449 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11450 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11451
11452 static void
11453 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11454 tree inner_type, tree cst0, tree cst1)
11455 {
11456 unsigned i, num = VECTOR_CST_NELTS (cst0);
11457 int scale;
11458
11459 switch (fncode)
11460 {
11461 case SPARC_BUILTIN_FMUL8X16:
11462 for (i = 0; i < num; ++i)
11463 {
11464 int val
11465 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11466 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11467 n_elts->quick_push (build_int_cst (inner_type, val));
11468 }
11469 break;
11470
11471 case SPARC_BUILTIN_FMUL8X16AU:
11472 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11473
11474 for (i = 0; i < num; ++i)
11475 {
11476 int val
11477 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11478 scale);
11479 n_elts->quick_push (build_int_cst (inner_type, val));
11480 }
11481 break;
11482
11483 case SPARC_BUILTIN_FMUL8X16AL:
11484 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11485
11486 for (i = 0; i < num; ++i)
11487 {
11488 int val
11489 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11490 scale);
11491 n_elts->quick_push (build_int_cst (inner_type, val));
11492 }
11493 break;
11494
11495 default:
11496 gcc_unreachable ();
11497 }
11498 }
11499
11500 /* Implement TARGET_FOLD_BUILTIN hook.
11501
11502 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11503 result of the function call is ignored. NULL_TREE is returned if the
11504 function could not be folded. */
11505
11506 static tree
11507 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11508 tree *args, bool ignore)
11509 {
11510 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11511 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11512 tree arg0, arg1, arg2;
11513
11514 if (ignore)
11515 switch (code)
11516 {
11517 case SPARC_BUILTIN_LDFSR:
11518 case SPARC_BUILTIN_STFSR:
11519 case SPARC_BUILTIN_ALIGNADDR:
11520 case SPARC_BUILTIN_WRGSR:
11521 case SPARC_BUILTIN_BMASK:
11522 case SPARC_BUILTIN_CMASK8:
11523 case SPARC_BUILTIN_CMASK16:
11524 case SPARC_BUILTIN_CMASK32:
11525 break;
11526
11527 default:
11528 return build_zero_cst (rtype);
11529 }
11530
11531 switch (code)
11532 {
11533 case SPARC_BUILTIN_FEXPAND:
11534 arg0 = args[0];
11535 STRIP_NOPS (arg0);
11536
11537 if (TREE_CODE (arg0) == VECTOR_CST)
11538 {
11539 tree inner_type = TREE_TYPE (rtype);
11540 unsigned i;
11541
11542 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11543 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11544 {
11545 unsigned HOST_WIDE_INT val
11546 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11547 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11548 }
11549 return build_vector (rtype, n_elts);
11550 }
11551 break;
11552
11553 case SPARC_BUILTIN_FMUL8X16:
11554 case SPARC_BUILTIN_FMUL8X16AU:
11555 case SPARC_BUILTIN_FMUL8X16AL:
11556 arg0 = args[0];
11557 arg1 = args[1];
11558 STRIP_NOPS (arg0);
11559 STRIP_NOPS (arg1);
11560
11561 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11562 {
11563 tree inner_type = TREE_TYPE (rtype);
11564 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11565 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11566 return build_vector (rtype, n_elts);
11567 }
11568 break;
11569
11570 case SPARC_BUILTIN_FPMERGE:
11571 arg0 = args[0];
11572 arg1 = args[1];
11573 STRIP_NOPS (arg0);
11574 STRIP_NOPS (arg1);
11575
11576 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11577 {
11578 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11579 unsigned i;
11580 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11581 {
11582 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11583 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11584 }
11585
11586 return build_vector (rtype, n_elts);
11587 }
11588 break;
11589
11590 case SPARC_BUILTIN_PDIST:
11591 case SPARC_BUILTIN_PDISTN:
11592 arg0 = args[0];
11593 arg1 = args[1];
11594 STRIP_NOPS (arg0);
11595 STRIP_NOPS (arg1);
11596 if (code == SPARC_BUILTIN_PDIST)
11597 {
11598 arg2 = args[2];
11599 STRIP_NOPS (arg2);
11600 }
11601 else
11602 arg2 = integer_zero_node;
11603
11604 if (TREE_CODE (arg0) == VECTOR_CST
11605 && TREE_CODE (arg1) == VECTOR_CST
11606 && TREE_CODE (arg2) == INTEGER_CST)
11607 {
11608 bool overflow = false;
11609 widest_int result = wi::to_widest (arg2);
11610 widest_int tmp;
11611 unsigned i;
11612
11613 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11614 {
11615 tree e0 = VECTOR_CST_ELT (arg0, i);
11616 tree e1 = VECTOR_CST_ELT (arg1, i);
11617
11618 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11619
11620 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11621 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11622 if (wi::neg_p (tmp))
11623 tmp = wi::neg (tmp, &neg2_ovf);
11624 else
11625 neg2_ovf = false;
11626 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11627 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11628 }
11629
11630 gcc_assert (!overflow);
11631
11632 return wide_int_to_tree (rtype, result);
11633 }
11634
11635 default:
11636 break;
11637 }
11638
11639 return NULL_TREE;
11640 }
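
/* As a concrete instance of the folding above, __builtin_vis_fpmerge on two
   constant vectors {a0,a1,a2,a3} and {b0,b1,b2,b3} is folded at compile time
   into the interleaved constant {a0,b0,a1,b1,a2,b2,a3,b3}, so no fpmerge
   instruction is emitted for constant operands.  */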
11641 \f
11642 /* ??? This duplicates information provided to the compiler by the
11643 ??? scheduler description. Some day, teach genautomata to output
11644 ??? the latencies and then CSE will just use that. */
11645
11646 static bool
11647 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11648 int opno ATTRIBUTE_UNUSED,
11649 int *total, bool speed ATTRIBUTE_UNUSED)
11650 {
11651 int code = GET_CODE (x);
11652 bool float_mode_p = FLOAT_MODE_P (mode);
11653
11654 switch (code)
11655 {
11656 case CONST_INT:
11657 if (SMALL_INT (x))
11658 *total = 0;
11659 else
11660 *total = 2;
11661 return true;
11662
11663 case CONST_WIDE_INT:
11664 *total = 0;
11665 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11666 *total += 2;
11667 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11668 *total += 2;
11669 return true;
11670
11671 case HIGH:
11672 *total = 2;
11673 return true;
11674
11675 case CONST:
11676 case LABEL_REF:
11677 case SYMBOL_REF:
11678 *total = 4;
11679 return true;
11680
11681 case CONST_DOUBLE:
11682 *total = 8;
11683 return true;
11684
11685 case MEM:
11686 /* If outer-code was a sign or zero extension, a cost
11687 of COSTS_N_INSNS (1) was already added in. This is
11688 why we are subtracting it back out. */
11689 if (outer_code == ZERO_EXTEND)
11690 {
11691 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11692 }
11693 else if (outer_code == SIGN_EXTEND)
11694 {
11695 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11696 }
11697 else if (float_mode_p)
11698 {
11699 *total = sparc_costs->float_load;
11700 }
11701 else
11702 {
11703 *total = sparc_costs->int_load;
11704 }
11705
11706 return true;
11707
11708 case PLUS:
11709 case MINUS:
11710 if (float_mode_p)
11711 *total = sparc_costs->float_plusminus;
11712 else
11713 *total = COSTS_N_INSNS (1);
11714 return false;
11715
11716 case FMA:
11717 {
11718 rtx sub;
11719
11720 gcc_assert (float_mode_p);
11721 *total = sparc_costs->float_mul;
11722
11723 sub = XEXP (x, 0);
11724 if (GET_CODE (sub) == NEG)
11725 sub = XEXP (sub, 0);
11726 *total += rtx_cost (sub, mode, FMA, 0, speed);
11727
11728 sub = XEXP (x, 2);
11729 if (GET_CODE (sub) == NEG)
11730 sub = XEXP (sub, 0);
11731 *total += rtx_cost (sub, mode, FMA, 2, speed);
11732 return true;
11733 }
11734
11735 case MULT:
11736 if (float_mode_p)
11737 *total = sparc_costs->float_mul;
11738 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11739 *total = COSTS_N_INSNS (25);
11740 else
11741 {
11742 int bit_cost;
11743
11744 bit_cost = 0;
11745 if (sparc_costs->int_mul_bit_factor)
11746 {
11747 int nbits;
11748
11749 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11750 {
11751 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11752 for (nbits = 0; value != 0; value &= value - 1)
11753 nbits++;
11754 }
11755 else
11756 nbits = 7;
11757
11758 if (nbits < 3)
11759 nbits = 3;
11760 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11761 bit_cost = COSTS_N_INSNS (bit_cost);
11762 }
11763
11764 if (mode == DImode || !TARGET_HARD_MUL)
11765 *total = sparc_costs->int_mulX + bit_cost;
11766 else
11767 *total = sparc_costs->int_mul + bit_cost;
11768 }
11769 return false;
11770
11771 case ASHIFT:
11772 case ASHIFTRT:
11773 case LSHIFTRT:
11774 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11775 return false;
11776
11777 case DIV:
11778 case UDIV:
11779 case MOD:
11780 case UMOD:
11781 if (float_mode_p)
11782 {
11783 if (mode == DFmode)
11784 *total = sparc_costs->float_div_df;
11785 else
11786 *total = sparc_costs->float_div_sf;
11787 }
11788 else
11789 {
11790 if (mode == DImode)
11791 *total = sparc_costs->int_divX;
11792 else
11793 *total = sparc_costs->int_div;
11794 }
11795 return false;
11796
11797 case NEG:
11798 if (! float_mode_p)
11799 {
11800 *total = COSTS_N_INSNS (1);
11801 return false;
11802 }
11803 /* FALLTHRU */
11804
11805 case ABS:
11806 case FLOAT:
11807 case UNSIGNED_FLOAT:
11808 case FIX:
11809 case UNSIGNED_FIX:
11810 case FLOAT_EXTEND:
11811 case FLOAT_TRUNCATE:
11812 *total = sparc_costs->float_move;
11813 return false;
11814
11815 case SQRT:
11816 if (mode == DFmode)
11817 *total = sparc_costs->float_sqrt_df;
11818 else
11819 *total = sparc_costs->float_sqrt_sf;
11820 return false;
11821
11822 case COMPARE:
11823 if (float_mode_p)
11824 *total = sparc_costs->float_cmp;
11825 else
11826 *total = COSTS_N_INSNS (1);
11827 return false;
11828
11829 case IF_THEN_ELSE:
11830 if (float_mode_p)
11831 *total = sparc_costs->float_cmove;
11832 else
11833 *total = sparc_costs->int_cmove;
11834 return false;
11835
11836 case IOR:
11837 /* Handle the NAND vector patterns. */
11838 if (sparc_vector_mode_supported_p (mode)
11839 && GET_CODE (XEXP (x, 0)) == NOT
11840 && GET_CODE (XEXP (x, 1)) == NOT)
11841 {
11842 *total = COSTS_N_INSNS (1);
11843 return true;
11844 }
11845 else
11846 return false;
11847
11848 default:
11849 return false;
11850 }
11851 }
11852
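/* As a worked example of the MULT cost above: a multiplication by a constant
   with 9 bits set, on a processor description with int_mul_bit_factor == 2
   (a hypothetical value), is charged bit_cost = COSTS_N_INSNS ((9 - 3) / 2)
   = COSTS_N_INSNS (3) on top of the base int_mul/int_mulX cost; a
   non-constant multiplier is costed as if 7 bits were set.  */
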
11853 /* Return true if RCLASS is either GENERAL_REGS or I64_REGS. */
11854
11855 static inline bool
11856 general_or_i64_p (reg_class_t rclass)
11857 {
11858 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11859 }
11860
11861 /* Implement TARGET_REGISTER_MOVE_COST. */
11862
11863 static int
11864 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11865 reg_class_t from, reg_class_t to)
11866 {
11867 bool need_memory = false;
11868
11869 /* This helps postreload CSE to eliminate redundant comparisons. */
11870 if (from == NO_REGS || to == NO_REGS)
11871 return 100;
11872
11873 if (from == FPCC_REGS || to == FPCC_REGS)
11874 need_memory = true;
11875 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11876 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11877 {
11878 if (TARGET_VIS3)
11879 {
11880 int size = GET_MODE_SIZE (mode);
11881 if (size == 8 || size == 4)
11882 {
11883 if (! TARGET_ARCH32 || size == 4)
11884 return 4;
11885 else
11886 return 6;
11887 }
11888 }
11889 need_memory = true;
11890 }
11891
11892 if (need_memory)
11893 {
11894 if (sparc_cpu == PROCESSOR_ULTRASPARC
11895 || sparc_cpu == PROCESSOR_ULTRASPARC3
11896 || sparc_cpu == PROCESSOR_NIAGARA
11897 || sparc_cpu == PROCESSOR_NIAGARA2
11898 || sparc_cpu == PROCESSOR_NIAGARA3
11899 || sparc_cpu == PROCESSOR_NIAGARA4
11900 || sparc_cpu == PROCESSOR_NIAGARA7
11901 || sparc_cpu == PROCESSOR_M8)
11902 return 12;
11903
11904 return 6;
11905 }
11906
11907 return 2;
11908 }
11909
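/* Concretely: with VIS3, a 4-byte value moves directly between the integer
   and FP register files at cost 4; an 8-byte value costs 4 in 64-bit mode
   and 6 in 32-bit mode.  Without VIS3, such cross-file moves are costed as
   if they went through memory: 12 on the UltraSPARC/Niagara/M8 processors
   listed above, 6 otherwise.  */
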
11910 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11911 This is achieved by means of a manual dynamic stack space allocation in
11912 the current frame. We make the assumption that SEQ doesn't contain any
11913 function calls, with the possible exception of calls to the GOT helper. */
11914
11915 static void
11916 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11917 {
11918 /* We must preserve the lowest 16 words for the register save area. */
11919 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11920 /* We really need only 2 words of fresh stack space. */
11921 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11922
11923 rtx slot
11924 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11925 SPARC_STACK_BIAS + offset));
11926
11927 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11928 emit_insn (gen_rtx_SET (slot, reg));
11929 if (reg2)
11930 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11931 reg2));
11932 emit_insn (seq);
11933 if (reg2)
11934 emit_insn (gen_rtx_SET (reg2,
11935 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11936 emit_insn (gen_rtx_SET (reg, slot));
11937 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11938 }
11939
11940 /* Output the assembler code for a thunk function. THUNK_DECL is the
11941 declaration for the thunk function itself, FUNCTION is the decl for
11942 the target function. DELTA is an immediate constant offset to be
11943 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11944 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11945
11946 static void
11947 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11948 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11949 tree function)
11950 {
11951 rtx this_rtx, funexp;
11952 rtx_insn *insn;
11953 unsigned int int_arg_first;
11954
11955 reload_completed = 1;
11956 epilogue_completed = 1;
11957
11958 emit_note (NOTE_INSN_PROLOGUE_END);
11959
11960 if (TARGET_FLAT)
11961 {
11962 sparc_leaf_function_p = 1;
11963
11964 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11965 }
11966 else if (flag_delayed_branch)
11967 {
11968 /* We will emit a regular sibcall below, so we need to instruct
11969 output_sibcall that we are in a leaf function. */
11970 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11971
11972 /* This will cause final.c to invoke leaf_renumber_regs so we
11973 must behave as if we were in a not-yet-leafified function. */
11974 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11975 }
11976 else
11977 {
11978 /* We will emit the sibcall manually below, so we will need to
11979 manually spill non-leaf registers. */
11980 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11981
11982 /* We really are in a leaf function. */
11983 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11984 }
11985
11986 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11987 returns a structure, the structure return pointer is there instead. */
11988 if (TARGET_ARCH64
11989 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11990 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11991 else
11992 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11993
11994 /* Add DELTA. When possible use a plain add, otherwise load it into
11995 a register first. */
11996 if (delta)
11997 {
11998 rtx delta_rtx = GEN_INT (delta);
11999
12000 if (! SPARC_SIMM13_P (delta))
12001 {
12002 rtx scratch = gen_rtx_REG (Pmode, 1);
12003 emit_move_insn (scratch, delta_rtx);
12004 delta_rtx = scratch;
12005 }
12006
12007 /* THIS_RTX += DELTA. */
12008 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12009 }
12010
12011 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12012 if (vcall_offset)
12013 {
12014 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12015 rtx scratch = gen_rtx_REG (Pmode, 1);
12016
12017 gcc_assert (vcall_offset < 0);
12018
12019 /* SCRATCH = *THIS_RTX. */
12020 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12021
12022 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12023 may not have any available scratch register at this point. */
12024 if (SPARC_SIMM13_P (vcall_offset))
12025 ;
12026 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12027 else if (! fixed_regs[5]
12028 /* The below sequence is made up of at least 2 insns,
12029 while the default method may need only one. */
12030 && vcall_offset < -8192)
12031 {
12032 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12033 emit_move_insn (scratch2, vcall_offset_rtx);
12034 vcall_offset_rtx = scratch2;
12035 }
12036 else
12037 {
12038 rtx increment = GEN_INT (-4096);
12039
12040 /* VCALL_OFFSET is a negative number whose typical range can be
12041 estimated as -32768..0 in 32-bit mode. In almost all cases
12042 it is therefore cheaper to emit multiple add insns than
12043 spilling and loading the constant into a register (at least
12044 6 insns). */
12045 while (! SPARC_SIMM13_P (vcall_offset))
12046 {
12047 emit_insn (gen_add2_insn (scratch, increment));
12048 vcall_offset += 4096;
12049 }
12050 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12051 }
12052
12053 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12054 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12055 gen_rtx_PLUS (Pmode,
12056 scratch,
12057 vcall_offset_rtx)));
12058
12059 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12060 emit_insn (gen_add2_insn (this_rtx, scratch));
12061 }
12062
12063 /* Generate a tail call to the target function. */
12064 if (! TREE_USED (function))
12065 {
12066 assemble_external (function);
12067 TREE_USED (function) = 1;
12068 }
12069 funexp = XEXP (DECL_RTL (function), 0);
12070
12071 if (flag_delayed_branch)
12072 {
12073 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12074 insn = emit_call_insn (gen_sibcall (funexp));
12075 SIBLING_CALL_P (insn) = 1;
12076 }
12077 else
12078 {
12079 /* The hoops we have to jump through in order to generate a sibcall
12080 without using delay slots... */
12081 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12082
12083 if (flag_pic)
12084 {
12085 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12086 start_sequence ();
12087 load_got_register (); /* clobbers %o7 */
12088 scratch = sparc_legitimize_pic_address (funexp, scratch);
12089 seq = get_insns ();
12090 end_sequence ();
12091 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12092 }
12093 else if (TARGET_ARCH32)
12094 {
12095 emit_insn (gen_rtx_SET (scratch,
12096 gen_rtx_HIGH (SImode, funexp)));
12097 emit_insn (gen_rtx_SET (scratch,
12098 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12099 }
12100 else /* TARGET_ARCH64 */
12101 {
12102 switch (sparc_cmodel)
12103 {
12104 case CM_MEDLOW:
12105 case CM_MEDMID:
12106 /* The destination can serve as a temporary. */
12107 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12108 break;
12109
12110 case CM_MEDANY:
12111 case CM_EMBMEDANY:
12112 /* The destination cannot serve as a temporary. */
12113 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12114 start_sequence ();
12115 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12116 seq = get_insns ();
12117 end_sequence ();
12118 emit_and_preserve (seq, spill_reg, 0);
12119 break;
12120
12121 default:
12122 gcc_unreachable ();
12123 }
12124 }
12125
12126 emit_jump_insn (gen_indirect_jump (scratch));
12127 }
12128
12129 emit_barrier ();
12130
12131 /* Run just enough of rest_of_compilation to get the insns emitted.
12132 There's not really enough bulk here to make other passes such as
12133 instruction scheduling worthwhile. Note that use_thunk calls
12134 assemble_start_function and assemble_end_function. */
12135 insn = get_insns ();
12136 shorten_branches (insn);
12137 final_start_function (insn, file, 1);
12138 final (insn, file, 1);
12139 final_end_function ();
12140
12141 reload_completed = 0;
12142 epilogue_completed = 0;
12143 }
12144
12145 /* Return true if sparc_output_mi_thunk would be able to output the
12146 assembler code for the thunk function specified by the arguments
12147 it is passed, and false otherwise. */
12148 static bool
12149 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12150 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12151 HOST_WIDE_INT vcall_offset,
12152 const_tree function ATTRIBUTE_UNUSED)
12153 {
12154 /* Bound the loop used in the default method above. */
12155 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12156 }
12157
12158 /* How to allocate a 'struct machine_function'. */
12159
12160 static struct machine_function *
12161 sparc_init_machine_status (void)
12162 {
12163 return ggc_cleared_alloc<machine_function> ();
12164 }
12165
12166 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12167 We need to emit DTP-relative relocations. */
12168
12169 static void
12170 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12171 {
12172 switch (size)
12173 {
12174 case 4:
12175 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12176 break;
12177 case 8:
12178 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12179 break;
12180 default:
12181 gcc_unreachable ();
12182 }
12183 output_addr_const (file, x);
12184 fputs (")", file);
12185 }
12186
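/* For instance, a 4-byte entry referring to a TLS symbol `foo' (an
   illustrative name) comes out as

	.word	%r_tls_dtpoff32(foo)

   while the 8-byte case uses the .xword/%r_tls_dtpoff64 form.  */
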
12187 /* Do whatever processing is required at the end of a file. */
12188
12189 static void
12190 sparc_file_end (void)
12191 {
12192 /* If we need to emit the special GOT helper function, do so now. */
12193 if (got_helper_rtx)
12194 {
12195 const char *name = XSTR (got_helper_rtx, 0);
12196 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12197 #ifdef DWARF2_UNWIND_INFO
12198 bool do_cfi;
12199 #endif
12200
12201 if (USE_HIDDEN_LINKONCE)
12202 {
12203 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12204 get_identifier (name),
12205 build_function_type_list (void_type_node,
12206 NULL_TREE));
12207 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12208 NULL_TREE, void_type_node);
12209 TREE_PUBLIC (decl) = 1;
12210 TREE_STATIC (decl) = 1;
12211 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12212 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12213 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12214 resolve_unique_section (decl, 0, flag_function_sections);
12215 allocate_struct_function (decl, true);
12216 cfun->is_thunk = 1;
12217 current_function_decl = decl;
12218 init_varasm_status ();
12219 assemble_start_function (decl, name);
12220 }
12221 else
12222 {
12223 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12224 switch_to_section (text_section);
12225 if (align > 0)
12226 ASM_OUTPUT_ALIGN (asm_out_file, align);
12227 ASM_OUTPUT_LABEL (asm_out_file, name);
12228 }
12229
12230 #ifdef DWARF2_UNWIND_INFO
12231 do_cfi = dwarf2out_do_cfi_asm ();
12232 if (do_cfi)
12233 fprintf (asm_out_file, "\t.cfi_startproc\n");
12234 #endif
12235 if (flag_delayed_branch)
12236 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12237 reg_name, reg_name);
12238 else
12239 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12240 reg_name, reg_name);
12241 #ifdef DWARF2_UNWIND_INFO
12242 if (do_cfi)
12243 fprintf (asm_out_file, "\t.cfi_endproc\n");
12244 #endif
12245 }
12246
12247 if (NEED_INDICATE_EXEC_STACK)
12248 file_end_indicate_exec_stack ();
12249
12250 #ifdef TARGET_SOLARIS
12251 solaris_file_end ();
12252 #endif
12253 }
12254
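/* With delayed branches enabled, the GOT helper emitted above therefore
   reads roughly as follows, assuming %l7 is the PIC register and using a
   hypothetical label for the helper itself:

	__sparc_get_pc_thunk.l7:
		jmp	%o7+8
		 add	%o7, %l7, %l7

   i.e. it returns to the caller while folding the call site's address into
   the GOT register in the delay slot.  */
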
12255 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12256 /* Implement TARGET_MANGLE_TYPE. */
12257
12258 static const char *
12259 sparc_mangle_type (const_tree type)
12260 {
12261 if (TARGET_ARCH32
12262 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12263 && TARGET_LONG_DOUBLE_128)
12264 return "g";
12265
12266 /* For all other types, use normal C++ mangling. */
12267 return NULL;
12268 }
12269 #endif
12270
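/* For example, in a 32-bit compilation with 128-bit long double, a function
   `void f (long double)' mangles as _Z1fg instead of the default _Z1fe,
   `g' being the mangling code otherwise used for __float128.  */
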
12271 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12272 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12273 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12274
12275 void
12276 sparc_emit_membar_for_model (enum memmodel model,
12277 int load_store, int before_after)
12278 {
12279 /* Bits for the MEMBAR mmask field. */
12280 const int LoadLoad = 1;
12281 const int StoreLoad = 2;
12282 const int LoadStore = 4;
12283 const int StoreStore = 8;
12284
12285 int mm = 0, implied = 0;
12286
12287 switch (sparc_memory_model)
12288 {
12289 case SMM_SC:
12290 /* Sequential Consistency. All memory transactions are immediately
12291 visible in sequential execution order. No barriers needed. */
12292 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12293 break;
12294
12295 case SMM_TSO:
12296 /* Total Store Ordering: all memory transactions with store semantics
12297 are followed by an implied StoreStore. */
12298 implied |= StoreStore;
12299
12300 /* If we're not looking for a raw barrier (before+after), then atomic
12301 operations get the benefit of being both load and store. */
12302 if (load_store == 3 && before_after == 1)
12303 implied |= StoreLoad;
12304 /* FALLTHRU */
12305
12306 case SMM_PSO:
12307 /* Partial Store Ordering: all memory transactions with load semantics
12308 are followed by an implied LoadLoad | LoadStore. */
12309 implied |= LoadLoad | LoadStore;
12310
12311 /* If we're not looking for a raw barrier (before+after), then atomic
12312 operations get the benefit of being both load and store. */
12313 if (load_store == 3 && before_after == 2)
12314 implied |= StoreLoad | StoreStore;
12315 /* FALLTHRU */
12316
12317 case SMM_RMO:
12318 /* Relaxed Memory Ordering: no implicit bits. */
12319 break;
12320
12321 default:
12322 gcc_unreachable ();
12323 }
12324
12325 if (before_after & 1)
12326 {
12327 if (is_mm_release (model) || is_mm_acq_rel (model)
12328 || is_mm_seq_cst (model))
12329 {
12330 if (load_store & 1)
12331 mm |= LoadLoad | StoreLoad;
12332 if (load_store & 2)
12333 mm |= LoadStore | StoreStore;
12334 }
12335 }
12336 if (before_after & 2)
12337 {
12338 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12339 || is_mm_seq_cst (model))
12340 {
12341 if (load_store & 1)
12342 mm |= LoadLoad | LoadStore;
12343 if (load_store & 2)
12344 mm |= StoreLoad | StoreStore;
12345 }
12346 }
12347
12348 /* Remove the bits implied by the system memory model. */
12349 mm &= ~implied;
12350
12351 /* For raw barriers (before+after), always emit a barrier.
12352 This will become a compile-time barrier if needed. */
12353 if (mm || before_after == 3)
12354 emit_insn (gen_membar (GEN_INT (mm)));
12355 }
12356
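/* As a worked example: under the TSO memory model (the usual default), the
   "before" barrier of a seq_cst atomic read-modify-write is requested with
   load_store == 3 and before_after == 1.  TSO implies StoreStore plus, for
   this atomic case, StoreLoad, and the PSO fallthrough adds
   LoadLoad | LoadStore, so every requested bit is already implied and no
   membar instruction is emitted.  */
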
12357 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a 32-bit
12358 compare and swap on the word containing the byte or half-word. */
12359
12360 static void
12361 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12362 rtx oldval, rtx newval)
12363 {
12364 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12365 rtx addr = gen_reg_rtx (Pmode);
12366 rtx off = gen_reg_rtx (SImode);
12367 rtx oldv = gen_reg_rtx (SImode);
12368 rtx newv = gen_reg_rtx (SImode);
12369 rtx oldvalue = gen_reg_rtx (SImode);
12370 rtx newvalue = gen_reg_rtx (SImode);
12371 rtx res = gen_reg_rtx (SImode);
12372 rtx resv = gen_reg_rtx (SImode);
12373 rtx memsi, val, mask, cc;
12374
12375 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12376
12377 if (Pmode != SImode)
12378 addr1 = gen_lowpart (SImode, addr1);
12379 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12380
12381 memsi = gen_rtx_MEM (SImode, addr);
12382 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12383 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12384
12385 val = copy_to_reg (memsi);
12386
12387 emit_insn (gen_rtx_SET (off,
12388 gen_rtx_XOR (SImode, off,
12389 GEN_INT (GET_MODE (mem) == QImode
12390 ? 3 : 2))));
12391
12392 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12393
12394 if (GET_MODE (mem) == QImode)
12395 mask = force_reg (SImode, GEN_INT (0xff));
12396 else
12397 mask = force_reg (SImode, GEN_INT (0xffff));
12398
12399 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12400
12401 emit_insn (gen_rtx_SET (val,
12402 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12403 val)));
12404
12405 oldval = gen_lowpart (SImode, oldval);
12406 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12407
12408 newval = gen_lowpart_common (SImode, newval);
12409 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12410
12411 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12412
12413 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12414
12415 rtx_code_label *end_label = gen_label_rtx ();
12416 rtx_code_label *loop_label = gen_label_rtx ();
12417 emit_label (loop_label);
12418
12419 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12420
12421 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12422
12423 emit_move_insn (bool_result, const1_rtx);
12424
12425 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12426
12427 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12428
12429 emit_insn (gen_rtx_SET (resv,
12430 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12431 res)));
12432
12433 emit_move_insn (bool_result, const0_rtx);
12434
12435 cc = gen_compare_reg_1 (NE, resv, val);
12436 emit_insn (gen_rtx_SET (val, resv));
12437
12438 /* Use cbranchcc4 to separate the compare and branch! */
12439 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12440 cc, const0_rtx, loop_label));
12441
12442 emit_label (end_label);
12443
12444 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12445
12446 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12447
12448 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12449 }
12450
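/* As a worked example on this big-endian target: a QImode compare-and-swap
   at byte offset 1 within its word computes off = (1 ^ 3) << 3 = 16, so the
   mask becomes 0xff << 16 and the old and new values are shifted into bits
   23..16 of the containing word before entering the 32-bit CAS loop.  */
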
12451 /* Expand code to perform a compare-and-swap. */
12452
12453 void
12454 sparc_expand_compare_and_swap (rtx operands[])
12455 {
12456 rtx bval, retval, mem, oldval, newval;
12457 machine_mode mode;
12458 enum memmodel model;
12459
12460 bval = operands[0];
12461 retval = operands[1];
12462 mem = operands[2];
12463 oldval = operands[3];
12464 newval = operands[4];
12465 model = (enum memmodel) INTVAL (operands[6]);
12466 mode = GET_MODE (mem);
12467
12468 sparc_emit_membar_for_model (model, 3, 1);
12469
12470 if (reg_overlap_mentioned_p (retval, oldval))
12471 oldval = copy_to_reg (oldval);
12472
12473 if (mode == QImode || mode == HImode)
12474 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12475 else
12476 {
12477 rtx (*gen) (rtx, rtx, rtx, rtx);
12478 rtx x;
12479
12480 if (mode == SImode)
12481 gen = gen_atomic_compare_and_swapsi_1;
12482 else
12483 gen = gen_atomic_compare_and_swapdi_1;
12484 emit_insn (gen (retval, mem, oldval, newval));
12485
12486 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12487 if (x != bval)
12488 convert_move (bval, x, 1);
12489 }
12490
12491 sparc_emit_membar_for_model (model, 3, 2);
12492 }
12493
12494 void
12495 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12496 {
12497 rtx t_1, t_2, t_3;
12498
12499 sel = gen_lowpart (DImode, sel);
12500 switch (vmode)
12501 {
12502 case E_V2SImode:
12503 /* inp = xxxxxxxAxxxxxxxB */
12504 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12505 NULL_RTX, 1, OPTAB_DIRECT);
12506 /* t_1 = ....xxxxxxxAxxx. */
12507 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12508 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12509 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12510 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12511 /* sel = .......B */
12512 /* t_1 = ...A.... */
12513 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12514 /* sel = ...A...B */
12515 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12516 /* sel = AAAABBBB * 4 */
12517 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12518 /* sel = { A*4, A*4+1, A*4+2, ... } */
12519 break;
12520
12521 case E_V4HImode:
12522 /* inp = xxxAxxxBxxxCxxxD */
12523 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12524 NULL_RTX, 1, OPTAB_DIRECT);
12525 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12526 NULL_RTX, 1, OPTAB_DIRECT);
12527 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12528 NULL_RTX, 1, OPTAB_DIRECT);
12529 /* t_1 = ..xxxAxxxBxxxCxx */
12530 /* t_2 = ....xxxAxxxBxxxC */
12531 /* t_3 = ......xxxAxxxBxx */
12532 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12533 GEN_INT (0x07),
12534 NULL_RTX, 1, OPTAB_DIRECT);
12535 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12536 GEN_INT (0x0700),
12537 NULL_RTX, 1, OPTAB_DIRECT);
12538 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12539 GEN_INT (0x070000),
12540 NULL_RTX, 1, OPTAB_DIRECT);
12541 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12542 GEN_INT (0x07000000),
12543 NULL_RTX, 1, OPTAB_DIRECT);
12544 /* sel = .......D */
12545 /* t_1 = .....C.. */
12546 /* t_2 = ...B.... */
12547 /* t_3 = .A...... */
12548 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12549 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12550 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12551 /* sel = .A.B.C.D */
12552 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12553 /* sel = AABBCCDD * 2 */
12554 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12555 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12556 break;
12557
12558 case E_V8QImode:
12559 /* input = xAxBxCxDxExFxGxH */
12560 sel = expand_simple_binop (DImode, AND, sel,
12561 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12562 | 0x0f0f0f0f),
12563 NULL_RTX, 1, OPTAB_DIRECT);
12564 /* sel = .A.B.C.D.E.F.G.H */
12565 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12566 NULL_RTX, 1, OPTAB_DIRECT);
12567 /* t_1 = ..A.B.C.D.E.F.G. */
12568 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12569 NULL_RTX, 1, OPTAB_DIRECT);
12570 /* sel = .AABBCCDDEEFFGGH */
12571 sel = expand_simple_binop (DImode, AND, sel,
12572 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12573 | 0xff00ff),
12574 NULL_RTX, 1, OPTAB_DIRECT);
12575 /* sel = ..AB..CD..EF..GH */
12576 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12577 NULL_RTX, 1, OPTAB_DIRECT);
12578 /* t_1 = ....AB..CD..EF.. */
12579 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12580 NULL_RTX, 1, OPTAB_DIRECT);
12581 /* sel = ..ABABCDCDEFEFGH */
12582 sel = expand_simple_binop (DImode, AND, sel,
12583 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12584 NULL_RTX, 1, OPTAB_DIRECT);
12585 /* sel = ....ABCD....EFGH */
12586 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12587 NULL_RTX, 1, OPTAB_DIRECT);
12588 /* t_1 = ........ABCD.... */
12589 sel = gen_lowpart (SImode, sel);
12590 t_1 = gen_lowpart (SImode, t_1);
12591 break;
12592
12593 default:
12594 gcc_unreachable ();
12595 }
12596
12597 /* Always perform the final addition/merge within the bmask insn. */
12598 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12599 }
12600
12601 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12602
12603 static bool
12604 sparc_frame_pointer_required (void)
12605 {
12606 /* If the stack pointer is dynamically modified in the function, it cannot
12607 serve as the frame pointer. */
12608 if (cfun->calls_alloca)
12609 return true;
12610
12611 /* If the function receives nonlocal gotos, it needs to save the frame
12612 pointer in the nonlocal_goto_save_area object. */
12613 if (cfun->has_nonlocal_label)
12614 return true;
12615
12616 /* In flat mode, that's it. */
12617 if (TARGET_FLAT)
12618 return false;
12619
12620 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12621 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12622 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12623 }
12624
12625 /* The way this is structured, we can't eliminate SFP in favor of SP
12626 if the frame pointer is required: we want to use the SFP->HFP elimination
12627 in that case. But the test in update_eliminables doesn't know we are
12628 assuming below that we only do the former elimination. */
12629
12630 static bool
12631 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12632 {
12633 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12634 }
12635
12636 /* Return the hard frame pointer directly to bypass the stack bias. */
12637
12638 static rtx
12639 sparc_builtin_setjmp_frame_value (void)
12640 {
12641 return hard_frame_pointer_rtx;
12642 }
12643
12644 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12645 they won't be allocated. */
12646
12647 static void
12648 sparc_conditional_register_usage (void)
12649 {
12650 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12651 {
12652 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12653 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12654 }
12655 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12656 then honor it. */
12657 if (TARGET_ARCH32 && fixed_regs[5])
12658 fixed_regs[5] = 1;
12659 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12660 fixed_regs[5] = 0;
12661 if (! TARGET_V9)
12662 {
12663 int regno;
12664 for (regno = SPARC_FIRST_V9_FP_REG;
12665 regno <= SPARC_LAST_V9_FP_REG;
12666 regno++)
12667 fixed_regs[regno] = 1;
12668 /* %fcc0 is used by v8 and v9. */
12669 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12670 regno <= SPARC_LAST_V9_FCC_REG;
12671 regno++)
12672 fixed_regs[regno] = 1;
12673 }
12674 if (! TARGET_FPU)
12675 {
12676 int regno;
12677 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12678 fixed_regs[regno] = 1;
12679 }
12680 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12681 then honor it. Likewise with g3 and g4. */
12682 if (fixed_regs[2] == 2)
12683 fixed_regs[2] = ! TARGET_APP_REGS;
12684 if (fixed_regs[3] == 2)
12685 fixed_regs[3] = ! TARGET_APP_REGS;
12686 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12687 fixed_regs[4] = ! TARGET_APP_REGS;
12688 else if (TARGET_CM_EMBMEDANY)
12689 fixed_regs[4] = 1;
12690 else if (fixed_regs[4] == 2)
12691 fixed_regs[4] = 0;
12692 if (TARGET_FLAT)
12693 {
12694 int regno;
12695 /* Disable leaf functions. */
12696 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12698 leaf_reg_remap [regno] = regno;
12699 }
12700 if (TARGET_VIS)
12701 global_regs[SPARC_GSR_REG] = 1;
12702 }
12703
12704 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12705
12706 - We can't load constants into FP registers.
12707 - We can't load FP constants into integer registers when soft-float,
12708 because there is no soft-float pattern with a r/F constraint.
12709 - We can't load FP constants into integer registers for TFmode unless
12710 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12711 - Try to reload integer constants (symbolic or otherwise) back into
12712 registers directly, rather than having them dumped to memory. */
12713
12714 static reg_class_t
12715 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12716 {
12717 machine_mode mode = GET_MODE (x);
12718 if (CONSTANT_P (x))
12719 {
12720 if (FP_REG_CLASS_P (rclass)
12721 || rclass == GENERAL_OR_FP_REGS
12722 || rclass == GENERAL_OR_EXTRA_FP_REGS
12723 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12724 || (mode == TFmode && ! const_zero_operand (x, mode)))
12725 return NO_REGS;
12726
12727 if (GET_MODE_CLASS (mode) == MODE_INT)
12728 return GENERAL_REGS;
12729
12730 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12731 {
12732 if (! FP_REG_CLASS_P (rclass)
12733 || !(const_zero_operand (x, mode)
12734 || const_all_ones_operand (x, mode)))
12735 return NO_REGS;
12736 }
12737 }
12738
12739 if (TARGET_VIS3
12740 && ! TARGET_ARCH64
12741 && (rclass == EXTRA_FP_REGS
12742 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12743 {
12744 int regno = true_regnum (x);
12745
12746 if (SPARC_INT_REG_P (regno))
12747 return (rclass == EXTRA_FP_REGS
12748 ? FP_REGS : GENERAL_OR_FP_REGS);
12749 }
12750
12751 return rclass;
12752 }
12753
12754 /* Return true if we use LRA instead of reload pass. */
12755
12756 static bool
12757 sparc_lra_p (void)
12758 {
12759 return TARGET_LRA;
12760 }
12761
12762 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12763 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
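/* For illustration only, a sketch of the output: assuming OPCODE is "mulx",
neither input is known to be already zero-extended, the operands are distinct
non-constant registers, and the temporary-register alternative is in effect,
the code below emits roughly

srl %L1, 0, %L1 ! zero-extend the low words
srl %L2, 0, %L2
sllx %H1, 32, %3 ! rebuild the 64-bit values in the temporaries
sllx %H2, 32, %4
or %L1, %3, %3
or %L2, %4, %4
mulx %3, %4, %3 ! the wide multiply proper
srlx %3, 32, %H0 ! split the result back into two halves
mov %3, %L0

where %H/%L denote the high and low operand halves and %3/%4 the scratch
operands of the enclosing pattern. */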
12764
12765 const char *
12766 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12767 {
12768 char mulstr[32];
12769
12770 gcc_assert (! TARGET_ARCH64);
12771
12772 if (sparc_check_64 (operands[1], insn) <= 0)
12773 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12774 if (which_alternative == 1)
12775 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12776 if (GET_CODE (operands[2]) == CONST_INT)
12777 {
12778 if (which_alternative == 1)
12779 {
12780 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12781 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12782 output_asm_insn (mulstr, operands);
12783 return "srlx\t%L0, 32, %H0";
12784 }
12785 else
12786 {
12787 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12788 output_asm_insn ("or\t%L1, %3, %3", operands);
12789 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12790 output_asm_insn (mulstr, operands);
12791 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12792 return "mov\t%3, %L0";
12793 }
12794 }
12795 else if (rtx_equal_p (operands[1], operands[2]))
12796 {
12797 if (which_alternative == 1)
12798 {
12799 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12800 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12801 output_asm_insn (mulstr, operands);
12802 return "srlx\t%L0, 32, %H0";
12803 }
12804 else
12805 {
12806 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12807 output_asm_insn ("or\t%L1, %3, %3", operands);
12808 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12809 output_asm_insn (mulstr, operands);
12810 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12811 return "mov\t%3, %L0";
12812 }
12813 }
12814 if (sparc_check_64 (operands[2], insn) <= 0)
12815 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12816 if (which_alternative == 1)
12817 {
12818 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12819 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12820 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12821 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12822 output_asm_insn (mulstr, operands);
12823 return "srlx\t%L0, 32, %H0";
12824 }
12825 else
12826 {
12827 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12828 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12829 output_asm_insn ("or\t%L1, %3, %3", operands);
12830 output_asm_insn ("or\t%L2, %4, %4", operands);
12831 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12832 output_asm_insn (mulstr, operands);
12833 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12834 return "mov\t%3, %L0";
12835 }
12836 }
12837
12838 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12839 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12840 and INNER_MODE are the modes describing TARGET. */
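/* For reference: the element is first copied into the low-order SImode word
of T1, which on big-endian SPARC occupies bytes 4-7 of the 8-byte register;
the bmask constants below thus replicate byte indices out of that word:
0x45674567 repeats the whole word, 0x67676767 its low halfword and
0x77777777 its low byte across the destination. */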
12841
12842 static void
12843 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12844 machine_mode inner_mode)
12845 {
12846 rtx t1, final_insn, sel;
12847 int bmask;
12848
12849 t1 = gen_reg_rtx (mode);
12850
12851 elt = convert_modes (SImode, inner_mode, elt, true);
12852 emit_move_insn (gen_lowpart(SImode, t1), elt);
12853
12854 switch (mode)
12855 {
12856 case E_V2SImode:
12857 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12858 bmask = 0x45674567;
12859 break;
12860 case E_V4HImode:
12861 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12862 bmask = 0x67676767;
12863 break;
12864 case E_V8QImode:
12865 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12866 bmask = 0x77777777;
12867 break;
12868 default:
12869 gcc_unreachable ();
12870 }
12871
12872 sel = force_reg (SImode, GEN_INT (bmask));
12873 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12874 emit_insn (final_insn);
12875 }
12876
12877 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12878 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
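/* For reference: fpmerge interleaves the bytes of its two 4-byte operands,
so merging a vector with itself doubles the number of copies of the element
byte; the three merges below go from one copy to two, then four, and finally
eight copies filling the V8QI target. */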
12879
12880 static void
12881 vector_init_fpmerge (rtx target, rtx elt)
12882 {
12883 rtx t1, t2, t2_low, t3, t3_low;
12884
12885 t1 = gen_reg_rtx (V4QImode);
12886 elt = convert_modes (SImode, QImode, elt, true);
12887 emit_move_insn (gen_lowpart (SImode, t1), elt);
12888
12889 t2 = gen_reg_rtx (V8QImode);
12890 t2_low = gen_lowpart (V4QImode, t2);
12891 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12892
12893 t3 = gen_reg_rtx (V8QImode);
12894 t3_low = gen_lowpart (V4QImode, t3);
12895 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12896
12897 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12898 }
12899
12900 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12901 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
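/* For reference: the alignaddr below sets the GSR alignment field to 6, so
each faligndata extracts 8 bytes starting at offset 6 of the 16-byte
concatenation of T1 and TARGET, i.e. it prepends the element halfword kept
in the last two bytes of T1 and shifts TARGET down by one element; four
iterations fill all four halfwords. */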
12902
12903 static void
12904 vector_init_faligndata (rtx target, rtx elt)
12905 {
12906 rtx t1 = gen_reg_rtx (V4HImode);
12907 int i;
12908
12909 elt = convert_modes (SImode, HImode, elt, true);
12910 emit_move_insn (gen_lowpart (SImode, t1), elt);
12911
12912 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12913 force_reg (SImode, GEN_INT (6)),
12914 const0_rtx));
12915
12916 for (i = 0; i < 4; i++)
12917 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12918 }
12919
12920 /* Emit code to initialize TARGET from the values of the individual fields in VALS. */
12921
12922 void
12923 sparc_expand_vector_init (rtx target, rtx vals)
12924 {
12925 const machine_mode mode = GET_MODE (target);
12926 const machine_mode inner_mode = GET_MODE_INNER (mode);
12927 const int n_elts = GET_MODE_NUNITS (mode);
12928 int i, n_var = 0;
12929 bool all_same = true;
12930 rtx mem;
12931
12932 for (i = 0; i < n_elts; i++)
12933 {
12934 rtx x = XVECEXP (vals, 0, i);
12935 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12936 n_var++;
12937
12938 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12939 all_same = false;
12940 }
12941
12942 if (n_var == 0)
12943 {
12944 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12945 return;
12946 }
12947
12948 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12949 {
12950 if (GET_MODE_SIZE (inner_mode) == 4)
12951 {
12952 emit_move_insn (gen_lowpart (SImode, target),
12953 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12954 return;
12955 }
12956 else if (GET_MODE_SIZE (inner_mode) == 8)
12957 {
12958 emit_move_insn (gen_lowpart (DImode, target),
12959 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12960 return;
12961 }
12962 }
12963 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12964 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12965 {
12966 emit_move_insn (gen_highpart (word_mode, target),
12967 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12968 emit_move_insn (gen_lowpart (word_mode, target),
12969 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12970 return;
12971 }
12972
12973 if (all_same && GET_MODE_SIZE (mode) == 8)
12974 {
12975 if (TARGET_VIS2)
12976 {
12977 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12978 return;
12979 }
12980 if (mode == V8QImode)
12981 {
12982 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12983 return;
12984 }
12985 if (mode == V4HImode)
12986 {
12987 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12988 return;
12989 }
12990 }
12991
12992 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12993 for (i = 0; i < n_elts; i++)
12994 emit_move_insn (adjust_address_nv (mem, inner_mode,
12995 i * GET_MODE_SIZE (inner_mode)),
12996 XVECEXP (vals, 0, i));
12997 emit_move_insn (target, mem);
12998 }
12999
13000 /* Implement TARGET_SECONDARY_RELOAD. */
13001
13002 static reg_class_t
13003 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13004 machine_mode mode, secondary_reload_info *sri)
13005 {
13006 enum reg_class rclass = (enum reg_class) rclass_i;
13007
13008 sri->icode = CODE_FOR_nothing;
13009 sri->extra_cost = 0;
13010
13011 /* We need a temporary when loading/storing a HImode/QImode value
13012 between memory and the FPU registers. This can happen when combine puts
13013 a paradoxical subreg in a float/fix conversion insn. */
13014 if (FP_REG_CLASS_P (rclass)
13015 && (mode == HImode || mode == QImode)
13016 && (GET_CODE (x) == MEM
13017 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13018 && true_regnum (x) == -1)))
13019 return GENERAL_REGS;
13020
13021 /* On 32-bit we need a temporary when loading/storing a DFmode value
13022 between unaligned memory and the upper FPU registers. */
13023 if (TARGET_ARCH32
13024 && rclass == EXTRA_FP_REGS
13025 && mode == DFmode
13026 && GET_CODE (x) == MEM
13027 && ! mem_min_alignment (x, 8))
13028 return FP_REGS;
13029
13030 if (((TARGET_CM_MEDANY
13031 && symbolic_operand (x, mode))
13032 || (TARGET_CM_EMBMEDANY
13033 && text_segment_operand (x, mode)))
13034 && ! flag_pic)
13035 {
13036 if (in_p)
13037 sri->icode = direct_optab_handler (reload_in_optab, mode);
13038 else
13039 sri->icode = direct_optab_handler (reload_out_optab, mode);
13040 return NO_REGS;
13041 }
13042
13043 if (TARGET_VIS3 && TARGET_ARCH32)
13044 {
13045 int regno = true_regnum (x);
13046
13047 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13048 to move 8-byte values in 4-byte pieces. This only works via
13049 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13050 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13051 an FP_REGS intermediate move. */
13052 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13053 || ((general_or_i64_p (rclass)
13054 || rclass == GENERAL_OR_FP_REGS)
13055 && SPARC_FP_REG_P (regno)))
13056 {
13057 sri->extra_cost = 2;
13058 return FP_REGS;
13059 }
13060 }
13061
13062 return NO_REGS;
13063 }
13064
13065 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13066
13067 On SPARC, when VIS3 is not available, it is not possible to move data
13068 directly between GENERAL_REGS and FP_REGS. */
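/* For example, with -mvis3 a 4-byte or 8-byte value can be moved between
the two register files directly, so no stack slot is needed; without VIS3,
or for a mode outside the 4..8-byte range such as QImode or TImode, a
memory intermediate is still required. */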
13069
13070 static bool
13071 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13072 reg_class_t class2)
13073 {
13074 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13075 && (! TARGET_VIS3
13076 || GET_MODE_SIZE (mode) > 8
13077 || GET_MODE_SIZE (mode) < 4));
13078 }
13079
13080 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13081
13082 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13083 because the movsi and movsf patterns don't handle r/f moves.
13084 For v8 we copy the default definition. */
13085
13086 static machine_mode
13087 sparc_secondary_memory_needed_mode (machine_mode mode)
13088 {
13089 if (TARGET_ARCH64)
13090 {
13091 if (GET_MODE_BITSIZE (mode) < 32)
13092 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13093 return mode;
13094 }
13095 else
13096 {
13097 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13098 return mode_for_size (BITS_PER_WORD,
13099 GET_MODE_CLASS (mode), 0).require ();
13100 return mode;
13101 }
13102 }
13103
13104 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13105 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13106
13107 bool
13108 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13109 {
13110 enum rtx_code rc = GET_CODE (operands[1]);
13111 machine_mode cmp_mode;
13112 rtx cc_reg, dst, cmp;
13113
13114 cmp = operands[1];
13115 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13116 return false;
13117
13118 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13119 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13120
13121 cmp_mode = GET_MODE (XEXP (cmp, 0));
13122 rc = GET_CODE (cmp);
13123
13124 dst = operands[0];
13125 if (! rtx_equal_p (operands[2], dst)
13126 && ! rtx_equal_p (operands[3], dst))
13127 {
13128 if (reg_overlap_mentioned_p (dst, cmp))
13129 dst = gen_reg_rtx (mode);
13130
13131 emit_move_insn (dst, operands[3]);
13132 }
13133 else if (operands[2] == dst)
13134 {
13135 operands[2] = operands[3];
13136
13137 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13138 rc = reverse_condition_maybe_unordered (rc);
13139 else
13140 rc = reverse_condition (rc);
13141 }
13142
13143 if (XEXP (cmp, 1) == const0_rtx
13144 && GET_CODE (XEXP (cmp, 0)) == REG
13145 && cmp_mode == DImode
13146 && v9_regcmp_p (rc))
13147 cc_reg = XEXP (cmp, 0);
13148 else
13149 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13150
13151 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13152
13153 emit_insn (gen_rtx_SET (dst,
13154 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13155
13156 if (dst != operands[0])
13157 emit_move_insn (operands[0], dst);
13158
13159 return true;
13160 }
13161
13162 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13163 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13164 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13165 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13166 code to be used for the condition mask. */
13167
13168 void
13169 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13170 {
13171 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13172 enum rtx_code code = GET_CODE (operands[3]);
13173
13174 mask = gen_reg_rtx (Pmode);
13175 cop0 = operands[4];
13176 cop1 = operands[5];
13177 if (code == LT || code == GE)
13178 {
13179 rtx t;
13180
13181 code = swap_condition (code);
13182 t = cop0; cop0 = cop1; cop1 = t;
13183 }
13184
13185 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13186
13187 fcmp = gen_rtx_UNSPEC (Pmode,
13188 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13189 fcode);
13190
13191 cmask = gen_rtx_UNSPEC (DImode,
13192 gen_rtvec (2, mask, gsr),
13193 ccode);
13194
13195 bshuf = gen_rtx_UNSPEC (mode,
13196 gen_rtvec (3, operands[1], operands[2], gsr),
13197 UNSPEC_BSHUFFLE);
13198
13199 emit_insn (gen_rtx_SET (mask, fcmp));
13200 emit_insn (gen_rtx_SET (gsr, cmask));
13201
13202 emit_insn (gen_rtx_SET (operands[0], bshuf));
13203 }
13204
13205 /* On SPARC, any mode that naturally allocates into the float
13206 registers should return 4 here. */
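/* For example, with -m64 this yields 4 for DFmode and for the 8-byte vector
modes, which are allocated in 32-bit FP register halves, but UNITS_PER_WORD
(8) for DImode; with -m32 every mode gets the 4-byte word size. */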
13207
13208 unsigned int
13209 sparc_regmode_natural_size (machine_mode mode)
13210 {
13211 int size = UNITS_PER_WORD;
13212
13213 if (TARGET_ARCH64)
13214 {
13215 enum mode_class mclass = GET_MODE_CLASS (mode);
13216
13217 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13218 size = 4;
13219 }
13220
13221 return size;
13222 }
13223
13224 /* Implement TARGET_HARD_REGNO_NREGS.
13225
13226 On SPARC, ordinary registers hold 32 bits worth; this means both
13227 integer and floating point registers. On v9, integer regs hold 64
13228 bits worth; floating point regs hold 32 bits worth (this includes the
13229 new fp regs as even the odd ones are included in the hard register
13230 count). */
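/* For example, under these rules a DFmode value with -m64 takes one integer
register (8 bytes / UNITS_PER_WORD) but two FP registers (8 bytes / 4), and
a TFmode value takes two integer or four FP registers; with -m32 everything
is counted in 4-byte units. */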
13231
13232 static unsigned int
13233 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13234 {
13235 if (regno == SPARC_GSR_REG)
13236 return 1;
13237 if (TARGET_ARCH64)
13238 {
13239 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13240 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13241 return CEIL (GET_MODE_SIZE (mode), 4);
13242 }
13243 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13244 }
13245
13246 /* Implement TARGET_HARD_REGNO_MODE_OK.
13247
13248 ??? Because of the funny way we pass parameters we should allow certain
13249 ??? types of float/complex values to be in integer registers during
13250 ??? RTL generation. This only matters on arch32. */
13251
13252 static bool
13253 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13254 {
13255 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13256 }
13257
13258 /* Implement TARGET_MODES_TIEABLE_P.
13259
13260 For V9 we have to deal with the fact that only the lower 32 floating
13261 point registers are 32-bit addressable. */
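/* For example, on V9 the code below keeps SImode and DImode tieable, since
integer classes are unaffected, but refuses to tie SFmode with DFmode or
V4QImode with V8QImode, because the 4-byte value could otherwise land in an
upper FP register that has no 32-bit addressable half. */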
13262
13263 static bool
13264 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13265 {
13266 enum mode_class mclass1, mclass2;
13267 unsigned short size1, size2;
13268
13269 if (mode1 == mode2)
13270 return true;
13271
13272 mclass1 = GET_MODE_CLASS (mode1);
13273 mclass2 = GET_MODE_CLASS (mode2);
13274 if (mclass1 != mclass2)
13275 return false;
13276
13277 if (! TARGET_V9)
13278 return true;
13279
13280 /* Classes are the same and we are V9 so we have to deal with upper
13281 vs. lower floating point registers. If one of the modes is a
13282 4-byte mode, and the other is not, we have to mark them as not
13283 tieable because only the lower 32 floating point registers are
13284 addressable 32 bits at a time.
13285
13286 We can't just test explicitly for SFmode, otherwise we won't
13287 cover the vector mode cases properly. */
13288
13289 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13290 return true;
13291
13292 size1 = GET_MODE_SIZE (mode1);
13293 size2 = GET_MODE_SIZE (mode2);
13294 if ((size1 > 4 && size2 == 4)
13295 || (size2 > 4 && size1 == 4))
13296 return false;
13297
13298 return true;
13299 }
13300
13301 /* Implement TARGET_CSTORE_MODE. */
13302
13303 static scalar_int_mode
13304 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13305 {
13306 return (TARGET_ARCH64 ? DImode : SImode);
13307 }
13308
13309 /* Return the compound expression made of T1 and T2. */
13310
13311 static inline tree
13312 compound_expr (tree t1, tree t2)
13313 {
13314 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13315 }
13316
13317 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13318
13319 static void
13320 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13321 {
13322 if (!TARGET_FPU)
13323 return;
13324
13325 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13326 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
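/* In the SPARC %fsr, these select the accrued-exception field (aexc,
bits 5-9) and the trap-enable mask (TEM, bits 23-27); the current-exception
field (cexc) sits in bits 0-4. */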
13327
13328 /* We generate the equivalent of feholdexcept (&fenv_var):
13329
13330 unsigned int fenv_var;
13331 __builtin_store_fsr (&fenv_var);
13332
13333 unsigned int tmp1_var;
13334 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13335
13336 __builtin_load_fsr (&tmp1_var); */
13337
13338 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13339 TREE_ADDRESSABLE (fenv_var) = 1;
13340 tree fenv_addr = build_fold_addr_expr (fenv_var);
13341 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13342 tree hold_stfsr
13343 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13344 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13345
13346 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13347 TREE_ADDRESSABLE (tmp1_var) = 1;
13348 tree masked_fenv_var
13349 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13350 build_int_cst (unsigned_type_node,
13351 ~(accrued_exception_mask | trap_enable_mask)));
13352 tree hold_mask
13353 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13354 NULL_TREE, NULL_TREE);
13355
13356 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13357 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13358 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13359
13360 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13361
13362 /* We reload the value of tmp1_var to clear the exceptions:
13363
13364 __builtin_load_fsr (&tmp1_var); */
13365
13366 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13367
13368 /* We generate the equivalent of feupdateenv (&fenv_var):
13369
13370 unsigned int tmp2_var;
13371 __builtin_store_fsr (&tmp2_var);
13372
13373 __builtin_load_fsr (&fenv_var);
13374
13375 if (SPARC_LOW_FE_EXCEPT_VALUES)
13376 tmp2_var >>= 5;
13377 __atomic_feraiseexcept ((int) tmp2_var); */
13378
13379 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13380 TREE_ADDRESSABLE (tmp2_var) = 1;
13381 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13382 tree update_stfsr
13383 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13384 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13385
13386 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13387
13388 tree atomic_feraiseexcept
13389 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13390 tree update_call
13391 = build_call_expr (atomic_feraiseexcept, 1,
13392 fold_convert (integer_type_node, tmp2_var));
13393
13394 if (SPARC_LOW_FE_EXCEPT_VALUES)
13395 {
13396 tree shifted_tmp2_var
13397 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13398 build_int_cst (unsigned_type_node, 5));
13399 tree update_shift
13400 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13401 update_call = compound_expr (update_shift, update_call);
13402 }
13403
13404 *update
13405 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13406 }
13407
13408 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13409
13410 SImode loads to floating-point registers are not zero-extended.
13411 The definition for LOAD_EXTEND_OP specifies that integer loads
13412 narrower than BITS_PER_WORD will be zero-extended. As a result,
13413 we inhibit changes from SImode unless they are to a mode that is
13414 identical in size.
13415
13416 Likewise for SFmode, since word-mode paradoxical subregs are
13417 problematic on big-endian architectures. */
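/* For example, on 64-bit this forbids accessing an SImode value held in an
FP register through a DImode (or DFmode) subreg, because an SImode load into
an FP register does not zero-extend to 64 bits the way LOAD_EXTEND_OP says
integer loads do; an SImode-to-SFmode change, being the same 4-byte size, is
still allowed. */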
13418
13419 static bool
13420 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13421 reg_class_t rclass)
13422 {
13423 if (TARGET_ARCH64
13424 && GET_MODE_SIZE (from) == 4
13425 && GET_MODE_SIZE (to) != 4)
13426 return !reg_classes_intersect_p (rclass, FP_REGS);
13427 return true;
13428 }
13429
13430 #include "gt-sparc.h"