/* Copyright (C) 2006-2019 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "params.h"
#include "gimplify.h"
#include "tm-constrs.h"
#include "ddg.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "flags.h"
#include "toplev.h"

/* This file should be included last.  */
#include "target-def.h"

/* Builtin types, data and prototypes.  */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these.  */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type.  (Implemented with V16QI_type_node)  */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node.  */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types.  */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_VOID,
  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];

struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
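/* Illustrative note (not part of the original source): each row gives
   the [low, high] interval for the corresponding SPU_BTI_* immediate
   kind above.  For example SPU_BTI_S10 accepts -0x200..0x1ff, i.e. any
   signed 10-bit value, while SPU_BTI_7 spans -0x40..0x7f so it can hold
   either a signed or an unsigned 7-bit field.  A builtin argument
   outside its row's interval is presumably diagnosed when the builtin
   is expanded.  */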

\f
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs.  */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);

/* Which instruction set architecture to use.  */
int spu_arch;
/* Which cpu are we tuning for.  */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);
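/* Worked example (illustrative, not part of the original source): each
   insn is 4 bytes, so the hardware wants 8*4 = 32 bytes between hint
   and branch.  With the default spu_max_nops of 2, spu_hint_dist is
   32 - 8 = 24 bytes, i.e. the compiler must see 6 real instructions
   before the branch and may pad with at most one nop pair.  */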

enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						machine_mode mode);

/* Pointer mode for __ea references.  */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)

\f
/* Define the structure for the machine field in struct function.  */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses.  */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'.  */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Set up function hooks.  */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue.  */
  parse_alignment_opts ();
  if (align_functions.levels[0].get_value () < 8)
    str_align_functions = "8";

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for %<-march=%> switch", spu_arch_string);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for %<-mtune=%> switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
\f
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
spu_hard_regno_nregs (unsigned int, machine_mode mode)
{
  return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
}
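/* Worked example (illustrative, not part of the original source,
   assuming MAX_FIXED_MODE_SIZE is 128 on this target, matching the
   128-bit SPU registers): a SImode or DFmode value needs
   CEIL (32, 128) = CEIL (64, 128) = 1 register, and only a mode wider
   than 128 bits would occupy more than one.  */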

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_SFmode:
    case E_DImode:
    case E_TImode:
    case E_DFmode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V2DFmode:
      return true;

    default:
      return false;
    }
}

/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREG's where this is correct.  */
int
valid_subreg (rtx op)
{
  machine_mode om = GET_MODE (op);
  machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
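/* Illustrative examples (not part of the original source): a
   (subreg:SI (reg:HI)) is accepted because both modes fit in 4 bytes,
   and a (subreg:TI (reg:V4SI)) is accepted because both are 16 bytes;
   but a (subreg:TI (reg:SI)) is rejected, since a 4-byte inner value
   does not sit in the least significant bytes of a 128-bit SPU
   register the way GCC assumes for paradoxical SUBREGs.  */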

/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, adjust *start and widen to SI; the
     SUBREG is added below.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = int_mode_for_size (op_size, 0).require ();
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
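/* Worked example (illustrative, not part of the original source): for
   an HImode register with *start == 5, op_size is 16, so *start is
   bumped by 32 - 16 to 21 and the value comes back wrapped as
   (subreg:SI (reg:HI) 0) -- bit positions are now counted within a
   32-bit word, as the insv/extv expanders below expect.  */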

void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (MEM_P (src))
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address.  */

      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load.  */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  start &= 7;
	  if (r0)
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	}
      else
	{
	  /* Need 2 loads.  */
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  start &= 7;

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}

    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start.  */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}
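/* Worked example (illustrative, not part of the original source): with
   MEM_ALIGN (src) == 128, align_mask is -128, so extracting width == 32
   bits at start == 104 gives (104 & -128) == 0 but (135 & -128) == 128:
   the field crosses a 16-byte line and needs two loads stitched
   together with selb.  Extracting the same field at start == 64 stays
   within one line ((64 & -128) == (95 & -128) == 0) and needs only one
   TImode load followed by a rotate.  */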

void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  unsigned HOST_WIDE_INT maskbits;
  machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;


  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG.  */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case E_SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case E_DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case E_TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
      if (start)
	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
      if (start)
	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  addr1 = plus_constant (Pmode, addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
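/* Worked example (illustrative, not part of the original source): for
   a 32-bit destination with start == 8 and width == 16, the mask is
   built as (~0 << (32 - 16 - 8)), then (1 << (32 - 8)) == 0x01000000
   is added, yielding 0x00ffff00 in the low 32 bits: exactly bits 8..23
   counted from the most significant end, which selb then uses to merge
   the left-shifted source into the destination.  */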


int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (offset < bytes)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
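/* Worked example (illustrative, not part of the original source):
   copying bytes == 20 with align == 16 emits one V16QImode move for
   bytes 0..15, then builds the mask { 0xff, 0xff, 0xff, 0xff, 0, ... }
   so that selb takes the first 4 bytes from the source quadword and
   the remaining 12 from the original destination, and stores the
   merged quadword back -- a read-modify-write of the final,
   partially covered 16-byte block.  */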

enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};

/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  machine_mode comp_mode;
  machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case GE:
	    op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }
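  /* Illustrative example (not part of the original source): (x >= 10)
     becomes (x > 9), which maps directly onto the SPU's cgt/clgt
     greater-than compares; the trunc_int_for_mode check skips the
     rewrite when C-1 would not be representable in op0's mode, e.g.
     (x >= INT_MIN).  */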

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible.  */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
    {
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case LE:
	    op1 = GEN_INT (val);
	    code = LT;
	    break;
	  case LEU:
	    op1 = GEN_INT (val);
	    code = LTU;
	    break;
	  default:
	    break;
	  }
    }

  comp_mode = SImode;
  op_mode = GET_MODE (op0);

  switch (code)
    {
    case GE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 0;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 1;
	  reverse_test = 1;
	}
      break;
    case LE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 1;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 0;
	  reverse_test = 1;
	}
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  switch (op_mode)
    {
    case E_QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case E_HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case E_SImode:
      index = 2;
      break;
    case E_DImode:
      index = 3;
      break;
    case E_TImode:
      index = 4;
      break;
    case E_SFmode:
      index = 5;
      break;
    case E_DFmode:
      index = 6;
      break;
    case E_V16QImode:
      index = 7;
      comp_mode = op_mode;
      break;
    case E_V8HImode:
      index = 8;
      comp_mode = op_mode;
      break;
    case E_V4SImode:
      index = 9;
      comp_mode = op_mode;
      break;
    case E_V4SFmode:
      index = 10;
      comp_mode = V4SImode;
      break;
    case E_V2DFmode:
      index = 11;
      comp_mode = V2DImode;
      break;
    case E_V2DImode:
    default:
      abort ();
    }

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode
	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching.  */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = op1;
	  op1 = op0;
	  op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (op0, op_mode))
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (op1, op_mode))
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 op0, op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     op0, op1);
	  if (eq_rtx == 0)
	    abort ();
	  emit_insn (eq_rtx);
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result.  */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      machine_mode mode = int_mode_for_size (target_size, 0).require ();
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result.  */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
	emit_insn (gen_rtx_SET (compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}

HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  if (GET_MODE (x) == SFmode)
    REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return const_double_from_real_value (rv, mode);
}
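/* Worked example (illustrative, not part of the original source): for
   the SFmode constant 1.0f, REAL_VALUE_TO_TARGET_SINGLE produces the
   IEEE-754 bit pattern 0x3f800000, so const_double_to_hwint returns
   0x3f800000 and hwint_to_const_double (SFmode, 0x3f800000) round-trips
   back to a CONST_DOUBLE holding 1.0.  For DFmode the two 32-bit
   target words are packed with the first (high, on this big-endian
   target) word in bits 63..32.  */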

void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;

    default:
      debug_rtx (addr);
      abort ();
    }
}

void
print_operand (FILE * file, rtx x, int code)
{
  machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {

    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable();
	    }
	}
      else
	gcc_unreachable();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		  break;
		case SPU_ILA:
		  fprintf (file, "a");
		  break;
		case SPU_ILH:
		  fprintf (file, "h");
		  break;
		case SPU_ILHU:
		  fprintf (file, "hu");
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      if (info == 1)
		fprintf (file, "b");
	      else if (info == 2)
		fprintf (file, "h");
	      else if (info == 4)
		fprintf (file, "w");
	      else if (info == 8)
		fprintf (file, "d");
	      break;
	    case IC_IL1s:
	      if (xcode == CONST_VECTOR)
		{
		  x = CONST_VECTOR_ELT (x, 0);
		  xcode = GET_CODE (x);
		}
	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
		fprintf (file, "a");
	      else if (xcode == HIGH)
		fprintf (file, "hu");
	      break;
	    case IC_FSMBI:
	    case IC_FSMBI2:
	    case IC_IL2:
	    case IC_IL2s:
	    case IC_POOL:
	      abort ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		case SPU_ILA:
		  break;
		case SPU_ILH:
		case SPU_ILHU:
		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	      break;
	    case IC_FSMBI:
	      constant_to_array (mode, x, arr);
	      val = 0;
	      for (i = 0; i < 16; i++)
		{
		  val <<= 1;
		  val |= arr[i] & 1;
		}
	      print_operand (file, GEN_INT (val), 0);
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
	      break;
	    case IC_IL1s:
	      if (xcode == HIGH)
		x = XEXP (x, 0);
	      if (GET_CODE (x) == CONST_VECTOR)
		x = CONST_VECTOR_ELT (x, 0);
	      output_addr_const (file, x);
	      if (xcode == HIGH)
		fprintf (file, "@h");
	      break;
	    case IC_IL2:
	    case IC_IL2s:
	    case IC_FSMBI2:
	    case IC_POOL:
	      abort ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'C':
      if (xcode == CONST_INT)
	{
	  /* Only the 4 least significant bits are relevant for
	     generating control word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable();
	  }
      else
	gcc_unreachable();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls.  */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'v':
    case 'w':
      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (GET_MODE (x), XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

      /* unused letters
                      o qr  u   yz
         AB           OPQR  UVWXYZ */
    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  if (!reload_completed && !reload_in_progress)
    abort ();

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx.  */
  if (!cfun->machine->pic_reg)
    {
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
      else
	cfun->machine->pic_reg = pic_offset_table_rtx;
    }

  return cfun->machine->pic_reg;
}

/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
int
spu_split_immediate (rtx * ops)
{
  machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant.  */
	scalar_int_mode imode = int_mode_for_mode (mode).require ();
	constant_to_array (mode, ops[1], arrhi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	else
	  to = ops[0];
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	/* We need to do reals as ints because the constant used in the
	   AND might not be a legitimate real constant.  */
	scalar_int_mode imode = int_mode_for_mode (mode).require ();
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
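/* Worked example (illustrative, not part of the original source): an
   IC_IL2 SImode constant such as 0x12345678 cannot be loaded by any
   single il* instruction, so the loop above splits each 4-byte group
   into a high half {0x12, 0x34, 0, 0} and a low half {0, 0, 0x56,
   0x78}; the emitted sequence is effectively

       ilhu  rT, 0x1234     # load 0x12340000
       iohl  rT, 0x5678     # OR in the low 16 bits

   matching the IC_IL2 class comment "both ilhu and iohl".  */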

/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}

static rtx_insn *
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx_insn *
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}

/* This happens after reload, so we need to expand it.  */
static rtx_insn *
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx_insn *insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}

/* Return nonzero if this function is known to have a null epilogue.  */

int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && crtl->is_leaf)
	return 1;
    }
  return 0;
}

/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  var args   |
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |    ...      |
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    ...      |
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    ...      |
         |  outgoing   |
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |   frame     |
         +-------------+
         | back chain  |
   SP -> +-------------+

*/
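/* Worked example (illustrative, not part of the original source): a
   non-leaf function with 48 bytes of locals, two call-saved registers
   live (2 * 16 = 32 bytes) and no vararg or outgoing-arg area gets
   total_size = 48 + 32 + STACK_POINTER_OFFSET; the prologue below
   stores $lr at 16(sp), saves the registers just below the
   pretend-args area, then drops $sp by total_size and writes the old
   $sp as the back chain at 0(new sp).  */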
1717void
1718spu_expand_prologue (void)
1719{
1720 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1721 HOST_WIDE_INT total_size;
1722 HOST_WIDE_INT saved_regs_size;
1723 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1724 rtx scratch_reg_0, scratch_reg_1;
0af56f80 1725 rtx_insn *insn;
1726 rtx real;
644459d0 1727
5eb28709 1728 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1729 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 1730
1731 if (spu_naked_function_p (current_function_decl))
1732 return;
1733
1734 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1735 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1736
1737 saved_regs_size = spu_saved_regs_size ();
1738 total_size = size + saved_regs_size
abe32cce 1739 + crtl->outgoing_args_size
1740 + crtl->args.pretend_args_size;
644459d0 1741
d5bf7b64 1742 if (!crtl->is_leaf
18d50ae6 1743 || cfun->calls_alloca || total_size > 0)
644459d0 1744 total_size += STACK_POINTER_OFFSET;
1745
1746 /* Save this first because code after this might use the link
1747 register as a scratch register. */
d5bf7b64 1748 if (!crtl->is_leaf)
644459d0 1749 {
1750 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1751 RTX_FRAME_RELATED_P (insn) = 1;
1752 }
1753
1754 if (total_size > 0)
1755 {
abe32cce 1756 offset = -crtl->args.pretend_args_size;
644459d0 1757 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1758 if (need_to_save_reg (regno, 1))
1759 {
1760 offset -= 16;
1761 insn = frame_emit_store (regno, sp_reg, offset);
1762 RTX_FRAME_RELATED_P (insn) = 1;
1763 }
1764 }
1765
5eb28709 1766 if (flag_pic && cfun->machine->pic_reg)
644459d0 1767 {
5eb28709 1768 rtx pic_reg = cfun->machine->pic_reg;
644459d0 1769 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1770 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1771 }
1772
1773 if (total_size > 0)
1774 {
1e81f1d8 1775 if (flag_stack_check || flag_stack_clash_protection)
644459d0 1776 {
d819917f 1777 /* We compare against total_size-1 because
644459d0 1778 ($sp >= total_size) <=> ($sp > total_size-1) */
1779 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1780 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1781 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1782 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1783 {
1784 emit_move_insn (scratch_v4si, size_v4si);
1785 size_v4si = scratch_v4si;
1786 }
1787 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
447443f5 1788 emit_insn (gen_vec_extractv4sisi
644459d0 1789 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1790 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1791 }
1792
1793 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1794 the value of the previous $sp because we save it as the back
1795 chain. */
1796 if (total_size <= 2000)
1797 {
1798 /* In this case we save the back chain first. */
1799 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1800 insn =
1801 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1802 }
644459d0 1803 else
1804 {
1805 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1806 insn =
1807 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1808 }
1809 RTX_FRAME_RELATED_P (insn) = 1;
1810 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1811 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1812
1813 if (total_size > 2000)
1814 {
1815 /* Save the back chain ptr */
1816 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1817 }
1818
1819 if (frame_pointer_needed)
1820 {
1821 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1822 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1823 + crtl->outgoing_args_size;
644459d0 1824 /* Set the new frame_pointer */
d8dfeb55 1825 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1826 RTX_FRAME_RELATED_P (insn) = 1;
1827 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1828 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1829 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1830 }
1831 }
1832
8c0dd614 1833 if (flag_stack_usage_info)
a512540d 1834 current_function_static_stack_size = total_size;
644459d0 1835}
1836
1837void
1838spu_expand_epilogue (bool sibcall_p)
1839{
1840 int size = get_frame_size (), offset, regno;
1841 HOST_WIDE_INT saved_regs_size, total_size;
1842 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 1843 rtx scratch_reg_0;
644459d0 1844
644459d0 1845 if (spu_naked_function_p (current_function_decl))
1846 return;
1847
1848 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1849
1850 saved_regs_size = spu_saved_regs_size ();
1851 total_size = size + saved_regs_size
abe32cce 1852 + crtl->outgoing_args_size
1853 + crtl->args.pretend_args_size;
644459d0 1854
d5bf7b64 1855 if (!crtl->is_leaf
18d50ae6 1856 || cfun->calls_alloca || total_size > 0)
644459d0 1857 total_size += STACK_POINTER_OFFSET;
1858
1859 if (total_size > 0)
1860 {
18d50ae6 1861 if (cfun->calls_alloca)
644459d0 1862 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1863 else
1864 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1865
1866
1867 if (saved_regs_size > 0)
1868 {
abe32cce 1869 offset = -crtl->args.pretend_args_size;
644459d0 1870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1871 if (need_to_save_reg (regno, 1))
1872 {
1873 offset -= 0x10;
1874 frame_emit_load (regno, sp_reg, offset);
1875 }
1876 }
1877 }
1878
d5bf7b64 1879 if (!crtl->is_leaf)
644459d0 1880 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1881
1882 if (!sibcall_p)
1883 {
18b42941 1884 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 1885 emit_jump_insn (gen__return ());
644459d0 1886 }
644459d0 1887}
1888
1889rtx
1890spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1891{
1892 if (count != 0)
1893 return 0;
1894 /* This is inefficient because it ends up copying to a save-register
1895 which then gets saved even though $lr has already been saved. But
1896 it does generate better code for leaf functions and we don't need
1897 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1898 used for __builtin_return_address anyway, so maybe we don't care if
1899 it's inefficient. */
1900 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1901}
1902\f
1903
1904/* Given VAL, generate a constant appropriate for MODE.
1905 If MODE is a vector mode, every element will be VAL.
1906 For TImode, VAL will be zero extended to 128 bits. */
1907rtx
3754d046 1908spu_const (machine_mode mode, HOST_WIDE_INT val)
644459d0 1909{
1910 rtx inner;
644459d0 1911
1912 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1913 || GET_MODE_CLASS (mode) == MODE_FLOAT
1914 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1915 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1916
1917 if (GET_MODE_CLASS (mode) == MODE_INT)
1918 return immed_double_const (val, 0, mode);
1919
1920   /* VAL is the bit representation of the float.  */
1921 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1922 return hwint_to_const_double (mode, val);
1923
1924 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1925 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1926 else
1927 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1928
0b51f5ce 1929 return gen_const_vec_duplicate (mode, inner);
644459d0 1930}
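
/* Illustrative usage sketch (an addition for exposition, not in the
   original source; the RTL printing below is assumed):

     spu_const (SImode, 5)    -> (const_int 5)
     spu_const (V4SImode, 5)  -> (const_vector:V4SI [5 5 5 5])
     spu_const (TImode, -1)   -> a 128-bit constant with only the low
                                 64 bits set, since VAL is zero
                                 extended.  */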
644459d0 1931
5474166e 1932/* Create a MODE vector constant from 4 ints. */
1933rtx
3754d046 1934spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
5474166e 1935{
1936 unsigned char arr[16];
1937 arr[0] = (a >> 24) & 0xff;
1938 arr[1] = (a >> 16) & 0xff;
1939 arr[2] = (a >> 8) & 0xff;
1940 arr[3] = (a >> 0) & 0xff;
1941 arr[4] = (b >> 24) & 0xff;
1942 arr[5] = (b >> 16) & 0xff;
1943 arr[6] = (b >> 8) & 0xff;
1944 arr[7] = (b >> 0) & 0xff;
1945 arr[8] = (c >> 24) & 0xff;
1946 arr[9] = (c >> 16) & 0xff;
1947 arr[10] = (c >> 8) & 0xff;
1948 arr[11] = (c >> 0) & 0xff;
1949 arr[12] = (d >> 24) & 0xff;
1950 arr[13] = (d >> 16) & 0xff;
1951 arr[14] = (d >> 8) & 0xff;
1952 arr[15] = (d >> 0) & 0xff;
1953 return array_to_constant(mode, arr);
1954}
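
/* Illustrative sketch (not in the original source): the packing above
   is big-endian within each 32-bit word, so

     spu_const_from_ints (V4SImode, 0x01020304, 0x05060708,
                          0x090a0b0c, 0x0d0e0f10)

   fills arr[] with the bytes 0x01, 0x02, ..., 0x10 in order before
   handing them to array_to_constant.  */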
5a976006 1955\f
1956 /* Branch hint stuff.  */
5474166e 1957
644459d0 1958/* An array of these is used to propagate hints to predecessor blocks. */
1959struct spu_bb_info
1960{
0af56f80 1961 rtx_insn *prop_jump; /* propagated from another block */
5a976006 1962 int bb_index; /* the original block. */
644459d0 1963};
5a976006 1964static struct spu_bb_info *spu_bb_info;
644459d0 1965
5a976006 1966#define STOP_HINT_P(INSN) \
aa90bb35 1967 (CALL_P(INSN) \
5a976006 1968 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1969 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1970
1971/* 1 when RTX is a hinted branch or its target. We keep track of
1972 what has been hinted so the safe-hint code can test it easily. */
1973#define HINTED_P(RTX) \
1974 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1975
1976/* 1 when RTX is an insn that must be scheduled on an even boundary. */
1977#define SCHED_ON_EVEN_P(RTX) \
1978 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1979
1980/* Emit a nop for INSN such that the two will dual issue. This assumes
1981 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1982 We check for TImode to handle a MULTI1 insn which has dual issued its
b1135d9a 1983 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
5a976006 1984static void
0af56f80 1985emit_nop_for_insn (rtx_insn *insn)
644459d0 1986{
5a976006 1987 int p;
0af56f80 1988 rtx_insn *new_insn;
b1135d9a 1989
1990 /* We need to handle JUMP_TABLE_DATA separately. */
1991 if (JUMP_TABLE_DATA_P (insn))
1992 {
1993 new_insn = emit_insn_after (gen_lnop(), insn);
1994 recog_memoized (new_insn);
1995 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1996 return;
1997 }
1998
5a976006 1999 p = get_pipe (insn);
2000 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2001 new_insn = emit_insn_after (gen_lnop (), insn);
2002 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2003 {
5a976006 2004 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2005 PUT_MODE (new_insn, TImode);
2006 PUT_MODE (insn, VOIDmode);
2007 }
2008 else
2009 new_insn = emit_insn_after (gen_lnop (), insn);
2010 recog_memoized (new_insn);
d53c050c 2011 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
5a976006 2012}
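
/* Behavior sketch derived from the code above (for exposition only):
   a pipe-1 insn already carrying TImode gets a pipe-0 "nopn" emitted
   before it, and the nop takes over the TImode marking; in every
   other case a pipe-1 "lnop" is emitted after the insn so the pair
   can dual issue.  */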
2013
2014/* Insert nops in basic blocks to meet dual issue alignment
2015 requirements. Also make sure hbrp and hint instructions are at least
2016 one cycle apart, possibly inserting a nop. */
2017static void
2018pad_bb(void)
2019{
0af56f80 2020 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
5a976006 2021 int length;
2022 int addr;
2023
2024 /* This sets up INSN_ADDRESSES. */
2025 shorten_branches (get_insns ());
2026
2027 /* Keep track of length added by nops. */
2028 length = 0;
2029
2030 prev_insn = 0;
2031 insn = get_insns ();
2032 if (!active_insn_p (insn))
2033 insn = next_active_insn (insn);
2034 for (; insn; insn = next_insn)
2035 {
2036 next_insn = next_active_insn (insn);
5904cc6f 2037 if (INSN_P (insn)
2038 && (INSN_CODE (insn) == CODE_FOR_iprefetch
2039 || INSN_CODE (insn) == CODE_FOR_hbr))
644459d0 2040 {
5a976006 2041 if (hbr_insn)
2042 {
2043 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2044 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2045 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2046 || (a1 - a0 == 4))
2047 {
2048 prev_insn = emit_insn_before (gen_lnop (), insn);
2049 PUT_MODE (prev_insn, GET_MODE (insn));
2050 PUT_MODE (insn, TImode);
d53c050c 2051 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
5a976006 2052 length += 4;
2053 }
2054 }
2055 hbr_insn = insn;
2056 }
5904cc6f 2057 if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
5a976006 2058 {
2059 if (GET_MODE (insn) == TImode)
2060 PUT_MODE (next_insn, TImode);
2061 insn = next_insn;
2062 next_insn = next_active_insn (insn);
2063 }
2064 addr = INSN_ADDRESSES (INSN_UID (insn));
2065 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2066 {
2067 if (((addr + length) & 7) != 0)
2068 {
2069 emit_nop_for_insn (prev_insn);
2070 length += 4;
2071 }
644459d0 2072 }
5a976006 2073 else if (GET_MODE (insn) == TImode
2074 && ((next_insn && GET_MODE (next_insn) != TImode)
2075 || get_attr_type (insn) == TYPE_MULTI0)
2076 && ((addr + length) & 7) != 0)
2077 {
2078 /* prev_insn will always be set because the first insn is
2079 always 8-byte aligned. */
2080 emit_nop_for_insn (prev_insn);
2081 length += 4;
2082 }
2083 prev_insn = insn;
644459d0 2084 }
644459d0 2085}
2086
5a976006 2087\f
2088/* Routines for branch hints. */
2089
644459d0 2090static void
0af56f80 2091spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
5a976006 2092 int distance, sbitmap blocks)
644459d0 2093{
0af56f80 2094 rtx_insn *hint;
2095 rtx_insn *insn;
c86d86ff 2096 rtx_jump_table_data *table;
644459d0 2097
2098 if (before == 0 || branch == 0 || target == 0)
2099 return;
2100
5a976006 2101   /* While scheduling we require hints to be no further than 600 bytes
2102	     from the branch, so we need to enforce that here too.  */
644459d0 2103 if (distance > 600)
2104 return;
2105
5a976006 2106   /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2107 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2108 before = NEXT_INSN (before);
644459d0 2109
753de566 2110 rtx_code_label *branch_label = gen_label_rtx ();
644459d0 2111 LABEL_NUSES (branch_label)++;
2112 LABEL_PRESERVE_P (branch_label) = 1;
2113 insn = emit_label_before (branch_label, branch);
753de566 2114 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
08b7917c 2115 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
5a976006 2116
753de566 2117 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
5a976006 2118 recog_memoized (hint);
d53c050c 2119 INSN_LOCATION (hint) = INSN_LOCATION (branch);
5a976006 2120 HINTED_P (branch) = 1;
644459d0 2121
5a976006 2122 if (GET_CODE (target) == LABEL_REF)
2123 HINTED_P (XEXP (target, 0)) = 1;
2124 else if (tablejump_p (branch, 0, &table))
644459d0 2125 {
5a976006 2126 rtvec vec;
2127 int j;
2128 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2129 vec = XVEC (PATTERN (table), 0);
2130 else
2131 vec = XVEC (PATTERN (table), 1);
2132 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2133 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2134 }
5a976006 2135
2136 if (distance >= 588)
644459d0 2137 {
5a976006 2138 /* Make sure the hint isn't scheduled any earlier than this point,
2139	 which could make it too far for the branch offset to fit.  */
2fbdf9ef 2140 insn = emit_insn_before (gen_blockage (), hint);
2141 recog_memoized (insn);
d53c050c 2142 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2143 }
2144 else if (distance <= 8 * 4)
2145 {
2146 /* To guarantee at least 8 insns between the hint and branch we
2147 insert nops. */
2148 int d;
2149 for (d = distance; d < 8 * 4; d += 4)
2150 {
2151 insn =
2152 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2153 recog_memoized (insn);
d53c050c 2154 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2155 }
2156
2157 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2158 insn = emit_insn_after (gen_blockage (), hint);
2159 recog_memoized (insn);
d53c050c 2160 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2161
2162 /* Make sure any nops inserted aren't scheduled after the call. */
2163 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2164 {
2165 insn = emit_insn_before (gen_blockage (), branch);
2166 recog_memoized (insn);
d53c050c 2167 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2fbdf9ef 2168 }
644459d0 2169 }
644459d0 2170}
2171
2172 /* Return 0 if we don't want a hint for this branch.  Otherwise return
2173 the rtx for the branch target. */
2174static rtx
0af56f80 2175get_branch_target (rtx_insn *branch)
644459d0 2176{
aa90bb35 2177 if (JUMP_P (branch))
644459d0 2178 {
2179 rtx set, src;
2180
2181 /* Return statements */
2182 if (GET_CODE (PATTERN (branch)) == RETURN)
2183 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2184
fcc31b99 2185 /* ASM GOTOs. */
604157f6 2186 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2187 return NULL;
2188
644459d0 2189 set = single_set (branch);
2190 src = SET_SRC (set);
2191 if (GET_CODE (SET_DEST (set)) != PC)
2192 abort ();
2193
2194 if (GET_CODE (src) == IF_THEN_ELSE)
2195 {
2196 rtx lab = 0;
2197 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2198 if (note)
2199 {
2200 /* If the more probable case is not a fall through, then
2201 try a branch hint. */
61cb1816 2202 int prob = profile_probability::from_reg_br_prob_note
2203 (XINT (note, 0)).to_reg_br_prob_base ();
644459d0 2204 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2205 && GET_CODE (XEXP (src, 1)) != PC)
2206 lab = XEXP (src, 1);
2207 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2208 && GET_CODE (XEXP (src, 2)) != PC)
2209 lab = XEXP (src, 2);
2210 }
2211 if (lab)
2212 {
2213 if (GET_CODE (lab) == RETURN)
2214 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2215 return lab;
2216 }
2217 return 0;
2218 }
2219
2220 return src;
2221 }
aa90bb35 2222 else if (CALL_P (branch))
644459d0 2223 {
2224 rtx call;
2225 /* All of our call patterns are in a PARALLEL and the CALL is
2226 the first pattern in the PARALLEL. */
2227 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2228 abort ();
2229 call = XVECEXP (PATTERN (branch), 0, 0);
2230 if (GET_CODE (call) == SET)
2231 call = SET_SRC (call);
2232 if (GET_CODE (call) != CALL)
2233 abort ();
2234 return XEXP (XEXP (call, 0), 0);
2235 }
2236 return 0;
2237}
2238
5a976006 2239/* The special $hbr register is used to prevent the insn scheduler from
2240 moving hbr insns across instructions which invalidate them. It
2241 should only be used in a clobber, and this function searches for
2242 insns which clobber it. */
2243static bool
0af56f80 2244insn_clobbers_hbr (rtx_insn *insn)
5a976006 2245{
2246 if (INSN_P (insn)
2247 && GET_CODE (PATTERN (insn)) == PARALLEL)
2248 {
2249 rtx parallel = PATTERN (insn);
2250 rtx clobber;
2251 int j;
2252 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2253 {
2254 clobber = XVECEXP (parallel, 0, j);
2255 if (GET_CODE (clobber) == CLOBBER
2256 && GET_CODE (XEXP (clobber, 0)) == REG
2257 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2258 return 1;
2259 }
2260 }
2261 return 0;
2262}
2263
2264/* Search up to 32 insns starting at FIRST:
2265 - at any kind of hinted branch, just return
2266 - at any unconditional branch in the first 15 insns, just return
2267 - at a call or indirect branch, after the first 15 insns, force it to
2268 an even address and return
2269 - at any unconditional branch, after the first 15 insns, force it to
2270 an even address.
2271    At the end of the search, insert an hbrp within 4 insns of FIRST,
2272 and an hbrp within 16 instructions of FIRST.
2273 */
644459d0 2274static void
0af56f80 2275insert_hbrp_for_ilb_runout (rtx_insn *first)
644459d0 2276{
0af56f80 2277 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
5a976006 2278 int addr = 0, length, first_addr = -1;
2279 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2280 int insert_lnop_after = 0;
2281 for (insn = first; insn; insn = NEXT_INSN (insn))
2282 if (INSN_P (insn))
2283 {
2284 if (first_addr == -1)
2285 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2286 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2287 length = get_attr_length (insn);
2288
2289 if (before_4 == 0 && addr + length >= 4 * 4)
2290 before_4 = insn;
2291 /* We test for 14 instructions because the first hbrp will add
2292 up to 2 instructions. */
2293 if (before_16 == 0 && addr + length >= 14 * 4)
2294 before_16 = insn;
2295
2296 if (INSN_CODE (insn) == CODE_FOR_hbr)
2297 {
2298 /* Make sure an hbrp is at least 2 cycles away from a hint.
2299 Insert an lnop after the hbrp when necessary. */
2300 if (before_4 == 0 && addr > 0)
2301 {
2302 before_4 = insn;
2303 insert_lnop_after |= 1;
2304 }
2305 else if (before_4 && addr <= 4 * 4)
2306 insert_lnop_after |= 1;
2307 if (before_16 == 0 && addr > 10 * 4)
2308 {
2309 before_16 = insn;
2310 insert_lnop_after |= 2;
2311 }
2312 else if (before_16 && addr <= 14 * 4)
2313 insert_lnop_after |= 2;
2314 }
644459d0 2315
5a976006 2316 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2317 {
2318 if (addr < hbrp_addr0)
2319 hbrp_addr0 = addr;
2320 else if (addr < hbrp_addr1)
2321 hbrp_addr1 = addr;
2322 }
644459d0 2323
5a976006 2324 if (CALL_P (insn) || JUMP_P (insn))
2325 {
2326 if (HINTED_P (insn))
2327 return;
2328
2329 /* Any branch after the first 15 insns should be on an even
2330 address to avoid a special case branch. There might be
2331 some nops and/or hbrps inserted, so we test after 10
2332 insns. */
2333 if (addr > 10 * 4)
2334 SCHED_ON_EVEN_P (insn) = 1;
2335 }
644459d0 2336
5a976006 2337 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2338 return;
2339
2340
2341 if (addr + length >= 32 * 4)
644459d0 2342 {
5a976006 2343 gcc_assert (before_4 && before_16);
2344 if (hbrp_addr0 > 4 * 4)
644459d0 2345 {
5a976006 2346 insn =
2347 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2348 recog_memoized (insn);
d53c050c 2349 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2350 INSN_ADDRESSES_NEW (insn,
2351 INSN_ADDRESSES (INSN_UID (before_4)));
2352 PUT_MODE (insn, GET_MODE (before_4));
2353 PUT_MODE (before_4, TImode);
2354 if (insert_lnop_after & 1)
644459d0 2355 {
5a976006 2356 insn = emit_insn_before (gen_lnop (), before_4);
2357 recog_memoized (insn);
d53c050c 2358 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2359 INSN_ADDRESSES_NEW (insn,
2360 INSN_ADDRESSES (INSN_UID (before_4)));
2361 PUT_MODE (insn, TImode);
644459d0 2362 }
644459d0 2363 }
5a976006 2364 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2365 && hbrp_addr1 > 16 * 4)
644459d0 2366 {
5a976006 2367 insn =
2368 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2369 recog_memoized (insn);
d53c050c 2370 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2371 INSN_ADDRESSES_NEW (insn,
2372 INSN_ADDRESSES (INSN_UID (before_16)));
2373 PUT_MODE (insn, GET_MODE (before_16));
2374 PUT_MODE (before_16, TImode);
2375 if (insert_lnop_after & 2)
644459d0 2376 {
5a976006 2377 insn = emit_insn_before (gen_lnop (), before_16);
2378 recog_memoized (insn);
d53c050c 2379 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2380 INSN_ADDRESSES_NEW (insn,
2381 INSN_ADDRESSES (INSN_UID
2382 (before_16)));
2383 PUT_MODE (insn, TImode);
644459d0 2384 }
2385 }
5a976006 2386 return;
644459d0 2387 }
644459d0 2388 }
5a976006 2389 else if (BARRIER_P (insn))
2390 return;
644459d0 2391
644459d0 2392}
5a976006 2393
2394/* The SPU might hang when it executes 48 inline instructions after a
2395 hinted branch jumps to its hinted target. The beginning of a
851d9296 2396 function and the return from a call might have been hinted, and
2397 must be handled as well. To prevent a hang we insert 2 hbrps. The
2398 first should be within 6 insns of the branch target. The second
2399 should be within 22 insns of the branch target. When determining
2400 if hbrps are necessary, we look for only 32 inline instructions,
2401    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2402 when inserting new hbrps, we insert them within 4 and 16 insns of
2403 the target. */
644459d0 2404static void
5a976006 2405insert_hbrp (void)
644459d0 2406{
0af56f80 2407 rtx_insn *insn;
5a976006 2408 if (TARGET_SAFE_HINTS)
644459d0 2409 {
5a976006 2410 shorten_branches (get_insns ());
2411 /* Insert hbrp at beginning of function */
2412 insn = next_active_insn (get_insns ());
2413 if (insn)
2414 insert_hbrp_for_ilb_runout (insn);
2415 /* Insert hbrp after hinted targets. */
2416 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2417 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2418 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2419 }
644459d0 2420}
2421
5a976006 2422static int in_spu_reorg;
2423
8a42230a 2424static void
2425spu_var_tracking (void)
2426{
2427 if (flag_var_tracking)
2428 {
2429 df_analyze ();
2430 timevar_push (TV_VAR_TRACKING);
2431 variable_tracking_main ();
2432 timevar_pop (TV_VAR_TRACKING);
2433 df_finish_pass (false);
2434 }
2435}
2436
5a976006 2437/* Insert branch hints. There are no branch optimizations after this
2438 pass, so it's safe to set our branch hints now. */
644459d0 2439static void
5a976006 2440spu_machine_dependent_reorg (void)
644459d0 2441{
5a976006 2442 sbitmap blocks;
2443 basic_block bb;
0af56f80 2444 rtx_insn *branch, *insn;
5a976006 2445 rtx branch_target = 0;
2446 int branch_addr = 0, insn_addr, required_dist = 0;
2447 int i;
2448 unsigned int j;
644459d0 2449
5a976006 2450 if (!TARGET_BRANCH_HINTS || optimize == 0)
2451 {
2452 /* We still do it for unoptimized code because an external
2453 function might have hinted a call or return. */
a54ca889 2454 compute_bb_for_insn ();
5a976006 2455 insert_hbrp ();
2456 pad_bb ();
8a42230a 2457 spu_var_tracking ();
a54ca889 2458 free_bb_for_insn ();
5a976006 2459 return;
2460 }
644459d0 2461
fe672ac0 2462 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
53c5d9d4 2463 bitmap_clear (blocks);
644459d0 2464
5a976006 2465 in_spu_reorg = 1;
2466 compute_bb_for_insn ();
2467
a7a0184d 2468 /* (Re-)discover loops so that bb->loop_father can be used
2469 in the analysis below. */
2470 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2471
5a976006 2472 compact_blocks ();
2473
2474 spu_bb_info =
a28770e1 2475 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
5a976006 2476 sizeof (struct spu_bb_info));
2477
2478 /* We need exact insn addresses and lengths. */
2479 shorten_branches (get_insns ());
2480
a28770e1 2481 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
644459d0 2482 {
f5a6b05f 2483 bb = BASIC_BLOCK_FOR_FN (cfun, i);
5a976006 2484 branch = 0;
2485 if (spu_bb_info[i].prop_jump)
644459d0 2486 {
5a976006 2487 branch = spu_bb_info[i].prop_jump;
2488 branch_target = get_branch_target (branch);
2489 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2490 required_dist = spu_hint_dist;
2491 }
2492 /* Search from end of a block to beginning. In this loop, find
2493	 jumps which need a branch hint and emit the hint only when:
2494 - it's an indirect branch and we're at the insn which sets
2495 the register
2496 - we're at an insn that will invalidate the hint. e.g., a
2497 call, another hint insn, inline asm that clobbers $hbr, and
2498 some inlined operations (divmodsi4). Don't consider jumps
2499 because they are only at the end of a block and are
2500 considered when we are deciding whether to propagate
2501 - we're getting too far away from the branch. The hbr insns
2502 only have a signed 10 bit offset
2503 We go back as far as possible so the branch will be considered
2504 for propagation when we get to the beginning of the block. */
2505 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2506 {
2507 if (INSN_P (insn))
2508 {
2509 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2510 if (branch
2511 && ((GET_CODE (branch_target) == REG
2512 && set_of (branch_target, insn) != NULL_RTX)
2513 || insn_clobbers_hbr (insn)
2514 || branch_addr - insn_addr > 600))
2515 {
0af56f80 2516 rtx_insn *next = NEXT_INSN (insn);
5a976006 2517 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2518 if (insn != BB_END (bb)
2519 && branch_addr - next_addr >= required_dist)
2520 {
2521 if (dump_file)
2522 fprintf (dump_file,
2523 "hint for %i in block %i before %i\n",
2524 INSN_UID (branch), bb->index,
2525 INSN_UID (next));
2526 spu_emit_branch_hint (next, branch, branch_target,
2527 branch_addr - next_addr, blocks);
2528 }
2529 branch = 0;
2530 }
2531
2532 /* JUMP_P will only be true at the end of a block. When
2533 branch is already set it means we've previously decided
2534 to propagate a hint for that branch into this block. */
2535 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2536 {
2537 branch = 0;
2538 if ((branch_target = get_branch_target (insn)))
2539 {
2540 branch = insn;
2541 branch_addr = insn_addr;
2542 required_dist = spu_hint_dist;
2543 }
2544 }
2545 }
2546 if (insn == BB_HEAD (bb))
2547 break;
2548 }
2549
2550 if (branch)
2551 {
2552 /* If we haven't emitted a hint for this branch yet, it might
2553 be profitable to emit it in one of the predecessor blocks,
2554 especially for loops. */
0af56f80 2555 rtx_insn *bbend;
5a976006 2556 basic_block prev = 0, prop = 0, prev2 = 0;
2557 int loop_exit = 0, simple_loop = 0;
2558 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2559
2560 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2561 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2562 prev = EDGE_PRED (bb, j)->src;
2563 else
2564 prev2 = EDGE_PRED (bb, j)->src;
2565
2566 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2567 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2568 loop_exit = 1;
2569 else if (EDGE_SUCC (bb, j)->dest == bb)
2570 simple_loop = 1;
2571
2572 /* If this branch is a loop exit then propagate to previous
2573 fallthru block. This catches the cases when it is a simple
2574 loop or when there is an initial branch into the loop. */
2575 if (prev && (loop_exit || simple_loop)
a7a0184d 2576 && bb_loop_depth (prev) <= bb_loop_depth (bb))
5a976006 2577 prop = prev;
2578
2579	  /* If there is only one adjacent predecessor, don't propagate
a7a0184d 2580 outside this loop. */
5a976006 2581 else if (prev && single_pred_p (bb)
a7a0184d 2582 && prev->loop_father == bb->loop_father)
5a976006 2583 prop = prev;
2584
2585 /* If this is the JOIN block of a simple IF-THEN then
9d75589a 2586 propagate the hint to the HEADER block. */
5a976006 2587 else if (prev && prev2
2588 && EDGE_COUNT (bb->preds) == 2
2589 && EDGE_COUNT (prev->preds) == 1
2590 && EDGE_PRED (prev, 0)->src == prev2
a7a0184d 2591 && prev2->loop_father == bb->loop_father
5a976006 2592 && GET_CODE (branch_target) != REG)
2593 prop = prev;
2594
2595 /* Don't propagate when:
2596 - this is a simple loop and the hint would be too far
2597 - this is not a simple loop and there are 16 insns in
2598 this block already
2599 - the predecessor block ends in a branch that will be
2600 hinted
2601 - the predecessor block ends in an insn that invalidates
2602 the hint */
2603 if (prop
2604 && prop->index >= 0
2605 && (bbend = BB_END (prop))
2606 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2607 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2608 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2609 {
2610 if (dump_file)
2611 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2612 "for %i (loop_exit %i simple_loop %i dist %i)\n",
a7a0184d 2613 bb->index, prop->index, bb_loop_depth (bb),
5a976006 2614 INSN_UID (branch), loop_exit, simple_loop,
2615 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2616
2617 spu_bb_info[prop->index].prop_jump = branch;
2618 spu_bb_info[prop->index].bb_index = i;
2619 }
2620 else if (branch_addr - next_addr >= required_dist)
2621 {
2622 if (dump_file)
2623 fprintf (dump_file, "hint for %i in block %i before %i\n",
2624 INSN_UID (branch), bb->index,
2625 INSN_UID (NEXT_INSN (insn)));
2626 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2627 branch_addr - next_addr, blocks);
2628 }
2629 branch = 0;
644459d0 2630 }
644459d0 2631 }
5a976006 2632 free (spu_bb_info);
644459d0 2633
53c5d9d4 2634 if (!bitmap_empty_p (blocks))
5a976006 2635 find_many_sub_basic_blocks (blocks);
2636
2637 /* We have to schedule to make sure alignment is ok. */
fc00614f 2638 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
5a976006 2639
2640 /* The hints need to be scheduled, so call it again. */
2641 schedule_insns ();
2fbdf9ef 2642 df_finish_pass (true);
5a976006 2643
2644 insert_hbrp ();
2645
2646 pad_bb ();
2647
8f1d58ad 2648 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2649 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2650 {
2651 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2652	 between its branch label and the branch.  We don't move the
2653 label because GCC expects it at the beginning of the block. */
2654 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2655 rtx label_ref = XVECEXP (unspec, 0, 0);
4cd001d5 2656 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2657 rtx_insn *branch;
8f1d58ad 2658 int offset = 0;
2659 for (branch = NEXT_INSN (label);
2660 !JUMP_P (branch) && !CALL_P (branch);
2661 branch = NEXT_INSN (branch))
2662 if (NONJUMP_INSN_P (branch))
2663 offset += get_attr_length (branch);
2664 if (offset > 0)
29c05e22 2665 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
8f1d58ad 2666 }
5a976006 2667
8a42230a 2668 spu_var_tracking ();
5a976006 2669
a7a0184d 2670 loop_optimizer_finalize ();
2671
5a976006 2672 free_bb_for_insn ();
2673
2674 in_spu_reorg = 0;
644459d0 2675}
2676\f
2677
2678/* Insn scheduling routines, primarily for dual issue. */
2679static int
2680spu_sched_issue_rate (void)
2681{
2682 return 2;
2683}
2684
2685static int
0af56f80 2686uses_ls_unit(rtx_insn *insn)
644459d0 2687{
5a976006 2688 rtx set = single_set (insn);
2689 if (set != 0
2690 && (GET_CODE (SET_DEST (set)) == MEM
2691 || GET_CODE (SET_SRC (set)) == MEM))
2692 return 1;
2693 return 0;
644459d0 2694}
2695
2696static int
0af56f80 2697get_pipe (rtx_insn *insn)
644459d0 2698{
2699 enum attr_type t;
2700 /* Handle inline asm */
2701 if (INSN_CODE (insn) == -1)
2702 return -1;
2703 t = get_attr_type (insn);
2704 switch (t)
2705 {
2706 case TYPE_CONVERT:
2707 return -2;
2708 case TYPE_MULTI0:
2709 return -1;
2710
2711 case TYPE_FX2:
2712 case TYPE_FX3:
2713 case TYPE_SPR:
2714 case TYPE_NOP:
2715 case TYPE_FXB:
2716 case TYPE_FPD:
2717 case TYPE_FP6:
2718 case TYPE_FP7:
644459d0 2719 return 0;
2720
2721 case TYPE_LNOP:
2722 case TYPE_SHUF:
2723 case TYPE_LOAD:
2724 case TYPE_STORE:
2725 case TYPE_BR:
2726 case TYPE_MULTI1:
2727 case TYPE_HBR:
5a976006 2728 case TYPE_IPREFETCH:
644459d0 2729 return 1;
2730 default:
2731 abort ();
2732 }
2733}
2734
5a976006 2735
2736/* haifa-sched.c has a static variable that keeps track of the current
2737 cycle. It is passed to spu_sched_reorder, and we record it here for
2738 use by spu_sched_variable_issue. It won't be accurate if the
2739    scheduler updates its clock_var between the two calls. */
2740static int clock_var;
2741
2742/* This is used to keep track of insn alignment. Set to 0 at the
2743 beginning of each block and increased by the "length" attr of each
2744 insn scheduled. */
2745static int spu_sched_length;
2746
2747/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2748 ready list appropriately in spu_sched_reorder(). */
2749static int pipe0_clock;
2750static int pipe1_clock;
2751
2752static int prev_clock_var;
2753
2754static int prev_priority;
2755
2756/* The SPU needs to load the next ilb sometime during the execution of
2757 the previous ilb. There is a potential conflict if every cycle has a
2758 load or store. To avoid the conflict we make sure the load/store
2759 unit is free for at least one cycle during the execution of insns in
2760 the previous ilb. */
2761static int spu_ls_first;
2762static int prev_ls_clock;
2763
2764static void
2765spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2766 int max_ready ATTRIBUTE_UNUSED)
2767{
2768 spu_sched_length = 0;
2769}
2770
2771static void
2772spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2773 int max_ready ATTRIBUTE_UNUSED)
2774{
6fceef7a 2775 if (align_labels.levels[0].get_value () > 4
2776 || align_loops.levels[0].get_value () > 4
2777 || align_jumps.levels[0].get_value () > 4)
5a976006 2778 {
2779 /* When any block might be at least 8-byte aligned, assume they
2780 will all be at least 8-byte aligned to make sure dual issue
2781 works out correctly. */
2782 spu_sched_length = 0;
2783 }
2784 spu_ls_first = INT_MAX;
2785 clock_var = -1;
2786 prev_ls_clock = -1;
2787 pipe0_clock = -1;
2788 pipe1_clock = -1;
2789 prev_clock_var = -1;
2790 prev_priority = -1;
2791}
2792
644459d0 2793static int
5a976006 2794spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
0af56f80 2795 int verbose ATTRIBUTE_UNUSED,
18282db0 2796 rtx_insn *insn, int more)
644459d0 2797{
5a976006 2798 int len;
2799 int p;
644459d0 2800 if (GET_CODE (PATTERN (insn)) == USE
2801 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2802 || (len = get_attr_length (insn)) == 0)
2803 return more;
2804
2805 spu_sched_length += len;
2806
2807 /* Reset on inline asm */
2808 if (INSN_CODE (insn) == -1)
2809 {
2810 spu_ls_first = INT_MAX;
2811 pipe0_clock = -1;
2812 pipe1_clock = -1;
2813 return 0;
2814 }
2815 p = get_pipe (insn);
2816 if (p == 0)
2817 pipe0_clock = clock_var;
2818 else
2819 pipe1_clock = clock_var;
2820
2821 if (in_spu_reorg)
2822 {
2823 if (clock_var - prev_ls_clock > 1
2824 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2825 spu_ls_first = INT_MAX;
2826 if (uses_ls_unit (insn))
2827 {
2828 if (spu_ls_first == INT_MAX)
2829 spu_ls_first = spu_sched_length;
2830 prev_ls_clock = clock_var;
2831 }
2832
2833 /* The scheduler hasn't inserted the nop, but we will later on.
2834 Include those nops in spu_sched_length. */
2835 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2836 spu_sched_length += 4;
2837 prev_clock_var = clock_var;
2838
2839 /* more is -1 when called from spu_sched_reorder for new insns
2840 that don't have INSN_PRIORITY */
2841 if (more >= 0)
2842 prev_priority = INSN_PRIORITY (insn);
2843 }
2844
9d75589a 2845 /* Always try issuing more insns. spu_sched_reorder will decide
5a976006 2846 when the cycle should be advanced. */
2847 return 1;
2848}
2849
2850/* This function is called for both TARGET_SCHED_REORDER and
2851 TARGET_SCHED_REORDER2. */
2852static int
2853spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
b24ef467 2854 rtx_insn **ready, int *nreadyp, int clock)
5a976006 2855{
2856 int i, nready = *nreadyp;
2857 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
b24ef467 2858 rtx_insn *insn;
5a976006 2859
2860 clock_var = clock;
2861
2862 if (nready <= 0 || pipe1_clock >= clock)
2863 return 0;
2864
2865 /* Find any rtl insns that don't generate assembly insns and schedule
2866 them first. */
2867 for (i = nready - 1; i >= 0; i--)
2868 {
2869 insn = ready[i];
2870 if (INSN_CODE (insn) == -1
2871 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 2872 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 2873 {
2874 ready[i] = ready[nready - 1];
2875 ready[nready - 1] = insn;
2876 return 1;
2877 }
2878 }
2879
2880 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2881 for (i = 0; i < nready; i++)
2882 if (INSN_CODE (ready[i]) != -1)
2883 {
2884 insn = ready[i];
2885 switch (get_attr_type (insn))
2886 {
2887 default:
2888 case TYPE_MULTI0:
2889 case TYPE_CONVERT:
2890 case TYPE_FX2:
2891 case TYPE_FX3:
2892 case TYPE_SPR:
2893 case TYPE_NOP:
2894 case TYPE_FXB:
2895 case TYPE_FPD:
2896 case TYPE_FP6:
2897 case TYPE_FP7:
2898 pipe_0 = i;
2899 break;
2900 case TYPE_LOAD:
2901 case TYPE_STORE:
2902 pipe_ls = i;
fd0f8124 2903 /* FALLTHRU */
5a976006 2904 case TYPE_LNOP:
2905 case TYPE_SHUF:
2906 case TYPE_BR:
2907 case TYPE_MULTI1:
2908 case TYPE_HBR:
2909 pipe_1 = i;
2910 break;
2911 case TYPE_IPREFETCH:
2912 pipe_hbrp = i;
2913 break;
2914 }
2915 }
2916
2917 /* In the first scheduling phase, schedule loads and stores together
2918 to increase the chance they will get merged during postreload CSE. */
2919 if (!reload_completed && pipe_ls >= 0)
2920 {
2921 insn = ready[pipe_ls];
2922 ready[pipe_ls] = ready[nready - 1];
2923 ready[nready - 1] = insn;
2924 return 1;
2925 }
2926
2927 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2928 if (pipe_hbrp >= 0)
2929 pipe_1 = pipe_hbrp;
2930
2931 /* When we have loads/stores in every cycle of the last 15 insns and
2932 we are about to schedule another load/store, emit an hbrp insn
2933 instead. */
2934 if (in_spu_reorg
2935 && spu_sched_length - spu_ls_first >= 4 * 15
2936 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2937 {
2938 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2939 recog_memoized (insn);
2940 if (pipe0_clock < clock)
2941 PUT_MODE (insn, TImode);
2942 spu_sched_variable_issue (file, verbose, insn, -1);
2943 return 0;
2944 }
2945
2946 /* In general, we want to emit nops to increase dual issue, but dual
2947 issue isn't faster when one of the insns could be scheduled later
2948	 without affecting the critical path.  We look at INSN_PRIORITY to
2949 make a good guess, but it isn't perfect so -mdual-nops=n can be
2950	 used to affect it. */
2951 if (in_spu_reorg && spu_dual_nops < 10)
2952 {
9d75589a 2953 /* When we are at an even address and we are not issuing nops to
5a976006 2954	 improve scheduling, then we need to advance the cycle.
2955 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2956 && (spu_dual_nops == 0
2957 || (pipe_1 != -1
2958 && prev_priority >
2959 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2960 return 0;
2961
2962 /* When at an odd address, schedule the highest priority insn
2963 without considering pipeline. */
2964 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2965 && (spu_dual_nops == 0
2966 || (prev_priority >
2967 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2968 return 1;
2969 }
2970
2971
2972 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2973 pipe0 insn in the ready list, schedule it. */
2974 if (pipe0_clock < clock && pipe_0 >= 0)
2975 schedule_i = pipe_0;
2976
2977 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2978 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2979 else
2980 schedule_i = pipe_1;
2981
2982 if (schedule_i > -1)
2983 {
2984 insn = ready[schedule_i];
2985 ready[schedule_i] = ready[nready - 1];
2986 ready[nready - 1] = insn;
2987 return 1;
2988 }
2989 return 0;
644459d0 2990}
2991
2992/* INSN is dependent on DEP_INSN. */
2993static int
99f52c2b 2994spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2995 int cost, unsigned int)
644459d0 2996{
5a976006 2997 rtx set;
2998
2999 /* The blockage pattern is used to prevent instructions from being
3000 moved across it and has no cost. */
3001 if (INSN_CODE (insn) == CODE_FOR_blockage
3002 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3003 return 0;
3004
9d98604b 3005 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3006 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3007 return 0;
3008
3009 /* Make sure hbrps are spread out. */
3010 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3011 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3012 return 8;
3013
3014 /* Make sure hints and hbrps are 2 cycles apart. */
3015 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (insn) == CODE_FOR_hbr)
3017 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3018 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3019 return 2;
3020
3021 /* An hbrp has no real dependency on other insns. */
3022 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3023 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3024 return 0;
3025
3026 /* Assuming that it is unlikely an argument register will be used in
3027 the first cycle of the called function, we reduce the cost for
3028 slightly better scheduling of dep_insn. When not hinted, the
3029 mispredicted branch would hide the cost as well. */
3030 if (CALL_P (insn))
3031 {
3032 rtx target = get_branch_target (insn);
3033 if (GET_CODE (target) != REG || !set_of (target, insn))
3034 return cost - 2;
3035 return cost;
3036 }
3037
3038 /* And when returning from a function, let's assume the return values
3039 are completed sooner too. */
3040 if (CALL_P (dep_insn))
644459d0 3041 return cost - 2;
5a976006 3042
3043   /* Make sure an instruction that loads from the back chain is scheduled
3044 away from the return instruction so a hint is more likely to get
3045 issued. */
3046 if (INSN_CODE (insn) == CODE_FOR__return
3047 && (set = single_set (dep_insn))
3048 && GET_CODE (SET_DEST (set)) == REG
3049 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3050 return 20;
3051
644459d0 3052 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3053 scheduler makes every insn in a block anti-dependent on the final
3054 jump_insn. We adjust here so higher cost insns will get scheduled
3055 earlier. */
99f52c2b 3056 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
45e8950d 3057 return insn_sched_cost (dep_insn) - 3;
5a976006 3058
644459d0 3059 return cost;
3060}
3061\f
3062/* Create a CONST_DOUBLE from a string. */
842ae815 3063rtx
3754d046 3064spu_float_const (const char *string, machine_mode mode)
644459d0 3065{
3066 REAL_VALUE_TYPE value;
3067 value = REAL_VALUE_ATOF (string, mode);
d5f9611d 3068 return const_double_from_real_value (value, mode);
644459d0 3069}
3070
644459d0 3071int
3072spu_constant_address_p (rtx x)
3073{
3074 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3075 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3076 || GET_CODE (x) == HIGH);
3077}
3078
3079static enum spu_immediate
3080which_immediate_load (HOST_WIDE_INT val)
3081{
3082 gcc_assert (val == trunc_int_for_mode (val, SImode));
3083
3084 if (val >= -0x8000 && val <= 0x7fff)
3085 return SPU_IL;
3086 if (val >= 0 && val <= 0x3ffff)
3087 return SPU_ILA;
3088 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3089 return SPU_ILH;
3090 if ((val & 0xffff) == 0)
3091 return SPU_ILHU;
3092
3093 return SPU_NONE;
3094}
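
/* Worked examples for the checks above (illustrative only):
     -0x8000     -> SPU_IL    (signed 16-bit il range)
     0x0003ffff  -> SPU_ILA   (unsigned 18-bit ila range)
     0x12341234  -> SPU_ILH   (high and low halfwords identical)
     0x12340000  -> SPU_ILHU  (low halfword zero)
     0x12345678  -> SPU_NONE  (needs an ilhu/iohl pair).  */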
3095
dea01258 3096/* Return true when OP can be loaded by one of the il instructions, or
3097 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3098int
3754d046 3099immediate_load_p (rtx op, machine_mode mode)
dea01258 3100{
3101 if (CONSTANT_P (op))
3102 {
3103 enum immediate_class c = classify_immediate (op, mode);
5df189be 3104 return c == IC_IL1 || c == IC_IL1s
3072d30e 3105 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3106 }
3107 return 0;
3108}
3109
3110/* Return true if the first SIZE bytes of arr is a constant that can be
3111 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3112 represent the size and offset of the instruction to use. */
3113static int
3114cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3115{
3116 int cpat, run, i, start;
3117 cpat = 1;
3118 run = 0;
3119 start = -1;
3120 for (i = 0; i < size && cpat; i++)
3121 if (arr[i] != i+16)
3122 {
3123 if (!run)
3124 {
3125 start = i;
3126 if (arr[i] == 3)
3127 run = 1;
3128 else if (arr[i] == 2 && arr[i+1] == 3)
3129 run = 2;
3130 else if (arr[i] == 0)
3131 {
3132	      while (i+run < 16 && arr[i+run] == run)
3133 run++;
3134 if (run != 4 && run != 8)
3135 cpat = 0;
3136 }
3137 else
3138 cpat = 0;
3139 if ((i & (run-1)) != 0)
3140 cpat = 0;
3141 i += run;
3142 }
3143 else
3144 cpat = 0;
3145 }
b01a6dc3 3146 if (cpat && (run || size < 16))
dea01258 3147 {
3148 if (run == 0)
3149 run = 1;
3150 if (prun)
3151 *prun = run;
3152 if (pstart)
3153 *pstart = start == -1 ? 16-run : start;
3154 return 1;
3155 }
3156 return 0;
3157}
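
/* Illustrative example (not in the original source): with SIZE == 16,
   the identity bytes 16..31 with an aligned 4-byte run 0,1,2,3

     { 16,17,18,19,  0,1,2,3,  24,25,26,27,  28,29,30,31 }

   is accepted as a cwd-style pattern, returning 1 with *prun == 4 and
   *pstart == 4.  */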
3158
3159/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3160 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3161static enum immediate_class
3754d046 3162classify_immediate (rtx op, machine_mode mode)
644459d0 3163{
3164 HOST_WIDE_INT val;
3165 unsigned char arr[16];
5df189be 3166 int i, j, repeated, fsmbi, repeat;
dea01258 3167
3168 gcc_assert (CONSTANT_P (op));
3169
644459d0 3170 if (GET_MODE (op) != VOIDmode)
3171 mode = GET_MODE (op);
3172
dea01258 3173 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3174 if (!flag_pic
3175 && mode == V4SImode
dea01258 3176 && GET_CODE (op) == CONST_VECTOR
3177 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
62fdb8e4 3178 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3179 op = unwrap_const_vec_duplicate (op);
644459d0 3180
dea01258 3181 switch (GET_CODE (op))
3182 {
3183 case SYMBOL_REF:
3184 case LABEL_REF:
3185 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3186
dea01258 3187 case CONST:
0cfc65d4 3188 /* We can never know if the resulting address fits in 18 bits and can be
3189 loaded with ila. For now, assume the address will not overflow if
3190 the displacement is "small" (fits 'K' constraint). */
3191 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3192 {
3193 rtx sym = XEXP (XEXP (op, 0), 0);
3194 rtx cst = XEXP (XEXP (op, 0), 1);
3195
3196 if (GET_CODE (sym) == SYMBOL_REF
3197 && GET_CODE (cst) == CONST_INT
3198 && satisfies_constraint_K (cst))
3199 return IC_IL1s;
3200 }
3201 return IC_IL2s;
644459d0 3202
dea01258 3203 case HIGH:
3204 return IC_IL1s;
3205
3206 case CONST_VECTOR:
3207 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3208 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3209 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3210 return IC_POOL;
3211 /* Fall through. */
3212
3213 case CONST_INT:
3214 case CONST_DOUBLE:
3215 constant_to_array (mode, op, arr);
644459d0 3216
dea01258 3217 /* Check that each 4-byte slot is identical. */
3218 repeated = 1;
3219 for (i = 4; i < 16; i += 4)
3220 for (j = 0; j < 4; j++)
3221 if (arr[j] != arr[i + j])
3222 repeated = 0;
3223
3224 if (repeated)
3225 {
3226 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3227 val = trunc_int_for_mode (val, SImode);
3228
3229 if (which_immediate_load (val) != SPU_NONE)
3230 return IC_IL1;
3231 }
3232
3233 /* Any mode of 2 bytes or smaller can be loaded with an il
3234 instruction. */
3235 gcc_assert (GET_MODE_SIZE (mode) > 2);
3236
3237 fsmbi = 1;
5df189be 3238 repeat = 0;
dea01258 3239 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3240 if (arr[i] != 0 && repeat == 0)
3241 repeat = arr[i];
3242 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3243 fsmbi = 0;
3244 if (fsmbi)
5df189be 3245 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3246
3247 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3248 return IC_CPAT;
3249
3250 if (repeated)
3251 return IC_IL2;
3252
3253 return IC_POOL;
3254 default:
3255 break;
3256 }
3257 gcc_unreachable ();
644459d0 3258}
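
/* Classification examples (illustrative only): a V4SImode vector of
   four identical 0x00001234 words is IC_IL1 (a single il); four
   identical 0x12345678 words are IC_IL2 (ilhu plus iohl); a 16-byte
   pattern of 0x00/0xff bytes that no il variant can load is IC_FSMBI;
   differing words that match no other class fall back to IC_POOL.  */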
3259
3260static enum spu_immediate
3261which_logical_immediate (HOST_WIDE_INT val)
3262{
3263 gcc_assert (val == trunc_int_for_mode (val, SImode));
3264
3265 if (val >= -0x200 && val <= 0x1ff)
3266 return SPU_ORI;
3267 if (val >= 0 && val <= 0xffff)
3268 return SPU_IOHL;
3269 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3270 {
3271 val = trunc_int_for_mode (val, HImode);
3272 if (val >= -0x200 && val <= 0x1ff)
3273 return SPU_ORHI;
3274 if ((val & 0xff) == ((val >> 8) & 0xff))
3275 {
3276 val = trunc_int_for_mode (val, QImode);
3277 if (val >= -0x200 && val <= 0x1ff)
3278 return SPU_ORBI;
3279 }
3280 }
3281 return SPU_NONE;
3282}
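
/* Worked examples (illustrative only):
     0x000001ff  -> SPU_ORI   (signed 10-bit ori range)
     0x00001234  -> SPU_IOHL
     0x01000100  -> SPU_ORHI  (equal halfwords, each fits orhi)
     0x05050505  -> SPU_ORBI  (equal bytes, each fits orbi)
     0x12341234  -> SPU_NONE.  */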
3283
5df189be 3284/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3285 CONST_DOUBLEs. */
3286static int
3287const_vector_immediate_p (rtx x)
3288{
3289 int i;
3290 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3291 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3292 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3293 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3294 return 0;
3295 return 1;
3296}
3297
644459d0 3298int
3754d046 3299logical_immediate_p (rtx op, machine_mode mode)
644459d0 3300{
3301 HOST_WIDE_INT val;
3302 unsigned char arr[16];
3303 int i, j;
3304
3305 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3306 || GET_CODE (op) == CONST_VECTOR);
3307
5df189be 3308 if (GET_CODE (op) == CONST_VECTOR
3309 && !const_vector_immediate_p (op))
3310 return 0;
3311
644459d0 3312 if (GET_MODE (op) != VOIDmode)
3313 mode = GET_MODE (op);
3314
3315 constant_to_array (mode, op, arr);
3316
3317 /* Check that bytes are repeated. */
3318 for (i = 4; i < 16; i += 4)
3319 for (j = 0; j < 4; j++)
3320 if (arr[j] != arr[i + j])
3321 return 0;
3322
3323 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3324 val = trunc_int_for_mode (val, SImode);
3325
3326 i = which_logical_immediate (val);
3327 return i != SPU_NONE && i != SPU_IOHL;
3328}
3329
3330int
3754d046 3331iohl_immediate_p (rtx op, machine_mode mode)
644459d0 3332{
3333 HOST_WIDE_INT val;
3334 unsigned char arr[16];
3335 int i, j;
3336
3337 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3338 || GET_CODE (op) == CONST_VECTOR);
3339
5df189be 3340 if (GET_CODE (op) == CONST_VECTOR
3341 && !const_vector_immediate_p (op))
3342 return 0;
3343
644459d0 3344 if (GET_MODE (op) != VOIDmode)
3345 mode = GET_MODE (op);
3346
3347 constant_to_array (mode, op, arr);
3348
3349 /* Check that bytes are repeated. */
3350 for (i = 4; i < 16; i += 4)
3351 for (j = 0; j < 4; j++)
3352 if (arr[j] != arr[i + j])
3353 return 0;
3354
3355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3356 val = trunc_int_for_mode (val, SImode);
3357
3358 return val >= 0 && val <= 0xffff;
3359}
3360
3361int
3754d046 3362arith_immediate_p (rtx op, machine_mode mode,
644459d0 3363 HOST_WIDE_INT low, HOST_WIDE_INT high)
3364{
3365 HOST_WIDE_INT val;
3366 unsigned char arr[16];
3367 int bytes, i, j;
3368
3369 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3370 || GET_CODE (op) == CONST_VECTOR);
3371
5df189be 3372 if (GET_CODE (op) == CONST_VECTOR
3373 && !const_vector_immediate_p (op))
3374 return 0;
3375
644459d0 3376 if (GET_MODE (op) != VOIDmode)
3377 mode = GET_MODE (op);
3378
3379 constant_to_array (mode, op, arr);
3380
6e256598 3381 bytes = GET_MODE_UNIT_SIZE (mode);
e2cd4ccd 3382 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
644459d0 3383
3384 /* Check that bytes are repeated. */
3385 for (i = bytes; i < 16; i += bytes)
3386 for (j = 0; j < bytes; j++)
3387 if (arr[j] != arr[i + j])
3388 return 0;
3389
3390 val = arr[0];
3391 for (j = 1; j < bytes; j++)
3392 val = (val << 8) | arr[j];
3393
3394 val = trunc_int_for_mode (val, mode);
3395
3396 return val >= low && val <= high;
3397}
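
/* Example (illustrative): a V8HImode vector of eight copies of -3
   satisfies arith_immediate_p (op, V8HImode, -0x200, 0x1ff), i.e. the
   repeated element fits a signed 10-bit immediate field.  */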
3398
56c7bfc2 3399/* TRUE when op is an immediate and an exact power of 2, and given that
3400 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3401 all entries must be the same. */
3402bool
3754d046 3403exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
56c7bfc2 3404{
3754d046 3405 machine_mode int_mode;
56c7bfc2 3406 HOST_WIDE_INT val;
3407 unsigned char arr[16];
3408 int bytes, i, j;
3409
3410 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3411 || GET_CODE (op) == CONST_VECTOR);
3412
3413 if (GET_CODE (op) == CONST_VECTOR
3414 && !const_vector_immediate_p (op))
3415 return 0;
3416
3417 if (GET_MODE (op) != VOIDmode)
3418 mode = GET_MODE (op);
3419
3420 constant_to_array (mode, op, arr);
3421
6e256598 3422 mode = GET_MODE_INNER (mode);
56c7bfc2 3423
3424 bytes = GET_MODE_SIZE (mode);
e2cd4ccd 3425 int_mode = int_mode_for_mode (mode).require ();
56c7bfc2 3426
3427 /* Check that bytes are repeated. */
3428 for (i = bytes; i < 16; i += bytes)
3429 for (j = 0; j < bytes; j++)
3430 if (arr[j] != arr[i + j])
3431 return 0;
3432
3433 val = arr[0];
3434 for (j = 1; j < bytes; j++)
3435 val = (val << 8) | arr[j];
3436
3437 val = trunc_int_for_mode (val, int_mode);
3438
3439 /* Currently, we only handle SFmode */
3440 gcc_assert (mode == SFmode);
3441 if (mode == SFmode)
3442 {
3443 int exp = (val >> 23) - 127;
3444 return val > 0 && (val & 0x007fffff) == 0
3445 && exp >= low && exp <= high;
3446 }
3447 return FALSE;
3448}
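
/* Worked example (illustrative): the SFmode bit pattern of 2.0f is
   0x40000000, so exp == (0x40000000 >> 23) - 127 == 1 and the
   mantissa bits are zero; the constant is accepted whenever
   LOW <= 1 <= HIGH.  3.0f (0x40400000) has nonzero mantissa bits and
   is rejected.  */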
3449
6cf5579e 3450/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3451
6f4e40cd 3452static bool
3453ea_symbol_ref_p (const_rtx x)
6cf5579e 3454{
6cf5579e 3455 tree decl;
3456
3457 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3458 {
3459 rtx plus = XEXP (x, 0);
3460 rtx op0 = XEXP (plus, 0);
3461 rtx op1 = XEXP (plus, 1);
3462 if (GET_CODE (op1) == CONST_INT)
3463 x = op0;
3464 }
3465
3466 return (GET_CODE (x) == SYMBOL_REF
3467 && (decl = SYMBOL_REF_DECL (x)) != 0
3468 && TREE_CODE (decl) == VAR_DECL
3469 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3470}
3471
644459d0 3472/* We accept:
5b865faf 3473 - any 32-bit constant (SImode, SFmode)
644459d0 3474 - any constant that can be generated with fsmbi (any mode)
5b865faf 3475 - a 64-bit constant where the high and low bits are identical
644459d0 3476 (DImode, DFmode)
5b865faf 3477 - a 128-bit constant where the four 32-bit words match. */
ca316360 3478bool
3754d046 3479spu_legitimate_constant_p (machine_mode mode, rtx x)
644459d0 3480{
6f4e40cd 3481 subrtx_iterator::array_type array;
5df189be 3482 if (GET_CODE (x) == HIGH)
3483 x = XEXP (x, 0);
6cf5579e 3484
3485 /* Reject any __ea qualified reference. These can't appear in
3486 instructions but must be forced to the constant pool. */
6f4e40cd 3487 FOR_EACH_SUBRTX (iter, array, x, ALL)
3488 if (ea_symbol_ref_p (*iter))
3489 return 0;
6cf5579e 3490
644459d0 3491 /* V4SI with all identical symbols is valid. */
5df189be 3492 if (!flag_pic
ca316360 3493 && mode == V4SImode
644459d0 3494 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3495 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3496 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
62fdb8e4 3497 return const_vec_duplicate_p (x);
644459d0 3498
5df189be 3499 if (GET_CODE (x) == CONST_VECTOR
3500 && !const_vector_immediate_p (x))
3501 return 0;
644459d0 3502 return 1;
3503}
3504
3505 /* Valid addresses are:
3506 - symbol_ref, label_ref, const
3507 - reg
9d98604b 3508 - reg + const_int, where const_int is 16 byte aligned
644459d0 3509 - reg + reg, alignment doesn't matter
3510 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3511 ignore the 4 least significant bits of the const. We only care about
3512 16 byte modes because the expand phase will change all smaller MEM
3513 references to TImode. */
3514static bool
3754d046 3515spu_legitimate_address_p (machine_mode mode,
fd50b071 3516 rtx x, bool reg_ok_strict)
644459d0 3517{
9d98604b 3518 int aligned = GET_MODE_SIZE (mode) >= 16;
3519 if (aligned
3520 && GET_CODE (x) == AND
644459d0 3521 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3522 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3523 x = XEXP (x, 0);
3524 switch (GET_CODE (x))
3525 {
644459d0 3526 case LABEL_REF:
6cf5579e 3527 return !TARGET_LARGE_MEM;
3528
9d98604b 3529 case SYMBOL_REF:
644459d0 3530 case CONST:
6cf5579e 3531 /* Keep __ea references until reload so that spu_expand_mov can see them
3532 in MEMs. */
6f4e40cd 3533 if (ea_symbol_ref_p (x))
6cf5579e 3534 return !reload_in_progress && !reload_completed;
9d98604b 3535 return !TARGET_LARGE_MEM;
644459d0 3536
3537 case CONST_INT:
3538 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3539
3540 case SUBREG:
3541 x = XEXP (x, 0);
fd0f8124 3542 if (!REG_P (x))
9d98604b 3543 return 0;
fd0f8124 3544 /* FALLTHRU */
644459d0 3545
3546 case REG:
3547 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3548
3549 case PLUS:
3550 case LO_SUM:
3551 {
3552 rtx op0 = XEXP (x, 0);
3553 rtx op1 = XEXP (x, 1);
3554 if (GET_CODE (op0) == SUBREG)
3555 op0 = XEXP (op0, 0);
3556 if (GET_CODE (op1) == SUBREG)
3557 op1 = XEXP (op1, 0);
644459d0 3558 if (GET_CODE (op0) == REG
3559 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3560 && GET_CODE (op1) == CONST_INT
fa695424 3561 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3562 /* If virtual registers are involved, the displacement will
3563 change later on anyway, so checking would be premature.
3564 Reload will make sure the final displacement after
3565 register elimination is OK. */
3566 || op0 == arg_pointer_rtx
3567 || op0 == frame_pointer_rtx
3568 || op0 == virtual_stack_vars_rtx)
9d98604b 3569 && (!aligned || (INTVAL (op1) & 15) == 0))
3570 return TRUE;
644459d0 3571 if (GET_CODE (op0) == REG
3572 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3573 && GET_CODE (op1) == REG
3574 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3575 return TRUE;
644459d0 3576 }
3577 break;
3578
3579 default:
3580 break;
3581 }
9d98604b 3582 return FALSE;
644459d0 3583}
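
/* Address examples (illustrative): for a 16-byte mode,
   (plus (reg) (const_int 32)) is legitimate while
   (plus (reg) (const_int 8)) is not, since lqd/stqd would silently
   drop the low four bits; (plus (reg) (reg)) is valid for any
   mode.  */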
3584
6cf5579e 3585/* Like spu_legitimate_address_p, except with named addresses. */
3586static bool
3754d046 3587spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
6cf5579e 3588 bool reg_ok_strict, addr_space_t as)
3589{
3590 if (as == ADDR_SPACE_EA)
3591 return (REG_P (x) && (GET_MODE (x) == EAmode));
3592
3593 else if (as != ADDR_SPACE_GENERIC)
3594 gcc_unreachable ();
3595
3596 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3597}
3598
644459d0 3599/* When the address is reg + const_int, force the const_int into a
fa7637bd 3600 register. */
3defb88e 3601static rtx
644459d0 3602spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3754d046 3603 machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3604{
3605 rtx op0, op1;
3606 /* Make sure both operands are registers. */
3607 if (GET_CODE (x) == PLUS)
3608 {
3609 op0 = XEXP (x, 0);
3610 op1 = XEXP (x, 1);
3611 if (ALIGNED_SYMBOL_REF_P (op0))
3612 {
3613 op0 = force_reg (Pmode, op0);
3614 mark_reg_pointer (op0, 128);
3615 }
3616 else if (GET_CODE (op0) != REG)
3617 op0 = force_reg (Pmode, op0);
3618 if (ALIGNED_SYMBOL_REF_P (op1))
3619 {
3620 op1 = force_reg (Pmode, op1);
3621 mark_reg_pointer (op1, 128);
3622 }
3623 else if (GET_CODE (op1) != REG)
3624 op1 = force_reg (Pmode, op1);
3625 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3626 }
41e3a0c7 3627 return x;
644459d0 3628}
3629
6cf5579e 3630/* Like spu_legitimate_address, except with named address support. */
3631static rtx
3754d046 3632spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
6cf5579e 3633 addr_space_t as)
3634{
3635 if (as != ADDR_SPACE_GENERIC)
3636 return x;
3637
3638 return spu_legitimize_address (x, oldx, mode);
3639}
3640
fa695424 3641/* Reload reg + const_int for out-of-range displacements. */
3642rtx
3754d046 3643spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
fa695424 3644 int opnum, int type)
3645{
3646 bool removed_and = false;
3647
3648 if (GET_CODE (ad) == AND
3649 && CONST_INT_P (XEXP (ad, 1))
3650 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3651 {
3652 ad = XEXP (ad, 0);
3653 removed_and = true;
3654 }
3655
3656 if (GET_CODE (ad) == PLUS
3657 && REG_P (XEXP (ad, 0))
3658 && CONST_INT_P (XEXP (ad, 1))
3659 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3660 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3661 {
3662 /* Unshare the sum. */
3663 ad = copy_rtx (ad);
3664
3665 /* Reload the displacement. */
3666 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3667 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3668 opnum, (enum reload_type) type);
3669
3670 /* Add back AND for alignment if we stripped it. */
3671 if (removed_and)
3672 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3673
3674 return ad;
3675 }
3676
3677 return NULL_RTX;
3678}
3679
644459d0 3680/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3681 struct attribute_spec.handler. */
3682static tree
3683spu_handle_fndecl_attribute (tree * node,
3684 tree name,
3685 tree args ATTRIBUTE_UNUSED,
3686 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3687{
3688 if (TREE_CODE (*node) != FUNCTION_DECL)
3689 {
67a779df 3690 warning (0, "%qE attribute only applies to functions",
3691 name);
644459d0 3692 *no_add_attrs = true;
3693 }
3694
3695 return NULL_TREE;
3696}
3697
3698/* Handle the "vector" attribute. */
3699static tree
3700spu_handle_vector_attribute (tree * node, tree name,
3701 tree args ATTRIBUTE_UNUSED,
3702 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3703{
3704 tree type = *node, result = NULL_TREE;
3754d046 3705 machine_mode mode;
644459d0 3706 int unsigned_p;
3707
3708 while (POINTER_TYPE_P (type)
3709 || TREE_CODE (type) == FUNCTION_TYPE
3710 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3711 type = TREE_TYPE (type);
3712
3713 mode = TYPE_MODE (type);
3714
3715 unsigned_p = TYPE_UNSIGNED (type);
3716 switch (mode)
3717 {
916ace94 3718 case E_DImode:
644459d0 3719 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3720 break;
916ace94 3721 case E_SImode:
644459d0 3722 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3723 break;
916ace94 3724 case E_HImode:
644459d0 3725 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3726 break;
916ace94 3727 case E_QImode:
644459d0 3728 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3729 break;
916ace94 3730 case E_SFmode:
644459d0 3731 result = V4SF_type_node;
3732 break;
916ace94 3733 case E_DFmode:
644459d0 3734 result = V2DF_type_node;
3735 break;
3736 default:
3737 break;
3738 }
3739
3740 /* Propagate qualifiers attached to the element type
3741 onto the vector type. */
3742 if (result && result != type && TYPE_QUALS (type))
3743 result = build_qualified_type (result, TYPE_QUALS (type));
3744
3745 *no_add_attrs = true; /* No need to hang on to the attribute. */
3746
3747 if (!result)
67a779df 3748 warning (0, "%qE attribute ignored", name);
644459d0 3749 else
d991e6e8 3750 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3751
3752 return NULL_TREE;
3753}
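
/* For illustration (a sketch of the intended use): a declaration such as

     __attribute__ ((vector)) unsigned int v;

   hits the E_SImode case above and rewrites the type of v to the
   unsigned V4SI vector type.  */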
3754
f2b32076 3755/* Return nonzero if FUNC is a naked function. */
644459d0 3756static int
3757spu_naked_function_p (tree func)
3758{
3759 tree a;
3760
3761 if (TREE_CODE (func) != FUNCTION_DECL)
3762 abort ();
3763
3764 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3765 return a != NULL_TREE;
3766}
3767
3768int
3769spu_initial_elimination_offset (int from, int to)
3770{
3771 int saved_regs_size = spu_saved_regs_size ();
3772 int sp_offset = 0;
d5bf7b64 3773 if (!crtl->is_leaf || crtl->outgoing_args_size
644459d0 3774 || get_frame_size () || saved_regs_size)
3775 sp_offset = STACK_POINTER_OFFSET;
3776 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3777 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3778 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3779 return get_frame_size ();
644459d0 3780 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3781 return sp_offset + crtl->outgoing_args_size
644459d0 3782 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3783 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3784 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3785 else
3786 gcc_unreachable ();
644459d0 3787}
3788
3789rtx
fb80456a 3790spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3791{
3754d046 3792 machine_mode mode = TYPE_MODE (type);
644459d0 3793 int byte_size = ((mode == BLKmode)
3794 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3795
3796 /* Make sure small structs are left justified in a register. */
3797 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3798 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3799 {
3754d046 3800 machine_mode smode;
644459d0 3801 rtvec v;
3802 int i;
3803 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3804 int n = byte_size / UNITS_PER_WORD;
3805 v = rtvec_alloc (nregs);
3806 for (i = 0; i < n; i++)
3807 {
3808 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3809 gen_rtx_REG (TImode,
3810 FIRST_RETURN_REGNUM
3811 + i),
3812 GEN_INT (UNITS_PER_WORD * i));
3813 byte_size -= UNITS_PER_WORD;
3814 }
3815
3816 if (n < nregs)
3817 {
3818 if (byte_size < 4)
3819 byte_size = 4;
1a5d4b27 3820 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
644459d0 3821 RTVEC_ELT (v, n) =
3822 gen_rtx_EXPR_LIST (VOIDmode,
3823 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3824 GEN_INT (UNITS_PER_WORD * n));
3825 }
3826 return gen_rtx_PARALLEL (mode, v);
3827 }
3828 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3829}
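
/* For illustration: returning a 20-byte struct builds (with
   hypothetical register numbers)

     (parallel [(expr_list (reg:TI rN)   (const_int 0))
                (expr_list (reg:SI rN+1) (const_int 16))])

   i.e. one full TImode register for the first 16 bytes and an SImode
   piece (the smallest integer mode of at least 4 bytes) for the
   remaining 4, keeping the struct left justified.  */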
3830
ee9034d4 3831static rtx
39cba157 3832spu_function_arg (cumulative_args_t cum_v,
3754d046 3833 machine_mode mode,
ee9034d4 3834 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3835{
39cba157 3836 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 3837 int byte_size;
3838
a08c5dd0 3839 if (*cum >= MAX_REGISTER_ARGS)
644459d0 3840 return 0;
3841
3842 byte_size = ((mode == BLKmode)
3843 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3844
3845 /* The ABI does not allow parameters to be passed partially in
 3846	     a register and partially on the stack. */
a08c5dd0 3847 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 3848 return 0;
3849
3850 /* Make sure small structs are left justified in a register. */
3851 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3852 && byte_size < UNITS_PER_WORD && byte_size > 0)
3853 {
3754d046 3854 machine_mode smode;
644459d0 3855 rtx gr_reg;
3856 if (byte_size < 4)
3857 byte_size = 4;
1a5d4b27 3858 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
644459d0 3859 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 3860 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 3861 const0_rtx);
3862 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3863 }
3864 else
a08c5dd0 3865 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 3866}
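
/* For illustration: a 6-byte struct argument is smaller than
   UNITS_PER_WORD, so it is passed in DImode (the smallest integer mode
   that covers 6 bytes) inside a one-element PARALLEL at offset 0, which
   keeps it left justified in its argument register.  */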
3867
ee9034d4 3868static void
3754d046 3869spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ee9034d4 3870 const_tree type, bool named ATTRIBUTE_UNUSED)
3871{
39cba157 3872 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3873
ee9034d4 3874 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3875 ? 1
3876 : mode == BLKmode
3877 ? ((int_size_in_bytes (type) + 15) / 16)
3878 : mode == VOIDmode
3879 ? 1
74f68e49 3880 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
ee9034d4 3881}
3882
8adb95eb 3883/* Implement TARGET_FUNCTION_ARG_OFFSET. The SPU ABI wants 32/64-bit
3884 types at offset 0 in the quad-word on the stack. 8/16-bit types
3885 should be at offsets 3/2 respectively. */
3886
3887static HOST_WIDE_INT
3888spu_function_arg_offset (machine_mode mode, const_tree type)
3889{
3890 if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3891 return 4 - GET_MODE_SIZE (mode);
3892 return 0;
3893}
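
/* For illustration: a QImode argument (1 byte) gets offset 4 - 1 = 3
   and an HImode argument (2 bytes) gets offset 4 - 2 = 2, placing each
   at the end of the preferred 32-bit slot; 4-byte and larger types get
   offset 0.  */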
3894
d7ab0e3d 3895/* Implement TARGET_FUNCTION_ARG_PADDING. */
3896
3897static pad_direction
3898spu_function_arg_padding (machine_mode, const_tree)
3899{
3900 return PAD_UPWARD;
3901}
3902
644459d0 3903/* Variable sized types are passed by reference. */
3904static bool
39cba157 3905spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3754d046 3906 machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3907 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3908{
3909 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3910}
3911\f
3912
3913/* Var args. */
3914
3915/* Create and return the va_list datatype.
3916
3917 On SPU, va_list is an array type equivalent to
3918
3919 typedef struct __va_list_tag
3920 {
 3921	     void *__args __attribute__((__aligned__(16)));
 3922	     void *__skip __attribute__((__aligned__(16)));
3923
3924 } va_list[1];
3925
fa7637bd 3926 where __args points to the arg that will be returned by the next
644459d0 3927 va_arg(), and __skip points to the previous stack frame such that
3928 when __args == __skip we should advance __args by 32 bytes. */
3929static tree
3930spu_build_builtin_va_list (void)
3931{
3932 tree f_args, f_skip, record, type_decl;
3933 bool owp;
3934
3935 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3936
3937 type_decl =
54e46243 3938 build_decl (BUILTINS_LOCATION,
3939 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3940
54e46243 3941 f_args = build_decl (BUILTINS_LOCATION,
3942 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3943 f_skip = build_decl (BUILTINS_LOCATION,
3944 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3945
3946 DECL_FIELD_CONTEXT (f_args) = record;
5d4b30ea 3947 SET_DECL_ALIGN (f_args, 128);
644459d0 3948 DECL_USER_ALIGN (f_args) = 1;
3949
3950 DECL_FIELD_CONTEXT (f_skip) = record;
5d4b30ea 3951 SET_DECL_ALIGN (f_skip, 128);
644459d0 3952 DECL_USER_ALIGN (f_skip) = 1;
3953
bc907808 3954 TYPE_STUB_DECL (record) = type_decl;
644459d0 3955 TYPE_NAME (record) = type_decl;
3956 TYPE_FIELDS (record) = f_args;
1767a056 3957 DECL_CHAIN (f_args) = f_skip;
644459d0 3958
 3959	  /* We know this type is being padded, and we want it that way.  It is
 3960	     an internal type, so hide the warnings from the user. */
3961 owp = warn_padded;
3962 warn_padded = false;
3963
3964 layout_type (record);
3965
3966 warn_padded = owp;
3967
3968 /* The correct type is an array type of one element. */
3969 return build_array_type (record, build_index_type (size_zero_node));
3970}
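
/* For illustration, a sketch of how the two fields behave at run time:

     void
     f (int n, ...)
     {
       va_list ap;
       va_start (ap, n);     // __args = first anonymous arg,
                             // __skip = edge of the caller's frame
       int i = va_arg (ap, int);
     }

   va_arg reads through __args and bumps it by the padded argument size;
   crossing __skip means the remaining arguments live in the caller's
   frame, 32 bytes further up (see spu_gimplify_va_arg_expr below).  */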
3971
3972/* Implement va_start by filling the va_list structure VALIST.
3973 NEXTARG points to the first anonymous stack argument.
3974
3975 The following global variables are used to initialize
3976 the va_list structure:
3977
abe32cce 3978	   crtl->args.info:
644459d0 3979 the CUMULATIVE_ARGS for this function
3980
abe32cce 3981 crtl->args.arg_offset_rtx:
644459d0 3982 holds the offset of the first anonymous stack argument
3983 (relative to the virtual arg pointer). */
3984
8a58ed0a 3985static void
644459d0 3986spu_va_start (tree valist, rtx nextarg)
3987{
3988 tree f_args, f_skip;
3989 tree args, skip, t;
3990
3991 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 3992 f_skip = DECL_CHAIN (f_args);
644459d0 3993
170efcd4 3994 valist = build_simple_mem_ref (valist);
644459d0 3995 args =
3996 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3997 skip =
3998 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3999
4000 /* Find the __args area. */
4001 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4002 if (crtl->args.pretend_args_size > 0)
2cc66f2a 4003 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 4004 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4005 TREE_SIDE_EFFECTS (t) = 1;
4006 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4007
4008 /* Find the __skip area. */
4009 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 4010 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4011 - STACK_POINTER_OFFSET));
75a70cf9 4012 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4013 TREE_SIDE_EFFECTS (t) = 1;
4014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4015}
4016
4017/* Gimplify va_arg by updating the va_list structure
4018 VALIST as required to retrieve an argument of type
4019 TYPE, and returning that argument.
4020
4021 ret = va_arg(VALIST, TYPE);
4022
4023 generates code equivalent to:
4024
4025 paddedsize = (sizeof(TYPE) + 15) & -16;
4026 if (VALIST.__args + paddedsize > VALIST.__skip
4027 && VALIST.__args <= VALIST.__skip)
4028 addr = VALIST.__skip + 32;
4029 else
4030 addr = VALIST.__args;
4031 VALIST.__args = addr + paddedsize;
4032 ret = *(TYPE *)addr;
4033 */
4034static tree
75a70cf9 4035spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4036 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4037{
4038 tree f_args, f_skip;
4039 tree args, skip;
4040 HOST_WIDE_INT size, rsize;
2cc66f2a 4041 tree addr, tmp;
644459d0 4042 bool pass_by_reference_p;
4043
4044 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4045 f_skip = DECL_CHAIN (f_args);
644459d0 4046
644459d0 4047 args =
4048 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4049 skip =
4050 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4051
4052 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4053
 4054	  /* If an object is dynamically sized, a pointer to it is passed
4055 instead of the object itself. */
27a82950 4056 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4057 false);
644459d0 4058 if (pass_by_reference_p)
4059 type = build_pointer_type (type);
4060 size = int_size_in_bytes (type);
4061 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4062
4063 /* build conditional expression to calculate addr. The expression
4064 will be gimplified later. */
2cc66f2a 4065 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4066 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4067 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4068 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4069 unshare_expr (skip)));
644459d0 4070
4071 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4072 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4073 unshare_expr (args));
644459d0 4074
75a70cf9 4075 gimplify_assign (addr, tmp, pre_p);
644459d0 4076
 4077	  /* Update VALIST.__args.  */
2cc66f2a 4078 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4079 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4080
8115f0af 4081 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4082 addr);
644459d0 4083
4084 if (pass_by_reference_p)
4085 addr = build_va_arg_indirect_ref (addr);
4086
4087 return build_va_arg_indirect_ref (addr);
4088}
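
/* For illustration, with TYPE = int: size is 4, so rsize rounds up to
   one 16-byte word.  If __args + 16 would pass __skip while __args is
   still at or below __skip, the next argument lives in the caller's
   frame, so addr becomes __skip + 32; otherwise addr is simply __args.
   Either way __args then advances by 16.  */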
4089
4090/* Save parameter registers starting with the register that corresponds
 4091	   to the first unnamed parameter.  If the first unnamed parameter is
 4092	   on the stack then save no registers.  Set pretend_args_size to the
4093 amount of space needed to save the registers. */
39cba157 4094static void
3754d046 4095spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
644459d0 4096 tree type, int *pretend_size, int no_rtl)
4097{
4098 if (!no_rtl)
4099 {
4100 rtx tmp;
4101 int regno;
4102 int offset;
39cba157 4103 int ncum = *get_cumulative_args (cum);
644459d0 4104
 4105	      /* cum currently points to the last named argument; we want to
 4106	         start at the next argument. */
39cba157 4107 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4108
4109 offset = -STACK_POINTER_OFFSET;
4110 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4111 {
4112 tmp = gen_frame_mem (V4SImode,
29c05e22 4113 plus_constant (Pmode, virtual_incoming_args_rtx,
644459d0 4114 offset));
4115 emit_move_insn (tmp,
4116 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4117 offset += 16;
4118 }
4119 *pretend_size = offset + STACK_POINTER_OFFSET;
4120 }
4121}
4122\f
b2d7ede1 4123static void
644459d0 4124spu_conditional_register_usage (void)
4125{
4126 if (flag_pic)
4127 {
4128 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4129 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4130 }
644459d0 4131}
4132
9d98604b 4133	/* Return nonzero if register X is known to be at least 128-bit aligned.
 4134	   This is called any time we inspect the alignment of a register used
 4135	   in an address.  */
644459d0 4135static int
9d98604b 4136reg_aligned_for_addr (rtx x)
644459d0 4137{
9d98604b 4138 int regno =
4139 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4140 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4141}
4142
69ced2d6 4143/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4144 into its SYMBOL_REF_FLAGS. */
4145static void
4146spu_encode_section_info (tree decl, rtx rtl, int first)
4147{
4148 default_encode_section_info (decl, rtl, first);
4149
4150 /* If a variable has a forced alignment to < 16 bytes, mark it with
4151 SYMBOL_FLAG_ALIGN1. */
4152 if (TREE_CODE (decl) == VAR_DECL
4153 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4154 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4155}
4156
644459d0 4157/* Return TRUE if we are certain the mem refers to a complete object
4158 which is both 16-byte aligned and padded to a 16-byte boundary. This
4159 would make it safe to store with a single instruction.
4160 We guarantee the alignment and padding for static objects by aligning
579d67ba 4161 all of them to 16-bytes. (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
644459d0 4162 FIXME: We currently cannot guarantee this for objects on the stack
4163 because assign_parm_setup_stack calls assign_stack_local with the
4164 alignment of the parameter mode and in that case the alignment never
4165 gets adjusted by LOCAL_ALIGNMENT. */
4166static int
4167store_with_one_insn_p (rtx mem)
4168{
3754d046 4169 machine_mode mode = GET_MODE (mem);
644459d0 4170 rtx addr = XEXP (mem, 0);
9d98604b 4171 if (mode == BLKmode)
644459d0 4172 return 0;
9d98604b 4173 if (GET_MODE_SIZE (mode) >= 16)
4174 return 1;
644459d0 4175 /* Only static objects. */
4176 if (GET_CODE (addr) == SYMBOL_REF)
4177 {
4178 /* We use the associated declaration to make sure the access is
fa7637bd 4179 referring to the whole object.
851d9296 4180 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4181 if it is necessary. Will there be cases where one exists, and
4182 the other does not? Will there be cases where both exist, but
4183 have different types? */
4184 tree decl = MEM_EXPR (mem);
4185 if (decl
4186 && TREE_CODE (decl) == VAR_DECL
4187 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4188 return 1;
4189 decl = SYMBOL_REF_DECL (addr);
4190 if (decl
4191 && TREE_CODE (decl) == VAR_DECL
4192 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4193 return 1;
4194 }
4195 return 0;
4196}
4197
9d98604b 4198/* Return 1 when the address is not valid for a simple load and store as
4199 required by the '_mov*' patterns. We could make this less strict
 4200	   for loads, but we prefer MEMs to look the same so they are more
4201 likely to be merged. */
4202static int
4203address_needs_split (rtx mem)
4204{
4205 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4206 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4207 || !(store_with_one_insn_p (mem)
4208 || mem_is_padded_component_ref (mem))))
4209 return 1;
4210
4211 return 0;
4212}
4213
6cf5579e 4214static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4215static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4216static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4217
4218/* MEM is known to be an __ea qualified memory access. Emit a call to
 4219	   fetch the PPU memory into local store, and return its address in local
4220 store. */
4221
4222static void
4223ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4224{
4225 if (is_store)
4226 {
4227 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4228 if (!cache_fetch_dirty)
4229 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4230 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
9e9e5c15 4231 ea_addr, EAmode, ndirty, SImode);
6cf5579e 4232 }
4233 else
4234 {
4235 if (!cache_fetch)
4236 cache_fetch = init_one_libfunc ("__cache_fetch");
4237 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
9e9e5c15 4238 ea_addr, EAmode);
6cf5579e 4239 }
4240}
4241
4242/* Like ea_load_store, but do the cache tag comparison and, for stores,
4243 dirty bit marking, inline.
4244
4245 The cache control data structure is an array of
4246
4247 struct __cache_tag_array
4248 {
4249 unsigned int tag_lo[4];
4250 unsigned int tag_hi[4];
4251 void *data_pointer[4];
4252 int reserved[4];
4253 vector unsigned short dirty_bits[4];
4254 } */
4255
4256static void
4257ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4258{
4259 rtx ea_addr_si;
4260 HOST_WIDE_INT v;
4261 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4262 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4263 rtx index_mask = gen_reg_rtx (SImode);
4264 rtx tag_arr = gen_reg_rtx (Pmode);
4265 rtx splat_mask = gen_reg_rtx (TImode);
4266 rtx splat = gen_reg_rtx (V4SImode);
4267 rtx splat_hi = NULL_RTX;
4268 rtx tag_index = gen_reg_rtx (Pmode);
4269 rtx block_off = gen_reg_rtx (SImode);
4270 rtx tag_addr = gen_reg_rtx (Pmode);
4271 rtx tag = gen_reg_rtx (V4SImode);
4272 rtx cache_tag = gen_reg_rtx (V4SImode);
4273 rtx cache_tag_hi = NULL_RTX;
4274 rtx cache_ptrs = gen_reg_rtx (TImode);
4275 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4276 rtx tag_equal = gen_reg_rtx (V4SImode);
4277 rtx tag_equal_hi = NULL_RTX;
4278 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4279 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4280 rtx eq_index = gen_reg_rtx (SImode);
0af56f80 4281 rtx bcomp, hit_label, hit_ref, cont_label;
4282 rtx_insn *insn;
6cf5579e 4283
4284 if (spu_ea_model != 32)
4285 {
4286 splat_hi = gen_reg_rtx (V4SImode);
4287 cache_tag_hi = gen_reg_rtx (V4SImode);
4288 tag_equal_hi = gen_reg_rtx (V4SImode);
4289 }
4290
29c05e22 4291 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
6cf5579e 4292 emit_move_insn (tag_arr, tag_arr_sym);
4293 v = 0x0001020300010203LL;
4294 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4295 ea_addr_si = ea_addr;
4296 if (spu_ea_model != 32)
4297 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4298
4299 /* tag_index = ea_addr & (tag_array_size - 128) */
4300 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4301
4302 /* splat ea_addr to all 4 slots. */
4303 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4304 /* Similarly for high 32 bits of ea_addr. */
4305 if (spu_ea_model != 32)
4306 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4307
4308 /* block_off = ea_addr & 127 */
4309 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4310
4311 /* tag_addr = tag_arr + tag_index */
4312 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4313
4314 /* Read cache tags. */
4315 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4316 if (spu_ea_model != 32)
4317 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
29c05e22 4318 plus_constant (Pmode,
4319 tag_addr, 16)));
6cf5579e 4320
4321 /* tag = ea_addr & -128 */
4322 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4323
4324 /* Read all four cache data pointers. */
4325 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
29c05e22 4326 plus_constant (Pmode,
4327 tag_addr, 32)));
6cf5579e 4328
4329 /* Compare tags. */
4330 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4331 if (spu_ea_model != 32)
4332 {
4333 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4334 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4335 }
4336
4337 /* At most one of the tags compare equal, so tag_equal has one
4338 32-bit slot set to all 1's, with the other slots all zero.
 4339	     gbb picks off the low bit of each byte in the 128-bit register,
4340 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4341 we have a hit. */
4342 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4343 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4344
4345 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4346 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4347
 4348	  /* This allows us to rotate the corresponding cache data pointer into
 4349	     slot 0 (the rotation is by eq_index mod 16 bytes). */
4350 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4351 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4352
4353 /* Add block offset to form final data address. */
4354 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4355
4356 /* Check that we did hit. */
4357 hit_label = gen_label_rtx ();
4358 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4359 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
d1f9b275 4360 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
6cf5579e 4361 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4362 hit_ref, pc_rtx)));
4363 /* Say that this branch is very likely to happen. */
61cb1816 4364 add_reg_br_prob_note (insn, profile_probability::very_likely ());
6cf5579e 4365
4366 ea_load_store (mem, is_store, ea_addr, data_addr);
4367 cont_label = gen_label_rtx ();
4368 emit_jump_insn (gen_jump (cont_label));
4369 emit_barrier ();
4370
4371 emit_label (hit_label);
4372
4373 if (is_store)
4374 {
4375 HOST_WIDE_INT v_hi;
4376 rtx dirty_bits = gen_reg_rtx (TImode);
4377 rtx dirty_off = gen_reg_rtx (SImode);
4378 rtx dirty_128 = gen_reg_rtx (TImode);
4379 rtx neg_block_off = gen_reg_rtx (SImode);
4380
4381 /* Set up mask with one dirty bit per byte of the mem we are
 4382	     writing, starting from the top bit. */
4383 v_hi = v = -1;
4384 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4385 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4386 {
4387 v_hi = v;
4388 v = 0;
4389 }
4390 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4391
4392 /* Form index into cache dirty_bits. eq_index is one of
4393 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4394 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4395 offset to each of the four dirty_bits elements. */
4396 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4397
4398 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4399
4400 /* Rotate bit mask to proper bit. */
4401 emit_insn (gen_negsi2 (neg_block_off, block_off));
4402 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4403 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4404
4405 /* Or in the new dirty bits. */
4406 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4407
4408 /* Store. */
4409 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4410 }
4411
4412 emit_label (cont_label);
4413}
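
/* For illustration, the hit path above expressed as scalar C (a sketch;
   the generated code tests all four ways at once with SIMD compares,
   and the 64-bit EA case also compares tag_hi):

     unsigned int idx = ea & (__cache_tag_array_size - 128);
     struct __cache_tag_array *set =
       (struct __cache_tag_array *) ((char *) __cache_tag_array + idx);
     for (int way = 0; way < 4; way++)
       if (set->tag_lo[way] == (ea & -128))
         return (char *) set->data_pointer[way] + (ea & 127);

   On a miss the code above falls back to the __cache_fetch calls in
   ea_load_store.  */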
4414
4415static rtx
4416expand_ea_mem (rtx mem, bool is_store)
4417{
4418 rtx ea_addr;
4419 rtx data_addr = gen_reg_rtx (Pmode);
4420 rtx new_mem;
4421
4422 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4423 if (optimize_size || optimize == 0)
4424 ea_load_store (mem, is_store, ea_addr, data_addr);
4425 else
4426 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4427
4428 if (ea_alias_set == -1)
4429 ea_alias_set = new_alias_set ();
4430
4431 /* We generate a new MEM RTX to refer to the copy of the data
4432 in the cache. We do not copy memory attributes (except the
4433 alignment) from the original MEM, as they may no longer apply
4434 to the cache copy. */
4435 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4436 set_mem_alias_set (new_mem, ea_alias_set);
4437 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4438
4439 return new_mem;
4440}
4441
644459d0 4442int
3754d046 4443spu_expand_mov (rtx * ops, machine_mode mode)
644459d0 4444{
4445 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4446 {
4447 /* Perform the move in the destination SUBREG's inner mode. */
4448 ops[0] = SUBREG_REG (ops[0]);
4449 mode = GET_MODE (ops[0]);
4450 ops[1] = gen_lowpart_common (mode, ops[1]);
4451 gcc_assert (ops[1]);
4452 }
644459d0 4453
4454 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4455 {
4456 rtx from = SUBREG_REG (ops[1]);
2cf1bb25 4457 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
644459d0 4458
4459 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4460 && GET_MODE_CLASS (imode) == MODE_INT
4461 && subreg_lowpart_p (ops[1]));
4462
4463 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4464 imode = SImode;
4465 if (imode != GET_MODE (from))
4466 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4467
4468 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4469 {
d6bf3b14 4470 enum insn_code icode = convert_optab_handler (trunc_optab,
4471 mode, imode);
644459d0 4472 emit_insn (GEN_FCN (icode) (ops[0], from));
4473 }
4474 else
4475 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4476 return 1;
4477 }
4478
4479 /* At least one of the operands needs to be a register. */
4480 if ((reload_in_progress | reload_completed) == 0
4481 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4482 {
4483 rtx temp = force_reg (mode, ops[1]);
4484 emit_move_insn (ops[0], temp);
4485 return 1;
4486 }
4487 if (reload_in_progress || reload_completed)
4488 {
dea01258 4489 if (CONSTANT_P (ops[1]))
4490 return spu_split_immediate (ops);
644459d0 4491 return 0;
4492 }
9d98604b 4493
4494 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4495 extend them. */
4496 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4497 {
9d98604b 4498 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4499 if (val != INTVAL (ops[1]))
644459d0 4500 {
9d98604b 4501 emit_move_insn (ops[0], GEN_INT (val));
4502 return 1;
644459d0 4503 }
4504 }
9d98604b 4505 if (MEM_P (ops[0]))
6cf5579e 4506 {
4507 if (MEM_ADDR_SPACE (ops[0]))
4508 ops[0] = expand_ea_mem (ops[0], true);
4509 return spu_split_store (ops);
4510 }
9d98604b 4511 if (MEM_P (ops[1]))
6cf5579e 4512 {
4513 if (MEM_ADDR_SPACE (ops[1]))
4514 ops[1] = expand_ea_mem (ops[1], false);
4515 return spu_split_load (ops);
4516 }
9d98604b 4517
644459d0 4518 return 0;
4519}
4520
9d98604b 4521static void
4522spu_convert_move (rtx dst, rtx src)
644459d0 4523{
3754d046 4524 machine_mode mode = GET_MODE (dst);
e2cd4ccd 4525 machine_mode int_mode = int_mode_for_mode (mode).require ();
9d98604b 4526 rtx reg;
4527 gcc_assert (GET_MODE (src) == TImode);
4528 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
d1f9b275 4529 emit_insn (gen_rtx_SET (reg,
9d98604b 4530 gen_rtx_TRUNCATE (int_mode,
4531 gen_rtx_LSHIFTRT (TImode, src,
4532 GEN_INT (int_mode == DImode ? 64 : 96)))));
4533 if (int_mode != mode)
4534 {
4535 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4536 emit_move_insn (dst, reg);
4537 }
4538}
644459d0 4539
9d98604b 4540/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4541 the address from SRC and SRC+16. Return a REG or CONST_INT that
4542 specifies how many bytes to rotate the loaded registers, plus any
4543 extra from EXTRA_ROTQBY. The address and rotate amounts are
4544 normalized to improve merging of loads and rotate computations. */
4545static rtx
4546spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4547{
4548 rtx addr = XEXP (src, 0);
4549 rtx p0, p1, rot, addr0, addr1;
4550 int rot_amt;
644459d0 4551
4552 rot = 0;
4553 rot_amt = 0;
9d98604b 4554
4555 if (MEM_ALIGN (src) >= 128)
4556 /* Address is already aligned; simply perform a TImode load. */ ;
4557 else if (GET_CODE (addr) == PLUS)
644459d0 4558 {
4559 /* 8 cases:
4560 aligned reg + aligned reg => lqx
4561 aligned reg + unaligned reg => lqx, rotqby
4562 aligned reg + aligned const => lqd
4563 aligned reg + unaligned const => lqd, rotqbyi
4564 unaligned reg + aligned reg => lqx, rotqby
4565 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4566 unaligned reg + aligned const => lqd, rotqby
4567 unaligned reg + unaligned const -> not allowed by legitimate address
4568 */
4569 p0 = XEXP (addr, 0);
4570 p1 = XEXP (addr, 1);
9d98604b 4571 if (!reg_aligned_for_addr (p0))
644459d0 4572 {
9d98604b 4573 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4574 {
9d98604b 4575 rot = gen_reg_rtx (SImode);
4576 emit_insn (gen_addsi3 (rot, p0, p1));
4577 }
4578 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4579 {
4580 if (INTVAL (p1) > 0
4581 && REG_POINTER (p0)
4582 && INTVAL (p1) * BITS_PER_UNIT
4583 < REGNO_POINTER_ALIGN (REGNO (p0)))
4584 {
4585 rot = gen_reg_rtx (SImode);
4586 emit_insn (gen_addsi3 (rot, p0, p1));
4587 addr = p0;
4588 }
4589 else
4590 {
4591 rtx x = gen_reg_rtx (SImode);
4592 emit_move_insn (x, p1);
4593 if (!spu_arith_operand (p1, SImode))
4594 p1 = x;
4595 rot = gen_reg_rtx (SImode);
4596 emit_insn (gen_addsi3 (rot, p0, p1));
4597 addr = gen_rtx_PLUS (Pmode, p0, x);
4598 }
644459d0 4599 }
4600 else
4601 rot = p0;
4602 }
4603 else
4604 {
4605 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4606 {
4607 rot_amt = INTVAL (p1) & 15;
9d98604b 4608 if (INTVAL (p1) & -16)
4609 {
4610 p1 = GEN_INT (INTVAL (p1) & -16);
4611 addr = gen_rtx_PLUS (SImode, p0, p1);
4612 }
4613 else
4614 addr = p0;
644459d0 4615 }
9d98604b 4616 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4617 rot = p1;
4618 }
4619 }
9d98604b 4620 else if (REG_P (addr))
644459d0 4621 {
9d98604b 4622 if (!reg_aligned_for_addr (addr))
644459d0 4623 rot = addr;
4624 }
4625 else if (GET_CODE (addr) == CONST)
4626 {
4627 if (GET_CODE (XEXP (addr, 0)) == PLUS
4628 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4629 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4630 {
4631 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4632 if (rot_amt & -16)
4633 addr = gen_rtx_CONST (Pmode,
4634 gen_rtx_PLUS (Pmode,
4635 XEXP (XEXP (addr, 0), 0),
4636 GEN_INT (rot_amt & -16)));
4637 else
4638 addr = XEXP (XEXP (addr, 0), 0);
4639 }
4640 else
9d98604b 4641 {
4642 rot = gen_reg_rtx (Pmode);
4643 emit_move_insn (rot, addr);
4644 }
644459d0 4645 }
4646 else if (GET_CODE (addr) == CONST_INT)
4647 {
4648 rot_amt = INTVAL (addr);
4649 addr = GEN_INT (rot_amt & -16);
4650 }
4651 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4652 {
4653 rot = gen_reg_rtx (Pmode);
4654 emit_move_insn (rot, addr);
4655 }
644459d0 4656
9d98604b 4657 rot_amt += extra_rotby;
644459d0 4658
4659 rot_amt &= 15;
4660
4661 if (rot && rot_amt)
4662 {
9d98604b 4663 rtx x = gen_reg_rtx (SImode);
4664 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4665 rot = x;
644459d0 4666 rot_amt = 0;
4667 }
9d98604b 4668 if (!rot && rot_amt)
4669 rot = GEN_INT (rot_amt);
4670
4671 addr0 = copy_rtx (addr);
4672 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4673 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4674
4675 if (dst1)
4676 {
29c05e22 4677 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
9d98604b 4678 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4679 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4680 }
644459d0 4681
9d98604b 4682 return rot;
4683}
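
/* For illustration: for a load through a plain unaligned register
   address R, the code above loads the quadword at (R & -16) and hands R
   back as the rotate amount, so spu_split_load below follows the load
   with a rotqby of the quadword by R to bring the addressed bytes into
   the preferred slot.  */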
4684
4685int
4686spu_split_load (rtx * ops)
4687{
3754d046 4688 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4689 rtx addr, load, rot;
4690 int rot_amt;
644459d0 4691
9d98604b 4692 if (GET_MODE_SIZE (mode) >= 16)
4693 return 0;
644459d0 4694
9d98604b 4695 addr = XEXP (ops[1], 0);
4696 gcc_assert (GET_CODE (addr) != AND);
4697
4698 if (!address_needs_split (ops[1]))
4699 {
4700 ops[1] = change_address (ops[1], TImode, addr);
4701 load = gen_reg_rtx (TImode);
4702 emit_insn (gen__movti (load, ops[1]));
4703 spu_convert_move (ops[0], load);
4704 return 1;
4705 }
4706
4707 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4708
4709 load = gen_reg_rtx (TImode);
4710 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4711
4712 if (rot)
4713 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4714
9d98604b 4715 spu_convert_move (ops[0], load);
4716 return 1;
644459d0 4717}
4718
9d98604b 4719int
644459d0 4720spu_split_store (rtx * ops)
4721{
3754d046 4722 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4723 rtx reg;
644459d0 4724 rtx addr, p0, p1, p1_lo, smem;
4725 int aform;
4726 int scalar;
4727
9d98604b 4728 if (GET_MODE_SIZE (mode) >= 16)
4729 return 0;
4730
644459d0 4731 addr = XEXP (ops[0], 0);
9d98604b 4732 gcc_assert (GET_CODE (addr) != AND);
4733
4734 if (!address_needs_split (ops[0]))
4735 {
4736 reg = gen_reg_rtx (TImode);
4737 emit_insn (gen_spu_convert (reg, ops[1]));
4738 ops[0] = change_address (ops[0], TImode, addr);
4739 emit_move_insn (ops[0], reg);
4740 return 1;
4741 }
644459d0 4742
4743 if (GET_CODE (addr) == PLUS)
4744 {
4745 /* 8 cases:
4746 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4747 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4748 aligned reg + aligned const => lqd, c?d, shuf, stqx
4749 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4750 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4751 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4752 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4753	 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4754 */
4755 aform = 0;
4756 p0 = XEXP (addr, 0);
4757 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4758 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4759 {
4760 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4761 if (reg_aligned_for_addr (p0))
4762 {
4763 p1 = GEN_INT (INTVAL (p1) & -16);
4764 if (p1 == const0_rtx)
4765 addr = p0;
4766 else
4767 addr = gen_rtx_PLUS (SImode, p0, p1);
4768 }
4769 else
4770 {
4771 rtx x = gen_reg_rtx (SImode);
4772 emit_move_insn (x, p1);
4773 addr = gen_rtx_PLUS (SImode, p0, x);
4774 }
644459d0 4775 }
4776 }
9d98604b 4777 else if (REG_P (addr))
644459d0 4778 {
4779 aform = 0;
4780 p0 = addr;
4781 p1 = p1_lo = const0_rtx;
4782 }
4783 else
4784 {
4785 aform = 1;
4786 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4787 p1 = 0; /* aform doesn't use p1 */
4788 p1_lo = addr;
4789 if (ALIGNED_SYMBOL_REF_P (addr))
4790 p1_lo = const0_rtx;
9d98604b 4791 else if (GET_CODE (addr) == CONST
4792 && GET_CODE (XEXP (addr, 0)) == PLUS
4793 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4794 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4795 {
9d98604b 4796 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4797 if ((v & -16) != 0)
4798 addr = gen_rtx_CONST (Pmode,
4799 gen_rtx_PLUS (Pmode,
4800 XEXP (XEXP (addr, 0), 0),
4801 GEN_INT (v & -16)));
4802 else
4803 addr = XEXP (XEXP (addr, 0), 0);
4804 p1_lo = GEN_INT (v & 15);
644459d0 4805 }
4806 else if (GET_CODE (addr) == CONST_INT)
4807 {
4808 p1_lo = GEN_INT (INTVAL (addr) & 15);
4809 addr = GEN_INT (INTVAL (addr) & -16);
4810 }
9d98604b 4811 else
4812 {
4813 p1_lo = gen_reg_rtx (SImode);
4814 emit_move_insn (p1_lo, addr);
4815 }
644459d0 4816 }
4817
4cbad5bb 4818 gcc_assert (aform == 0 || aform == 1);
9d98604b 4819 reg = gen_reg_rtx (TImode);
e04cf423 4820
644459d0 4821 scalar = store_with_one_insn_p (ops[0]);
4822 if (!scalar)
4823 {
4824 /* We could copy the flags from the ops[0] MEM to mem here,
4825 We don't because we want this load to be optimized away if
4826 possible, and copying the flags will prevent that in certain
4827 cases, e.g. consider the volatile flag. */
4828
9d98604b 4829 rtx pat = gen_reg_rtx (TImode);
e04cf423 4830 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4831 set_mem_alias_set (lmem, 0);
4832 emit_insn (gen_movti (reg, lmem));
644459d0 4833
9d98604b 4834 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4835 p0 = stack_pointer_rtx;
4836 if (!p1_lo)
4837 p1_lo = const0_rtx;
4838
4839 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4840 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4841 }
644459d0 4842 else
4843 {
4844 if (GET_CODE (ops[1]) == REG)
4845 emit_insn (gen_spu_convert (reg, ops[1]));
4846 else if (GET_CODE (ops[1]) == SUBREG)
4847 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4848 else
4849 abort ();
4850 }
4851
4852 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4853 emit_insn (gen_ashlti3
4854 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4855
9d98604b 4856 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4857 /* We can't use the previous alias set because the memory has changed
4858 size and can potentially overlap objects of other types. */
4859 set_mem_alias_set (smem, 0);
4860
e04cf423 4861 emit_insn (gen_movti (smem, reg));
9d98604b 4862 return 1;
644459d0 4863}
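
/* For illustration: a partial-quadword store that is not covered by
   store_with_one_insn_p is a read-modify-write of the enclosing
   quadword, roughly

     lq    -> load the quadword containing the destination
     cpat  -> build a shuffle pattern selecting the stored bytes
     shufb -> merge the new value into the loaded quadword
     stq   -> store the merged quadword back

   which matches the lq*, c?*, shuf, stq* sequences in the case table
   above.  */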
4864
4865/* Return TRUE if X is MEM which is a struct member reference
4866 and the member can safely be loaded and stored with a single
4867 instruction because it is padded. */
4868static int
4869mem_is_padded_component_ref (rtx x)
4870{
4871 tree t = MEM_EXPR (x);
4872 tree r;
4873 if (!t || TREE_CODE (t) != COMPONENT_REF)
4874 return 0;
4875 t = TREE_OPERAND (t, 1);
4876 if (!t || TREE_CODE (t) != FIELD_DECL
4877 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4878 return 0;
4879 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4880 r = DECL_FIELD_CONTEXT (t);
4881 if (!r || TREE_CODE (r) != RECORD_TYPE)
4882 return 0;
4883 /* Make sure they are the same mode */
4884 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4885 return 0;
 4886	  /* If there are no following fields then the field alignment ensures
fa7637bd 4887	     the structure is padded to that alignment, which means this field
 4888	     is padded too. */
644459d0 4889 if (TREE_CHAIN (t) == 0)
4890 return 1;
4891 /* If the following field is also aligned then this field will be
4892 padded. */
4893 t = TREE_CHAIN (t);
4894 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4895 return 1;
4896 return 0;
4897}
4898
c7b91b14 4899/* Parse the -mfixed-range= option string. */
4900static void
4901fix_range (const char *const_str)
4902{
4903 int i, first, last;
4904 char *str, *dash, *comma;
4905
 4906	  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4907 REG2 are either register names or register numbers. The effect
4908 of this option is to mark the registers in the range from REG1 to
4909 REG2 as ``fixed'' so they won't be used by the compiler. */
4910
4911 i = strlen (const_str);
4912 str = (char *) alloca (i + 1);
4913 memcpy (str, const_str, i + 1);
4914
4915 while (1)
4916 {
4917 dash = strchr (str, '-');
4918 if (!dash)
4919 {
2f6d557f 4920 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
c7b91b14 4921 return;
4922 }
4923 *dash = '\0';
4924 comma = strchr (dash + 1, ',');
4925 if (comma)
4926 *comma = '\0';
4927
4928 first = decode_reg_name (str);
4929 if (first < 0)
4930 {
4931 warning (0, "unknown register name: %s", str);
4932 return;
4933 }
4934
4935 last = decode_reg_name (dash + 1);
4936 if (last < 0)
4937 {
4938 warning (0, "unknown register name: %s", dash + 1);
4939 return;
4940 }
4941
4942 *dash = '-';
4943
4944 if (first > last)
4945 {
4946 warning (0, "%s-%s is an empty range", str, dash + 1);
4947 return;
4948 }
4949
4950 for (i = first; i <= last; ++i)
4951 fixed_regs[i] = call_used_regs[i] = 1;
4952
4953 if (!comma)
4954 break;
4955
4956 *comma = ',';
4957 str = comma + 1;
4958 }
4959}
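
/* For illustration: -mfixed-range=75-79 marks registers 75 through 79
   as fixed and call-used so the register allocator never touches them;
   several ranges can be given, separated by commas, e.g.
   -mfixed-range=75-79,110-127.  */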
4960
644459d0 4961/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4962 can be generated using the fsmbi instruction. */
4963int
4964fsmbi_const_p (rtx x)
4965{
dea01258 4966 if (CONSTANT_P (x))
4967 {
5df189be 4968 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4969 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4970 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4971 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4972 }
4973 return 0;
4974}
4975
4976/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4977 can be generated using the cbd, chd, cwd or cdd instruction. */
4978int
3754d046 4979cpat_const_p (rtx x, machine_mode mode)
dea01258 4980{
4981 if (CONSTANT_P (x))
4982 {
4983 enum immediate_class c = classify_immediate (x, mode);
4984 return c == IC_CPAT;
4985 }
4986 return 0;
4987}
644459d0 4988
dea01258 4989rtx
4990gen_cpat_const (rtx * ops)
4991{
4992 unsigned char dst[16];
4993 int i, offset, shift, isize;
4994 if (GET_CODE (ops[3]) != CONST_INT
4995 || GET_CODE (ops[2]) != CONST_INT
4996 || (GET_CODE (ops[1]) != CONST_INT
4997 && GET_CODE (ops[1]) != REG))
4998 return 0;
4999 if (GET_CODE (ops[1]) == REG
5000 && (!REG_POINTER (ops[1])
5001 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5002 return 0;
644459d0 5003
5004 for (i = 0; i < 16; i++)
dea01258 5005 dst[i] = i + 16;
5006 isize = INTVAL (ops[3]);
5007 if (isize == 1)
5008 shift = 3;
5009 else if (isize == 2)
5010 shift = 2;
5011 else
5012 shift = 0;
 5013	  offset = (INTVAL (ops[2])
 5014	            + (GET_CODE (ops[1]) == CONST_INT
 5015	               ? INTVAL (ops[1]) : 0)) & 15;
5016 for (i = 0; i < isize; i++)
5017 dst[offset + i] = i + shift;
5018 return array_to_constant (TImode, dst);
644459d0 5019}
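
/* For illustration: for a 4-byte insert at offset 4 (ops[1] == 0,
   ops[2] == 4, ops[3] == 4) the loop above produces the pattern

     { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }

   i.e. result bytes 4..7 are taken from the new value (shuffle
   selectors 0..15 name the first source) and every other byte is taken
   from the original quadword (selectors 16..31 name the second
   source).  */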
5020
5021/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5022 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5023 than 16 bytes, the value is repeated across the rest of the array. */
5024void
3754d046 5025constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
644459d0 5026{
5027 HOST_WIDE_INT val;
5028 int i, j, first;
5029
5030 memset (arr, 0, 16);
5031 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5032 if (GET_CODE (x) == CONST_INT
5033 || (GET_CODE (x) == CONST_DOUBLE
5034 && (mode == SFmode || mode == DFmode)))
5035 {
5036 gcc_assert (mode != VOIDmode && mode != BLKmode);
5037
5038 if (GET_CODE (x) == CONST_DOUBLE)
5039 val = const_double_to_hwint (x);
5040 else
5041 val = INTVAL (x);
5042 first = GET_MODE_SIZE (mode) - 1;
5043 for (i = first; i >= 0; i--)
5044 {
5045 arr[i] = val & 0xff;
5046 val >>= 8;
5047 }
5048 /* Splat the constant across the whole array. */
5049 for (j = 0, i = first + 1; i < 16; i++)
5050 {
5051 arr[i] = arr[j];
5052 j = (j == first) ? 0 : j + 1;
5053 }
5054 }
5055 else if (GET_CODE (x) == CONST_DOUBLE)
5056 {
5057 val = CONST_DOUBLE_LOW (x);
5058 for (i = 15; i >= 8; i--)
5059 {
5060 arr[i] = val & 0xff;
5061 val >>= 8;
5062 }
5063 val = CONST_DOUBLE_HIGH (x);
5064 for (i = 7; i >= 0; i--)
5065 {
5066 arr[i] = val & 0xff;
5067 val >>= 8;
5068 }
5069 }
5070 else if (GET_CODE (x) == CONST_VECTOR)
5071 {
5072 int units;
5073 rtx elt;
5074 mode = GET_MODE_INNER (mode);
5075 units = CONST_VECTOR_NUNITS (x);
5076 for (i = 0; i < units; i++)
5077 {
5078 elt = CONST_VECTOR_ELT (x, i);
5079 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5080 {
5081 if (GET_CODE (elt) == CONST_DOUBLE)
5082 val = const_double_to_hwint (elt);
5083 else
5084 val = INTVAL (elt);
5085 first = GET_MODE_SIZE (mode) - 1;
5086 if (first + i * GET_MODE_SIZE (mode) > 16)
5087 abort ();
5088 for (j = first; j >= 0; j--)
5089 {
5090 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5091 val >>= 8;
5092 }
5093 }
5094 }
5095 }
5096 else
 5097	    gcc_unreachable ();
5098}
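
/* For illustration: constant_to_array (SImode, GEN_INT (0x01020304), arr)
   first writes bytes 01 02 03 04 into arr[0..3] and then splats them,
   leaving arr as 01 02 03 04 repeated four times.  */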
5099
5100/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5101 smaller than 16 bytes, use the bytes that would represent that value
5102 in a register, e.g., for QImode return the value of arr[3]. */
5103rtx
3754d046 5104array_to_constant (machine_mode mode, const unsigned char arr[16])
644459d0 5105{
3754d046 5106 machine_mode inner_mode;
644459d0 5107 rtvec v;
5108 int units, size, i, j, k;
5109 HOST_WIDE_INT val;
5110
5111 if (GET_MODE_CLASS (mode) == MODE_INT
5112 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5113 {
5114 j = GET_MODE_SIZE (mode);
5115 i = j < 4 ? 4 - j : 0;
5116 for (val = 0; i < j; i++)
5117 val = (val << 8) | arr[i];
5118 val = trunc_int_for_mode (val, mode);
5119 return GEN_INT (val);
5120 }
5121
5122 if (mode == TImode)
5123 {
5124 HOST_WIDE_INT high;
5125 for (i = high = 0; i < 8; i++)
5126 high = (high << 8) | arr[i];
5127 for (i = 8, val = 0; i < 16; i++)
5128 val = (val << 8) | arr[i];
5129 return immed_double_const (val, high, TImode);
5130 }
5131 if (mode == SFmode)
5132 {
5133 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5134 val = trunc_int_for_mode (val, SImode);
171b6d22 5135 return hwint_to_const_double (SFmode, val);
644459d0 5136 }
5137 if (mode == DFmode)
5138 {
1f915911 5139 for (i = 0, val = 0; i < 8; i++)
5140 val = (val << 8) | arr[i];
171b6d22 5141 return hwint_to_const_double (DFmode, val);
644459d0 5142 }
5143
5144 if (!VECTOR_MODE_P (mode))
5145 abort ();
5146
5147 units = GET_MODE_NUNITS (mode);
5148 size = GET_MODE_UNIT_SIZE (mode);
5149 inner_mode = GET_MODE_INNER (mode);
5150 v = rtvec_alloc (units);
5151
5152 for (k = i = 0; i < units; ++i)
5153 {
5154 val = 0;
5155 for (j = 0; j < size; j++, k++)
5156 val = (val << 8) | arr[k];
5157
5158 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5159 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5160 else
5161 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5162 }
5163 if (k > 16)
5164 abort ();
5165
5166 return gen_rtx_CONST_VECTOR (mode, v);
5167}
5168
5169static void
5170reloc_diagnostic (rtx x)
5171{
712d2297 5172 tree decl = 0;
644459d0 5173 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5174 return;
5175
5176 if (GET_CODE (x) == SYMBOL_REF)
5177 decl = SYMBOL_REF_DECL (x);
5178 else if (GET_CODE (x) == CONST
5179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5180 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5181
5182 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5183 if (decl && !DECL_P (decl))
5184 decl = 0;
5185
644459d0 5186 /* The decl could be a string constant. */
5187 if (decl && DECL_P (decl))
712d2297 5188 {
5189 location_t loc;
5190 /* We use last_assemble_variable_decl to get line information. It's
5191 not always going to be right and might not even be close, but will
5192 be right for the more common cases. */
5193 if (!last_assemble_variable_decl || in_section == ctors_section)
5194 loc = DECL_SOURCE_LOCATION (decl);
5195 else
5196 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5197
712d2297 5198 if (TARGET_WARN_RELOC)
5199 warning_at (loc, 0,
5200 "creating run-time relocation for %qD", decl);
5201 else
5202 error_at (loc,
5203 "creating run-time relocation for %qD", decl);
5204 }
5205 else
5206 {
5207 if (TARGET_WARN_RELOC)
5208 warning_at (input_location, 0, "creating run-time relocation");
5209 else
5210 error_at (input_location, "creating run-time relocation");
5211 }
644459d0 5212}
5213
5214/* Hook into assemble_integer so we can generate an error for run-time
5215 relocations. The SPU ABI disallows them. */
5216static bool
5217spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5218{
5219 /* By default run-time relocations aren't supported, but we allow them
 5220	     in case users support them in their own run-time loader, and we provide
5221 a warning for those users that don't. */
5222 if ((GET_CODE (x) == SYMBOL_REF)
5223 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5224 reloc_diagnostic (x);
5225
5226 return default_assemble_integer (x, size, aligned_p);
5227}
5228
5229static void
5230spu_asm_globalize_label (FILE * file, const char *name)
5231{
5232 fputs ("\t.global\t", file);
5233 assemble_name (file, name);
5234 fputs ("\n", file);
5235}
5236
5237static bool
5ae4887d 5238spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
20d892d1 5239 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5240 bool speed ATTRIBUTE_UNUSED)
644459d0 5241{
5ae4887d 5242 int code = GET_CODE (x);
644459d0 5243 int cost = COSTS_N_INSNS (2);
5244
5245 /* Folding to a CONST_VECTOR will use extra space but there might
5246 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5247 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5248 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5249 because this cost will only be compared against a single insn.
5250 if (code == CONST_VECTOR)
ca316360 5251 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5252 */
5253
5254 /* Use defaults for float operations. Not accurate but good enough. */
5255 if (mode == DFmode)
5256 {
5257 *total = COSTS_N_INSNS (13);
5258 return true;
5259 }
5260 if (mode == SFmode)
5261 {
5262 *total = COSTS_N_INSNS (6);
5263 return true;
5264 }
5265 switch (code)
5266 {
5267 case CONST_INT:
5268 if (satisfies_constraint_K (x))
5269 *total = 0;
5270 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5271 *total = COSTS_N_INSNS (1);
5272 else
5273 *total = COSTS_N_INSNS (3);
5274 return true;
5275
5276 case CONST:
5277 *total = COSTS_N_INSNS (3);
5278 return true;
5279
5280 case LABEL_REF:
5281 case SYMBOL_REF:
5282 *total = COSTS_N_INSNS (0);
5283 return true;
5284
5285 case CONST_DOUBLE:
5286 *total = COSTS_N_INSNS (5);
5287 return true;
5288
5289 case FLOAT_EXTEND:
5290 case FLOAT_TRUNCATE:
5291 case FLOAT:
5292 case UNSIGNED_FLOAT:
5293 case FIX:
5294 case UNSIGNED_FIX:
5295 *total = COSTS_N_INSNS (7);
5296 return true;
5297
5298 case PLUS:
5299 if (mode == TImode)
5300 {
5301 *total = COSTS_N_INSNS (9);
5302 return true;
5303 }
5304 break;
5305
5306 case MULT:
 5307	      cost = (GET_CODE (XEXP (x, 0)) == REG
 5308	              ? COSTS_N_INSNS (12)
 5309	              : COSTS_N_INSNS (7));
5310 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5311 {
5312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5313 {
5314 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5315 cost = COSTS_N_INSNS (14);
5316 if ((val & 0xffff) == 0)
5317 cost = COSTS_N_INSNS (9);
5318 else if (val > 0 && val < 0x10000)
5319 cost = COSTS_N_INSNS (11);
5320 }
5321 }
5322 *total = cost;
5323 return true;
5324 case DIV:
5325 case UDIV:
5326 case MOD:
5327 case UMOD:
5328 *total = COSTS_N_INSNS (20);
5329 return true;
5330 case ROTATE:
5331 case ROTATERT:
5332 case ASHIFT:
5333 case ASHIFTRT:
5334 case LSHIFTRT:
5335 *total = COSTS_N_INSNS (4);
5336 return true;
5337 case UNSPEC:
5338 if (XINT (x, 1) == UNSPEC_CONVERT)
5339 *total = COSTS_N_INSNS (0);
5340 else
5341 *total = COSTS_N_INSNS (4);
5342 return true;
5343 }
5344 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5345 if (GET_MODE_CLASS (mode) == MODE_INT
5346 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5347 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5348 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5349 *total = cost;
5350 return true;
5351}
5352
f77c4496 5353static scalar_int_mode
1bd43494 5354spu_unwind_word_mode (void)
644459d0 5355{
1bd43494 5356 return SImode;
644459d0 5357}
5358
5359/* Decide whether we can make a sibling call to a function. DECL is the
5360 declaration of the function being targeted by the call and EXP is the
5361 CALL_EXPR representing the call. */
5362static bool
5363spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5364{
5365 return decl && !TARGET_LARGE_MEM;
5366}
5367
5368/* We need to correctly update the back chain pointer and the Available
 5369	   Stack Size (which is in the second slot of the sp register). */
5370void
5371spu_allocate_stack (rtx op0, rtx op1)
5372{
5373 HOST_WIDE_INT v;
5374 rtx chain = gen_reg_rtx (V4SImode);
5375 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5376 rtx sp = gen_reg_rtx (V4SImode);
5377 rtx splatted = gen_reg_rtx (V4SImode);
5378 rtx pat = gen_reg_rtx (TImode);
5379
 5380	  /* Copy the back chain so we can save it back again. */
5381 emit_move_insn (chain, stack_bot);
5382
5383 op1 = force_reg (SImode, op1);
5384
5385 v = 0x1020300010203ll;
5386 emit_move_insn (pat, immed_double_const (v, v, TImode));
5387 emit_insn (gen_shufb (splatted, op1, op1, pat));
5388
5389 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5390 emit_insn (gen_subv4si3 (sp, sp, splatted));
5391
1e81f1d8 5392 if (flag_stack_check || flag_stack_clash_protection)
644459d0 5393 {
 5394	      rtx avail = gen_reg_rtx (SImode);
 5395	      rtx result = gen_reg_rtx (SImode);
447443f5 5396	      emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
644459d0 5397	      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
 5398	      emit_insn (gen_spu_heq (result, GEN_INT (0)));
5399 }
5400
5401 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5402
5403 emit_move_insn (stack_bot, chain);
5404
5405 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5406}
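
/* For illustration: the SPU stack pointer register carries the
   Available Stack Size in vector slot 1 alongside the pointer itself in
   slot 0, so the splat + subv4si3 above decrements both at once, and
   with stack checking enabled the cgt/heq pair halts the program if the
   available size went negative.  */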
5407
5408void
5409spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5410{
5411 static unsigned char arr[16] =
5412 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5413 rtx temp = gen_reg_rtx (SImode);
5414 rtx temp2 = gen_reg_rtx (SImode);
5415 rtx temp3 = gen_reg_rtx (V4SImode);
5416 rtx temp4 = gen_reg_rtx (V4SImode);
5417 rtx pat = gen_reg_rtx (TImode);
5418 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5419
5420 /* Restore the backchain from the first word, sp from the second. */
5421 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5422 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5423
5424 emit_move_insn (pat, array_to_constant (TImode, arr));
5425
5426 /* Compute Available Stack Size for sp */
5427 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5428 emit_insn (gen_shufb (temp3, temp, temp, pat));
5429
5430 /* Compute Available Stack Size for back chain */
5431 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5432 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5433 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5434
5435 emit_insn (gen_addv4si3 (sp, sp, temp3));
5436 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5437}
5438
5439static void
5440spu_init_libfuncs (void)
5441{
5442 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5443 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5444 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5445 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5446 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5447 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5448 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5449 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5450 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5451 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5452 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5453 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5454
5455 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5456 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5457
5825ec3f 5458 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5459 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5460 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5461 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5462 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5463 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5464 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5465 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5466 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5467 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5468 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5469 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5470
19a53068 5471 set_optab_libfunc (smul_optab, TImode, "__multi3");
5472 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5473 set_optab_libfunc (smod_optab, TImode, "__modti3");
5474 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5475 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5476 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5477}
5478
5479/* Make a subreg, stripping any existing subreg. We could possibly just
5480 call simplify_subreg, but in this case we know what we want. */
5481rtx
3754d046 5482spu_gen_subreg (machine_mode mode, rtx x)
644459d0 5483{
5484 if (GET_CODE (x) == SUBREG)
5485 x = SUBREG_REG (x);
5486 if (GET_MODE (x) == mode)
5487 return x;
5488 return gen_rtx_SUBREG (mode, x, 0);
5489}
5490
5491static bool
fb80456a 5492spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5493{
5494 return (TYPE_MODE (type) == BLKmode
5495 && ((type) == 0
5496 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5497 || int_size_in_bytes (type) >
5498 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5499}
5500\f
5501/* Create the built-in types and functions */
5502
c2233b46 5503enum spu_function_code
5504{
5505#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5506#include "spu-builtins.def"
5507#undef DEF_BUILTIN
5508 NUM_SPU_BUILTINS
5509};
5510
5511extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5512
644459d0 5513struct spu_builtin_description spu_builtins[] = {
5514#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5515 {fcode, icode, name, type, params},
644459d0 5516#include "spu-builtins.def"
5517#undef DEF_BUILTIN
5518};
5519
0c5c4d59 5520static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5521
5522/* Returns the spu builtin decl for CODE. */
e6925042 5523
5524static tree
5525spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5526{
5527 if (code >= NUM_SPU_BUILTINS)
5528 return error_mark_node;
5529
0c5c4d59 5530 return spu_builtin_decls[code];
e6925042 5531}
5532
5533
644459d0 5534static void
5535spu_init_builtins (void)
5536{
5537 struct spu_builtin_description *d;
5538 unsigned int i;
5539
5540 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5541 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5542 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5543 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5544 V4SF_type_node = build_vector_type (float_type_node, 4);
5545 V2DF_type_node = build_vector_type (double_type_node, 2);
5546
5547 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5548 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5549 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5550 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5551
c4ecce0c 5552 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5553
5554 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5561 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5562 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5564 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5566
5567 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5568 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5569 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5571 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5572 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5573 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5575
5576 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5577 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5578
5579 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5580
5581 spu_builtin_types[SPU_BTI_PTR] =
5582 build_pointer_type (build_qualified_type
5583 (void_type_node,
5584 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5585
5586 /* For each builtin we build a new prototype. The tree code will make
5587 sure nodes are shared. */
5588 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5589 {
5590 tree p;
 5591 char name[64]; /* add_builtin_function will make a copy. */
5592 int parm;
5593
5594 if (d->name == 0)
5595 continue;
5596
5dfbd18f 5597 /* Find last parm. */
644459d0 5598 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5599 ;
644459d0 5600
5601 p = void_list_node;
5602 while (parm > 1)
5603 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5604
5605 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5606
5607 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5608 spu_builtin_decls[i] =
3726fe5e 5609 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5610 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5611 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5612
5613 /* These builtins don't throw. */
0c5c4d59 5614 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5615 }
5616}
5617
cf31d486 5618void
5619spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5620{
5621 static unsigned char arr[16] =
5622 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5623
5624 rtx temp = gen_reg_rtx (Pmode);
5625 rtx temp2 = gen_reg_rtx (V4SImode);
5626 rtx temp3 = gen_reg_rtx (V4SImode);
5627 rtx pat = gen_reg_rtx (TImode);
5628 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5629
5630 emit_move_insn (pat, array_to_constant (TImode, arr));
5631
5632 /* Restore the sp. */
5633 emit_move_insn (temp, op1);
5634 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5635
5636 /* Compute available stack size for sp. */
5637 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5638 emit_insn (gen_shufb (temp3, temp, temp, pat));
5639
5640 emit_insn (gen_addv4si3 (sp, sp, temp3));
5641 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5642}
5643
644459d0 5644int
5645spu_safe_dma (HOST_WIDE_INT channel)
5646{
006e4b96 5647 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5648}
5649
5650void
5651spu_builtin_splats (rtx ops[])
5652{
3754d046 5653 machine_mode mode = GET_MODE (ops[0]);
644459d0 5654 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5655 {
5656 unsigned char arr[16];
5657 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5658 emit_move_insn (ops[0], array_to_constant (mode, arr));
5659 }
644459d0 5660 else
5661 {
5662 rtx reg = gen_reg_rtx (TImode);
5663 rtx shuf;
5664 if (GET_CODE (ops[1]) != REG
5665 && GET_CODE (ops[1]) != SUBREG)
5666 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5667 switch (mode)
5668 {
916ace94 5669 case E_V2DImode:
5670 case E_V2DFmode:
644459d0 5671 shuf =
5672 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5673 TImode);
5674 break;
916ace94 5675 case E_V4SImode:
5676 case E_V4SFmode:
644459d0 5677 shuf =
5678 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5679 TImode);
5680 break;
916ace94 5681 case E_V8HImode:
644459d0 5682 shuf =
5683 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5684 TImode);
5685 break;
916ace94 5686 case E_V16QImode:
644459d0 5687 shuf =
5688 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5689 TImode);
5690 break;
5691 default:
5692 abort ();
5693 }
5694 emit_move_insn (reg, shuf);
5695 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5696 }
5697}
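/* Worked example (values illustrative): for V4SImode the pattern 0x00010203
   repeated in every word makes shufb copy bytes 0-3 of ops[1] (the scalar's
   preferred slot) into each word slot:

       ops[1] bytes: { a b c d . . . . . . . . . . . . }
       result bytes: { a b c d a b c d a b c d a b c d }

   For V8HImode the repeated 0x0203 selects bytes 2-3, where an HImode
   scalar sits right-justified within the preferred slot.  */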
5698
5699void
5700spu_builtin_extract (rtx ops[])
5701{
3754d046 5702 machine_mode mode;
644459d0 5703 rtx rot, from, tmp;
5704
5705 mode = GET_MODE (ops[1]);
5706
5707 if (GET_CODE (ops[2]) == CONST_INT)
5708 {
5709 switch (mode)
5710 {
916ace94 5711 case E_V16QImode:
447443f5 5712 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
644459d0 5713 break;
916ace94 5714 case E_V8HImode:
447443f5 5715 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
644459d0 5716 break;
916ace94 5717 case E_V4SFmode:
447443f5 5718 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
644459d0 5719 break;
916ace94 5720 case E_V4SImode:
447443f5 5721 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
644459d0 5722 break;
916ace94 5723 case E_V2DImode:
447443f5 5724 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
644459d0 5725 break;
916ace94 5726 case E_V2DFmode:
447443f5 5727 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
644459d0 5728 break;
5729 default:
5730 abort ();
5731 }
5732 return;
5733 }
5734
5735 from = spu_gen_subreg (TImode, ops[1]);
5736 rot = gen_reg_rtx (TImode);
5737 tmp = gen_reg_rtx (SImode);
5738
5739 switch (mode)
5740 {
916ace94 5741 case E_V16QImode:
644459d0 5742 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5743 break;
916ace94 5744 case E_V8HImode:
644459d0 5745 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5746 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5747 break;
916ace94 5748 case E_V4SFmode:
5749 case E_V4SImode:
644459d0 5750 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5751 break;
916ace94 5752 case E_V2DImode:
5753 case E_V2DFmode:
644459d0 5754 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5755 break;
5756 default:
5757 abort ();
5758 }
5759 emit_insn (gen_rotqby_ti (rot, from, tmp));
5760
5761 emit_insn (gen_spu_convert (ops[0], rot));
5762}
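/* Worked example for the variable-index path (index chosen for illustration
   only): extracting element 2 of a V4SImode vector computes tmp = 2 << 2 = 8,
   and rotqby rotates the quadword left by 8 bytes, so bytes 8-11 land in
   bytes 0-3 -- the SImode preferred slot that spu_convert then reads.  */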
5763
5764void
5765spu_builtin_insert (rtx ops[])
5766{
3754d046 5767 machine_mode mode = GET_MODE (ops[0]);
5768 machine_mode imode = GET_MODE_INNER (mode);
644459d0 5769 rtx mask = gen_reg_rtx (TImode);
5770 rtx offset;
5771
5772 if (GET_CODE (ops[3]) == CONST_INT)
5773 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5774 else
5775 {
5776 offset = gen_reg_rtx (SImode);
5777 emit_insn (gen_mulsi3
5778 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5779 }
5780 emit_insn (gen_cpat
5781 (mask, stack_pointer_rtx, offset,
5782 GEN_INT (GET_MODE_SIZE (imode))));
5783 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5784}
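/* Sketch of the generated code (illustrative): since the stack pointer is
   16-byte aligned, cpat on (sp + offset) yields an insertion mask whose
   control bytes select the old vector (0x10..0x1f range) everywhere except
   at the element being replaced, where they select the scalar's
   preferred-slot bytes; e.g. for a word insert at byte offset 4:

       mask = { 10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f }  */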
5785
5786void
5787spu_builtin_promote (rtx ops[])
5788{
3754d046 5789 machine_mode mode, imode;
644459d0 5790 rtx rot, from, offset;
5791 HOST_WIDE_INT pos;
5792
5793 mode = GET_MODE (ops[0]);
5794 imode = GET_MODE_INNER (mode);
5795
5796 from = gen_reg_rtx (TImode);
5797 rot = spu_gen_subreg (TImode, ops[0]);
5798
5799 emit_insn (gen_spu_convert (from, ops[1]));
5800
5801 if (GET_CODE (ops[2]) == CONST_INT)
5802 {
5803 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5804 if (GET_MODE_SIZE (imode) < 4)
5805 pos += 4 - GET_MODE_SIZE (imode);
5806 offset = GEN_INT (pos & 15);
5807 }
5808 else
5809 {
5810 offset = gen_reg_rtx (SImode);
5811 switch (mode)
5812 {
916ace94 5813 case E_V16QImode:
644459d0 5814 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5815 break;
916ace94 5816 case E_V8HImode:
644459d0 5817 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5818 emit_insn (gen_addsi3 (offset, offset, offset));
5819 break;
916ace94 5820 case E_V4SFmode:
5821 case E_V4SImode:
644459d0 5822 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5823 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5824 break;
916ace94 5825 case E_V2DImode:
5826 case E_V2DFmode:
644459d0 5827 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5828 break;
5829 default:
5830 abort ();
5831 }
5832 }
5833 emit_insn (gen_rotqby_ti (rot, from, offset));
5834}
5835
e96f2783 5836static void
5837spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5838{
e96f2783 5839 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5840 rtx shuf = gen_reg_rtx (V4SImode);
5841 rtx insn = gen_reg_rtx (V4SImode);
5842 rtx shufc;
5843 rtx insnc;
5844 rtx mem;
5845
5846 fnaddr = force_reg (SImode, fnaddr);
5847 cxt = force_reg (SImode, cxt);
5848
5849 if (TARGET_LARGE_MEM)
5850 {
5851 rtx rotl = gen_reg_rtx (V4SImode);
5852 rtx mask = gen_reg_rtx (V4SImode);
5853 rtx bi = gen_reg_rtx (SImode);
e96f2783 5854 static unsigned char const shufa[16] = {
644459d0 5855 2, 3, 0, 1, 18, 19, 16, 17,
5856 0, 1, 2, 3, 16, 17, 18, 19
5857 };
e96f2783 5858 static unsigned char const insna[16] = {
644459d0 5859 0x41, 0, 0, 79,
5860 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5861 0x60, 0x80, 0, 79,
5862 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5863 };
5864
5865 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5866 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5867
5868 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5869 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5870 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5871 emit_insn (gen_selb (insn, insnc, rotl, mask));
5872
e96f2783 5873 mem = adjust_address (m_tramp, V4SImode, 0);
5874 emit_move_insn (mem, insn);
644459d0 5875
5876 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5877 mem = adjust_address (m_tramp, Pmode, 16);
5878 emit_move_insn (mem, bi);
644459d0 5879 }
5880 else
5881 {
5882 rtx scxt = gen_reg_rtx (SImode);
5883 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5884 static unsigned char const insna[16] = {
644459d0 5885 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5886 0x30, 0, 0, 0,
5887 0, 0, 0, 0,
5888 0, 0, 0, 0
5889 };
5890
5891 shufc = gen_reg_rtx (TImode);
5892 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5893
 5894 /* By OR'ing all of cxt with the ila opcode we assume cxt fits
 5895 in 18 bits and its last 4 bits are zero. This will be true if
 5896 the stack pointer is initialized to 0x3fff0 at program start;
 5897 otherwise the ila instruction will be garbage. */
5898
5899 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5900 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5901 emit_insn (gen_cpat
5902 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5903 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5904 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5905
e96f2783 5906 mem = adjust_address (m_tramp, V4SImode, 0);
5907 emit_move_insn (mem, insn);
644459d0 5908 }
5909 emit_insn (gen_sync ());
5910}
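/* Rough picture of the non-TARGET_LARGE_MEM trampoline (an illustrative
   reading of the template bytes, assuming the standard SPU encodings):
   insna holds an "ila $STATIC_CHAIN_REGNUM, 0" word (first byte 0x42) and
   an absolute-branch word (first byte 0x30).  OR'ing in scxt = cxt << 7
   fills the ila's 18-bit immediate field, and sfnaddr = fnaddr << 5 fills
   the branch's 16-bit word-address field, producing roughly:

       ila  $STATIC_CHAIN_REGNUM, cxt
       bra  function                     */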
5911
08c6cbd2 5912static bool
5913spu_warn_func_return (tree decl)
5914{
5915 /* Naked functions are implemented entirely in assembly, including the
5916 return sequence, so suppress warnings about this. */
5917 return !spu_naked_function_p (decl);
5918}
5919
644459d0 5920void
5921spu_expand_sign_extend (rtx ops[])
5922{
5923 unsigned char arr[16];
5924 rtx pat = gen_reg_rtx (TImode);
5925 rtx sign, c;
5926 int i, last;
5927 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5928 if (GET_MODE (ops[1]) == QImode)
5929 {
5930 sign = gen_reg_rtx (HImode);
5931 emit_insn (gen_extendqihi2 (sign, ops[1]));
5932 for (i = 0; i < 16; i++)
5933 arr[i] = 0x12;
5934 arr[last] = 0x13;
5935 }
5936 else
5937 {
5938 for (i = 0; i < 16; i++)
5939 arr[i] = 0x10;
5940 switch (GET_MODE (ops[1]))
5941 {
916ace94 5942 case E_HImode:
644459d0 5943 sign = gen_reg_rtx (SImode);
5944 emit_insn (gen_extendhisi2 (sign, ops[1]));
5945 arr[last] = 0x03;
5946 arr[last - 1] = 0x02;
5947 break;
916ace94 5948 case E_SImode:
644459d0 5949 sign = gen_reg_rtx (SImode);
5950 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5951 for (i = 0; i < 4; i++)
5952 arr[last - i] = 3 - i;
5953 break;
916ace94 5954 case E_DImode:
644459d0 5955 sign = gen_reg_rtx (SImode);
5956 c = gen_reg_rtx (SImode);
5957 emit_insn (gen_spu_convert (c, ops[1]));
5958 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5959 for (i = 0; i < 8; i++)
5960 arr[last - i] = 7 - i;
5961 break;
5962 default:
5963 abort ();
5964 }
5965 }
5966 emit_move_insn (pat, array_to_constant (TImode, arr));
5967 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5968}
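/* Worked example (illustrative): extending an SImode value to a TImode
   result gives last = 15, so arr becomes

       { 10 10 10 10  10 10 10 10  10 10 10 10  00 01 02 03 }

   Control bytes 00-03 copy the four source bytes into the low word of the
   result, while each 0x10 selects byte 0 of SIGN, which the ashrsi3 by 31
   has filled with copies of the sign bit (0x00 or 0xff).  */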
5969
 5970/* Expand vector initialization. If there are any constant parts,
 5971 load the constant parts first, then load any non-constant parts. */
5972void
5973spu_expand_vector_init (rtx target, rtx vals)
5974{
3754d046 5975 machine_mode mode = GET_MODE (target);
644459d0 5976 int n_elts = GET_MODE_NUNITS (mode);
5977 int n_var = 0;
5978 bool all_same = true;
790c536c 5979 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5980 int i;
5981
5982 first = XVECEXP (vals, 0, 0);
5983 for (i = 0; i < n_elts; ++i)
5984 {
5985 x = XVECEXP (vals, 0, i);
e442af0b 5986 if (!(CONST_INT_P (x)
5987 || GET_CODE (x) == CONST_DOUBLE
5988 || GET_CODE (x) == CONST_FIXED))
644459d0 5989 ++n_var;
5990 else
5991 {
5992 if (first_constant == NULL_RTX)
5993 first_constant = x;
5994 }
5995 if (i > 0 && !rtx_equal_p (x, first))
5996 all_same = false;
5997 }
5998
 5999 /* If all elements are the same, use splats to repeat the element. */
6000 if (all_same)
6001 {
6002 if (!CONSTANT_P (first)
6003 && !register_operand (first, GET_MODE (x)))
6004 first = force_reg (GET_MODE (first), first);
6005 emit_insn (gen_spu_splats (target, first));
6006 return;
6007 }
6008
 6009 /* Load the constant parts. */
6010 if (n_var != n_elts)
6011 {
6012 if (n_var == 0)
6013 {
6014 emit_move_insn (target,
6015 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6016 }
6017 else
6018 {
6019 rtx constant_parts_rtx = copy_rtx (vals);
6020
6021 gcc_assert (first_constant != NULL_RTX);
 6022 /* Fill empty slots with the first constant; this increases
 6023 our chance of using splats in the recursive call below. */
6024 for (i = 0; i < n_elts; ++i)
e442af0b 6025 {
6026 x = XVECEXP (constant_parts_rtx, 0, i);
6027 if (!(CONST_INT_P (x)
6028 || GET_CODE (x) == CONST_DOUBLE
6029 || GET_CODE (x) == CONST_FIXED))
6030 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6031 }
644459d0 6032
6033 spu_expand_vector_init (target, constant_parts_rtx);
6034 }
6035 }
6036
 6037 /* Load the variable parts. */
6038 if (n_var != 0)
6039 {
6040 rtx insert_operands[4];
6041
6042 insert_operands[0] = target;
6043 insert_operands[2] = target;
6044 for (i = 0; i < n_elts; ++i)
6045 {
6046 x = XVECEXP (vals, 0, i);
e442af0b 6047 if (!(CONST_INT_P (x)
6048 || GET_CODE (x) == CONST_DOUBLE
6049 || GET_CODE (x) == CONST_FIXED))
644459d0 6050 {
6051 if (!register_operand (x, GET_MODE (x)))
6052 x = force_reg (GET_MODE (x), x);
6053 insert_operands[1] = x;
6054 insert_operands[3] = GEN_INT (i);
6055 spu_builtin_insert (insert_operands);
6056 }
6057 }
6058 }
6059}
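/* Illustrative walk-through with made-up values: initializing a V4SI
   vector with { x, 1, 2, 3 } where x is in a register.  The constant pass
   first loads { 1, 1, 2, 3 } (slot 0 padded with first_constant to improve
   the odds of a splat in the recursive call), then the variable pass
   inserts x into element 0 via spu_builtin_insert.  */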
6352eedf 6060
5474166e 6061/* Return the insn index of the vector compare instruction for the given
 6062 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6063
6064static int
6065get_vec_cmp_insn (enum rtx_code code,
3754d046 6066 machine_mode dest_mode,
6067 machine_mode op_mode)
5474166e 6068
6069{
6070 switch (code)
6071 {
6072 case EQ:
6073 if (dest_mode == V16QImode && op_mode == V16QImode)
6074 return CODE_FOR_ceq_v16qi;
6075 if (dest_mode == V8HImode && op_mode == V8HImode)
6076 return CODE_FOR_ceq_v8hi;
6077 if (dest_mode == V4SImode && op_mode == V4SImode)
6078 return CODE_FOR_ceq_v4si;
6079 if (dest_mode == V4SImode && op_mode == V4SFmode)
6080 return CODE_FOR_ceq_v4sf;
6081 if (dest_mode == V2DImode && op_mode == V2DFmode)
6082 return CODE_FOR_ceq_v2df;
6083 break;
6084 case GT:
6085 if (dest_mode == V16QImode && op_mode == V16QImode)
6086 return CODE_FOR_cgt_v16qi;
6087 if (dest_mode == V8HImode && op_mode == V8HImode)
6088 return CODE_FOR_cgt_v8hi;
6089 if (dest_mode == V4SImode && op_mode == V4SImode)
6090 return CODE_FOR_cgt_v4si;
6091 if (dest_mode == V4SImode && op_mode == V4SFmode)
6092 return CODE_FOR_cgt_v4sf;
6093 if (dest_mode == V2DImode && op_mode == V2DFmode)
6094 return CODE_FOR_cgt_v2df;
6095 break;
6096 case GTU:
6097 if (dest_mode == V16QImode && op_mode == V16QImode)
6098 return CODE_FOR_clgt_v16qi;
6099 if (dest_mode == V8HImode && op_mode == V8HImode)
6100 return CODE_FOR_clgt_v8hi;
6101 if (dest_mode == V4SImode && op_mode == V4SImode)
6102 return CODE_FOR_clgt_v4si;
6103 break;
6104 default:
6105 break;
6106 }
6107 return -1;
6108}
6109
6110/* Emit vector compare for operands OP0 and OP1 using code RCODE.
 6111 DMODE is the expected destination mode. This is a recursive function. */
6112
6113static rtx
6114spu_emit_vector_compare (enum rtx_code rcode,
6115 rtx op0, rtx op1,
3754d046 6116 machine_mode dmode)
5474166e 6117{
6118 int vec_cmp_insn;
6119 rtx mask;
3754d046 6120 machine_mode dest_mode;
6121 machine_mode op_mode = GET_MODE (op1);
5474166e 6122
6123 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6124
 6125 /* Single-precision floating point vector compare instructions use
 6126 destination mode V4SImode; double-precision ones use V2DImode.
 6127 Move the destination to the appropriate mode later. */
6128 if (dmode == V4SFmode)
6129 dest_mode = V4SImode;
6130 else if (dmode == V2DFmode)
6131 dest_mode = V2DImode;
6132 else
6133 dest_mode = dmode;
6134
6135 mask = gen_reg_rtx (dest_mode);
6136 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6137
6138 if (vec_cmp_insn == -1)
6139 {
6140 bool swap_operands = false;
6141 bool try_again = false;
6142 switch (rcode)
6143 {
6144 case LT:
6145 rcode = GT;
6146 swap_operands = true;
6147 try_again = true;
6148 break;
6149 case LTU:
6150 rcode = GTU;
6151 swap_operands = true;
6152 try_again = true;
6153 break;
6154 case NE:
e20943d4 6155 case UNEQ:
6156 case UNLE:
6157 case UNLT:
6158 case UNGE:
6159 case UNGT:
6160 case UNORDERED:
5474166e 6161 /* Treat A != B as ~(A==B). */
6162 {
e20943d4 6163 enum rtx_code rev_code;
5474166e 6164 enum insn_code nor_code;
e20943d4 6165 rtx rev_mask;
6166
6167 rev_code = reverse_condition_maybe_unordered (rcode);
6168 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6169
d6bf3b14 6170 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6171 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6172 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6173 if (dmode != dest_mode)
6174 {
6175 rtx temp = gen_reg_rtx (dest_mode);
6176 convert_move (temp, mask, 0);
6177 return temp;
6178 }
6179 return mask;
6180 }
6181 break;
6182 case GE:
6183 case GEU:
6184 case LE:
6185 case LEU:
6186 /* Try GT/GTU/LT/LTU OR EQ */
6187 {
6188 rtx c_rtx, eq_rtx;
6189 enum insn_code ior_code;
6190 enum rtx_code new_code;
6191
6192 switch (rcode)
6193 {
6194 case GE: new_code = GT; break;
6195 case GEU: new_code = GTU; break;
6196 case LE: new_code = LT; break;
6197 case LEU: new_code = LTU; break;
6198 default:
6199 gcc_unreachable ();
6200 }
6201
6202 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6203 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6204
d6bf3b14 6205 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6206 gcc_assert (ior_code != CODE_FOR_nothing);
6207 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6208 if (dmode != dest_mode)
6209 {
6210 rtx temp = gen_reg_rtx (dest_mode);
6211 convert_move (temp, mask, 0);
6212 return temp;
6213 }
6214 return mask;
6215 }
6216 break;
e20943d4 6217 case LTGT:
6218 /* Try LT OR GT */
6219 {
6220 rtx lt_rtx, gt_rtx;
6221 enum insn_code ior_code;
6222
6223 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6224 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6225
6226 ior_code = optab_handler (ior_optab, dest_mode);
6227 gcc_assert (ior_code != CODE_FOR_nothing);
6228 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6229 if (dmode != dest_mode)
6230 {
6231 rtx temp = gen_reg_rtx (dest_mode);
6232 convert_move (temp, mask, 0);
6233 return temp;
6234 }
6235 return mask;
6236 }
6237 break;
6238 case ORDERED:
6239 /* Implement as (A==A) & (B==B) */
6240 {
6241 rtx a_rtx, b_rtx;
6242 enum insn_code and_code;
6243
6244 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6245 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6246
6247 and_code = optab_handler (and_optab, dest_mode);
6248 gcc_assert (and_code != CODE_FOR_nothing);
6249 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6250 if (dmode != dest_mode)
6251 {
6252 rtx temp = gen_reg_rtx (dest_mode);
6253 convert_move (temp, mask, 0);
6254 return temp;
6255 }
6256 return mask;
6257 }
6258 break;
5474166e 6259 default:
6260 gcc_unreachable ();
6261 }
6262
6263 /* You only get two chances. */
6264 if (try_again)
6265 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6266
6267 gcc_assert (vec_cmp_insn != -1);
6268
6269 if (swap_operands)
6270 {
6271 rtx tmp;
6272 tmp = op0;
6273 op0 = op1;
6274 op1 = tmp;
6275 }
6276 }
6277
6278 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6279 if (dmode != dest_mode)
6280 {
6281 rtx temp = gen_reg_rtx (dest_mode);
6282 convert_move (temp, mask, 0);
6283 return temp;
6284 }
6285 return mask;
6286}
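/* Example of the synthesis above (illustrative): a V4SImode LE comparison
   has no direct instruction, so it is built as

       LE(a, b)  =  LT(a, b) | EQ(a, b)
                 =  GT(b, a) | EQ(a, b)

   with LT itself obtained by swapping the operands of the available GT.  */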
6287
6288
6289/* Emit vector conditional expression.
 6290 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
 6291 CC_OP0 and CC_OP1 are the two operands for the relational operation COND. */
6292
6293int
6294spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6295 rtx cond, rtx cc_op0, rtx cc_op1)
6296{
3754d046 6297 machine_mode dest_mode = GET_MODE (dest);
5474166e 6298 enum rtx_code rcode = GET_CODE (cond);
6299 rtx mask;
6300
 6301 /* Get the vector mask for the given relational operation. */
6302 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6303
6304 emit_insn(gen_selb (dest, op2, op1, mask));
6305
6306 return 1;
6307}
6308
6352eedf 6309static rtx
3754d046 6310spu_force_reg (machine_mode mode, rtx op)
6352eedf 6311{
6312 rtx x, r;
6313 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6314 {
6315 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6316 || GET_MODE (op) == BLKmode)
6317 return force_reg (mode, convert_to_mode (mode, op, 0));
6318 abort ();
6319 }
6320
6321 r = force_reg (GET_MODE (op), op);
6322 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6323 {
6324 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6325 if (x)
6326 return x;
6327 }
6328
6329 x = gen_reg_rtx (mode);
6330 emit_insn (gen_spu_convert (x, r));
6331 return x;
6332}
6333
6334static void
6335spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6336{
6337 HOST_WIDE_INT v = 0;
6338 int lsbits;
6339 /* Check the range of immediate operands. */
6340 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6341 {
6342 int range = p - SPU_BTI_7;
5df189be 6343
6344 if (!CONSTANT_P (op))
bf776685 6345 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6346 d->name,
6347 spu_builtin_range[range].low, spu_builtin_range[range].high);
6348
6349 if (GET_CODE (op) == CONST
6350 && (GET_CODE (XEXP (op, 0)) == PLUS
6351 || GET_CODE (XEXP (op, 0)) == MINUS))
6352 {
6353 v = INTVAL (XEXP (XEXP (op, 0), 1));
6354 op = XEXP (XEXP (op, 0), 0);
6355 }
6356 else if (GET_CODE (op) == CONST_INT)
6357 v = INTVAL (op);
5df189be 6358 else if (GET_CODE (op) == CONST_VECTOR
6359 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6360 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6361
 6362 /* The default for v is 0, which is valid in every range. */
6363 if (v < spu_builtin_range[range].low
6364 || v > spu_builtin_range[range].high)
bf776685 6365 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6366 d->name,
6367 spu_builtin_range[range].low, spu_builtin_range[range].high,
6368 v);
6352eedf 6369
6370 switch (p)
6371 {
6372 case SPU_BTI_S10_4:
6373 lsbits = 4;
6374 break;
6375 case SPU_BTI_U16_2:
 6376 /* This is only used in lqa and stqa. Even though the insns
 6377 encode 16 bits of the address (all but the 2 least
 6378 significant), only 14 bits are used because the address is
 6379 masked to be 16-byte aligned. */
6380 lsbits = 4;
6381 break;
6382 case SPU_BTI_S16_2:
6383 /* This is used for lqr and stqr. */
6384 lsbits = 2;
6385 break;
6386 default:
6387 lsbits = 0;
6388 }
6389
6390 if (GET_CODE (op) == LABEL_REF
6391 || (GET_CODE (op) == SYMBOL_REF
6392 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6393 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6394 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6395 d->name);
6396 }
6397}
6398
6399
70ca06f8 6400static int
5df189be 6401expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6402 rtx target, rtx ops[])
6403{
bc620c5c 6404 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6405 int i = 0, a;
6352eedf 6406
6407 /* Expand the arguments into rtl. */
6408
6409 if (d->parm[0] != SPU_BTI_VOID)
6410 ops[i++] = target;
6411
70ca06f8 6412 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6413 {
5df189be 6414 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6415 if (arg == 0)
6416 abort ();
b9c74b4d 6417 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6418 }
70ca06f8 6419
32f79657 6420 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6421 return i;
6352eedf 6422}
6423
6424static rtx
6425spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6426 tree exp, rtx target)
6352eedf 6427{
6428 rtx pat;
6429 rtx ops[8];
bc620c5c 6430 enum insn_code icode = (enum insn_code) d->icode;
3754d046 6431 machine_mode mode, tmode;
6352eedf 6432 int i, p;
70ca06f8 6433 int n_operands;
6352eedf 6434 tree return_type;
6435
6436 /* Set up ops[] with values from arglist. */
70ca06f8 6437 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6438
6439 /* Handle the target operand which must be operand 0. */
6440 i = 0;
6441 if (d->parm[0] != SPU_BTI_VOID)
6442 {
6443
 6444 /* We prefer the mode specified for the match_operand; otherwise
 6445 we use the mode from the builtin function prototype. */
6446 tmode = insn_data[d->icode].operand[0].mode;
6447 if (tmode == VOIDmode)
6448 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6449
 6450 /* Try to use target, because not using it can lead to extra copies,
 6451 and when all of the registers are in use, extra copies lead
 6452 to extra spills. */
6453 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6454 ops[0] = target;
6455 else
6456 target = ops[0] = gen_reg_rtx (tmode);
6457
6458 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6459 abort ();
6460
6461 i++;
6462 }
6463
a76866d3 6464 if (d->fcode == SPU_MASK_FOR_LOAD)
6465 {
3754d046 6466 machine_mode mode = insn_data[icode].operand[1].mode;
a76866d3 6467 tree arg;
6468 rtx addr, op, pat;
6469
6470 /* get addr */
5df189be 6471 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6472 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6473 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6474 addr = memory_address (mode, op);
6475
6476 /* negate addr */
6477 op = gen_reg_rtx (GET_MODE (addr));
d1f9b275 6478 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
a76866d3 6479 op = gen_rtx_MEM (mode, op);
6480
6481 pat = GEN_FCN (icode) (target, op);
6482 if (!pat)
6483 return 0;
6484 emit_insn (pat);
6485 return target;
6486 }
6487
6352eedf 6488 /* Ignore align_hint, but still expand its args in case they have
 6489 side effects. */
6490 if (icode == CODE_FOR_spu_align_hint)
6491 return 0;
6492
6493 /* Handle the rest of the operands. */
70ca06f8 6494 for (p = 1; i < n_operands; i++, p++)
6352eedf 6495 {
6496 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6497 mode = insn_data[d->icode].operand[i].mode;
6498 else
6499 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6500
6501 /* mode can be VOIDmode here for labels */
6502
6503 /* For specific intrinsics with an immediate operand, e.g.,
6504 si_ai(), we sometimes need to convert the scalar argument to a
6505 vector argument by splatting the scalar. */
6506 if (VECTOR_MODE_P (mode)
6507 && (GET_CODE (ops[i]) == CONST_INT
6508 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6509 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6510 {
6511 if (GET_CODE (ops[i]) == CONST_INT)
6512 ops[i] = spu_const (mode, INTVAL (ops[i]));
6513 else
6514 {
6515 rtx reg = gen_reg_rtx (mode);
3754d046 6516 machine_mode imode = GET_MODE_INNER (mode);
6352eedf 6517 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6518 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6519 if (imode != GET_MODE (ops[i]))
6520 ops[i] = convert_to_mode (imode, ops[i],
6521 TYPE_UNSIGNED (spu_builtin_types
6522 [d->parm[i]]));
6523 emit_insn (gen_spu_splats (reg, ops[i]));
6524 ops[i] = reg;
6525 }
6526 }
6527
5df189be 6528 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6529
6352eedf 6530 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6531 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6532 }
6533
70ca06f8 6534 switch (n_operands)
6352eedf 6535 {
6536 case 0:
6537 pat = GEN_FCN (icode) (0);
6538 break;
6539 case 1:
6540 pat = GEN_FCN (icode) (ops[0]);
6541 break;
6542 case 2:
6543 pat = GEN_FCN (icode) (ops[0], ops[1]);
6544 break;
6545 case 3:
6546 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6547 break;
6548 case 4:
6549 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6550 break;
6551 case 5:
6552 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6553 break;
6554 case 6:
6555 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6556 break;
6557 default:
6558 abort ();
6559 }
6560
6561 if (!pat)
6562 abort ();
6563
6564 if (d->type == B_CALL || d->type == B_BISLED)
6565 emit_call_insn (pat);
6566 else if (d->type == B_JUMP)
6567 {
6568 emit_jump_insn (pat);
6569 emit_barrier ();
6570 }
6571 else
6572 emit_insn (pat);
6573
6574 return_type = spu_builtin_types[d->parm[0]];
6575 if (d->parm[0] != SPU_BTI_VOID
6576 && GET_MODE (target) != TYPE_MODE (return_type))
6577 {
 6578 /* target is the return value. It should always have the mode of
 6579 the builtin function prototype. */
6580 target = spu_force_reg (TYPE_MODE (return_type), target);
6581 }
6582
6583 return target;
6584}
6585
6586rtx
6587spu_expand_builtin (tree exp,
6588 rtx target,
6589 rtx subtarget ATTRIBUTE_UNUSED,
3754d046 6590 machine_mode mode ATTRIBUTE_UNUSED,
6352eedf 6591 int ignore ATTRIBUTE_UNUSED)
6592{
5df189be 6593 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6594 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6595 struct spu_builtin_description *d;
6596
6597 if (fcode < NUM_SPU_BUILTINS)
6598 {
6599 d = &spu_builtins[fcode];
6600
5df189be 6601 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6602 }
6603 abort ();
6604}
6605
a76866d3 6606/* Implement targetm.vectorize.builtin_mask_for_load. */
6607static tree
6608spu_builtin_mask_for_load (void)
6609{
0c5c4d59 6610 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6611}
5df189be 6612
a28df51d 6613/* Implement targetm.vectorize.builtin_vectorization_cost. */
6614static int
0822b158 6615spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
d13adc77 6616 tree vectype,
0822b158 6617 int misalign ATTRIBUTE_UNUSED)
559093aa 6618{
d13adc77 6619 unsigned elements;
6620
559093aa 6621 switch (type_of_cost)
6622 {
6623 case scalar_stmt:
6624 case vector_stmt:
6625 case vector_load:
6626 case vector_store:
6627 case vec_to_scalar:
6628 case scalar_to_vec:
6629 case cond_branch_not_taken:
6630 case vec_perm:
5df2530b 6631 case vec_promote_demote:
559093aa 6632 return 1;
6633
6634 case scalar_store:
6635 return 10;
6636
6637 case scalar_load:
6638 /* Load + rotate. */
6639 return 2;
6640
6641 case unaligned_load:
72e995da 6642 case vector_gather_load:
6643 case vector_scatter_store:
559093aa 6644 return 2;
6645
6646 case cond_branch_taken:
6647 return 6;
6648
d13adc77 6649 case vec_construct:
6650 elements = TYPE_VECTOR_SUBPARTS (vectype);
6651 return elements / 2 + 1;
6652
559093aa 6653 default:
6654 gcc_unreachable ();
6655 }
a28df51d 6656}
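/* Worked example (an informal reading of the formula above): for a V4SImode
   vec_construct, TYPE_VECTOR_SUBPARTS is 4, so the cost is 4 / 2 + 1 = 3,
   roughly one insert per pair of elements plus the initial load of the
   constant parts.  */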
6657
4db2b577 6658/* Implement targetm.vectorize.init_cost. */
6659
61b33788 6660static void *
4db2b577 6661spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6662{
f97dec81 6663 unsigned *cost = XNEWVEC (unsigned, 3);
6664 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
4db2b577 6665 return cost;
6666}
6667
6668/* Implement targetm.vectorize.add_stmt_cost. */
6669
61b33788 6670static unsigned
4db2b577 6671spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
f97dec81 6672 struct _stmt_vec_info *stmt_info, int misalign,
6673 enum vect_cost_model_location where)
4db2b577 6674{
6675 unsigned *cost = (unsigned *) data;
6676 unsigned retval = 0;
6677
6678 if (flag_vect_cost_model)
6679 {
f97dec81 6680 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4db2b577 6681 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6682
6683 /* Statements in an inner loop relative to the loop being
6684 vectorized are weighted more heavily. The value here is
6685 arbitrary and could potentially be improved with analysis. */
f97dec81 6686 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4db2b577 6687 count *= 50; /* FIXME. */
6688
6689 retval = (unsigned) (count * stmt_cost);
f97dec81 6690 cost[where] += retval;
4db2b577 6691 }
6692
6693 return retval;
6694}
6695
6696/* Implement targetm.vectorize.finish_cost. */
6697
f97dec81 6698static void
6699spu_finish_cost (void *data, unsigned *prologue_cost,
6700 unsigned *body_cost, unsigned *epilogue_cost)
4db2b577 6701{
f97dec81 6702 unsigned *cost = (unsigned *) data;
6703 *prologue_cost = cost[vect_prologue];
6704 *body_cost = cost[vect_body];
6705 *epilogue_cost = cost[vect_epilogue];
4db2b577 6706}
6707
6708/* Implement targetm.vectorize.destroy_cost_data. */
6709
61b33788 6710static void
4db2b577 6711spu_destroy_cost_data (void *data)
6712{
6713 free (data);
6714}
6715
0e87db76 6716/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6717 after applying N iterations. This routine does not determine
 6718 how many iterations are required to reach the desired alignment. */
6719
6720static bool
a9f1838b 6721spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6722{
6723 if (is_packed)
6724 return false;
6725
6726 /* All other types are naturally aligned. */
6727 return true;
6728}
6729
6cf5579e 6730/* Return the appropriate mode for a named address pointer. */
f77c4496 6731static scalar_int_mode
6cf5579e 6732spu_addr_space_pointer_mode (addr_space_t addrspace)
6733{
6734 switch (addrspace)
6735 {
6736 case ADDR_SPACE_GENERIC:
6737 return ptr_mode;
6738 case ADDR_SPACE_EA:
6739 return EAmode;
6740 default:
6741 gcc_unreachable ();
6742 }
6743}
6744
 6745/* Return the appropriate mode for an address in a named address space. */
f77c4496 6746static scalar_int_mode
6cf5579e 6747spu_addr_space_address_mode (addr_space_t addrspace)
6748{
6749 switch (addrspace)
6750 {
6751 case ADDR_SPACE_GENERIC:
6752 return Pmode;
6753 case ADDR_SPACE_EA:
6754 return EAmode;
6755 default:
6756 gcc_unreachable ();
6757 }
6758}
6759
6760/* Determine if one named address space is a subset of another. */
6761
6762static bool
6763spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6764{
6765 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6766 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6767
6768 if (subset == superset)
6769 return true;
6770
6771 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6772 being subsets but instead as disjoint address spaces. */
6773 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6774 return false;
6775
6776 else
6777 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6778}
6779
6780/* Convert from one address space to another. */
6781static rtx
6782spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6783{
6784 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6785 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6786
6787 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6788 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6789
6790 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6791 {
6792 rtx result, ls;
6793
6794 ls = gen_const_mem (DImode,
6795 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6796 set_mem_align (ls, 128);
6797
6798 result = gen_reg_rtx (Pmode);
6799 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6800 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6801 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6802 ls, const0_rtx, Pmode, 1);
6803
6804 emit_insn (gen_subsi3 (result, op, ls));
6805
6806 return result;
6807 }
6808
6809 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6810 {
6811 rtx result, ls;
6812
6813 ls = gen_const_mem (DImode,
6814 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6815 set_mem_align (ls, 128);
6816
6817 result = gen_reg_rtx (EAmode);
6818 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6819 op = force_reg (Pmode, op);
6820 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6821 ls, const0_rtx, EAmode, 1);
6822 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6823
6824 if (EAmode == SImode)
6825 emit_insn (gen_addsi3 (result, op, ls));
6826 else
6827 emit_insn (gen_adddi3 (result, op, ls));
6828
6829 return result;
6830 }
6831
6832 else
6833 gcc_unreachable ();
6834}
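/* Behavioral sketch (illustrative): converting a generic (local-store)
   pointer P to __ea computes

       P == NULL ? NULL : P + __ea_local_store

   because the conditional move replaces the local-store base with 0 when
   P is 0; the __ea-to-generic direction subtracts the base under the same
   guard, so null pointers survive the conversion both ways.  */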
6835
6836
d52fd16a 6837/* Count the total number of instructions in each pipe and return the
 6838 maximum, which is used as the Minimum Iteration Interval (MII)
 6839 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
 6840 -2 marks instructions that can go in either pipe0 or pipe1. */
6841static int
6842spu_sms_res_mii (struct ddg *g)
6843{
6844 int i;
6845 unsigned t[4] = {0, 0, 0, 0};
6846
6847 for (i = 0; i < g->num_nodes; i++)
6848 {
0af56f80 6849 rtx_insn *insn = g->nodes[i].insn;
d52fd16a 6850 int p = get_pipe (insn) + 2;
6851
1e944a0b 6852 gcc_assert (p >= 0);
6853 gcc_assert (p < 4);
d52fd16a 6854
6855 t[p]++;
6856 if (dump_file && INSN_P (insn))
6857 fprintf (dump_file, "i%d %s %d %d\n",
6858 INSN_UID (insn),
6859 insn_data[INSN_CODE(insn)].name,
6860 p, t[p]);
6861 }
6862 if (dump_file)
6863 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6864
6865 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6866}
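/* Worked example with made-up counts: t = { 3, 0, 4, 2 } (3 either-pipe,
   4 pipe0, 2 pipe1 instructions) gives

       MII = MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5

   i.e. the dual-issue bound dominates here.  Note that t[1]
   (get_pipe () == -1) does not enter the formula.  */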
6867
6868
5df189be 6869void
6870spu_init_expanders (void)
9d98604b 6871{
5df189be 6872 if (cfun)
9d98604b 6873 {
6874 rtx r0, r1;
 6875 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
 6876 frame_pointer_needed is true. We don't know that until we're
 6877 expanding the prologue. */
6878 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6879
6880 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6881 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6882 to be treated as aligned, so generate them here. */
6883 r0 = gen_reg_rtx (SImode);
6884 r1 = gen_reg_rtx (SImode);
6885 mark_reg_pointer (r0, 128);
6886 mark_reg_pointer (r1, 128);
6887 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6888 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6889 }
ea32e033 6890}
6891
f77c4496 6892static scalar_int_mode
ea32e033 6893spu_libgcc_cmp_return_mode (void)
6894{
6895
 6896/* For SPU, word mode is TImode, so it is better to use SImode
 6897 for compare returns. */
6898 return SImode;
6899}
6900
f77c4496 6901static scalar_int_mode
ea32e033 6902spu_libgcc_shift_count_mode (void)
6903{
 6904/* For SPU, word mode is TImode, so it is better to use SImode
 6905 for shift counts. */
6906 return SImode;
6907}
5a976006 6908
a08dfd55 6909/* Implement targetm.section_type_flags. */
6910static unsigned int
6911spu_section_type_flags (tree decl, const char *name, int reloc)
6912{
6913 /* .toe needs to have type @nobits. */
6914 if (strcmp (name, ".toe") == 0)
6915 return SECTION_BSS;
6cf5579e 6916 /* Don't load _ea into the current address space. */
6917 if (strcmp (name, "._ea") == 0)
6918 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6919 return default_section_type_flags (decl, name, reloc);
6920}
c2233b46 6921
6cf5579e 6922/* Implement targetm.select_section. */
6923static section *
6924spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6925{
6926 /* Variables and constants defined in the __ea address space
6927 go into a special section named "._ea". */
6928 if (TREE_TYPE (decl) != error_mark_node
6929 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6930 {
6931 /* We might get called with string constants, but get_named_section
6932 doesn't like them as they are not DECLs. Also, we need to set
6933 flags in that case. */
6934 if (!DECL_P (decl))
6935 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6936
6937 return get_named_section (decl, "._ea", reloc);
6938 }
6939
6940 return default_elf_select_section (decl, reloc, align);
6941}
6942
6943/* Implement targetm.unique_section. */
6944static void
6945spu_unique_section (tree decl, int reloc)
6946{
6947 /* We don't support unique section names in the __ea address
6948 space for now. */
6949 if (TREE_TYPE (decl) != error_mark_node
6950 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6951 return;
6952
6953 default_unique_section (decl, reloc);
6954}
6955
56c7bfc2 6956/* Generate a constant or register which contains 2^SCALE. We assume
6957 the result is valid for MODE. Currently, MODE must be V4SFmode and
6958 SCALE must be SImode. */
6959rtx
3754d046 6960spu_gen_exp2 (machine_mode mode, rtx scale)
56c7bfc2 6961{
6962 gcc_assert (mode == V4SFmode);
6963 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6964 if (GET_CODE (scale) != CONST_INT)
6965 {
6966 /* unsigned int exp = (127 + scale) << 23;
6967 __vector float m = (__vector float) spu_splats (exp); */
6968 rtx reg = force_reg (SImode, scale);
6969 rtx exp = gen_reg_rtx (SImode);
6970 rtx mul = gen_reg_rtx (mode);
6971 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6972 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6973 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6974 return mul;
6975 }
6976 else
6977 {
6978 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6979 unsigned char arr[16];
6980 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6981 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6982 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6983 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6984 return array_to_constant (mode, arr);
6985 }
6986}
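/* Worked example for the constant path: SCALE = 3 gives exp = 130, so arr
   holds bytes { 0x41, 0x00, 0x00, 0x00 } in every word, i.e. the
   single-precision pattern 0x41000000 = 2^3 = 8.0f in each V4SF slot
   (exponent field 130 - 127 = 3, zero mantissa).  */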
6987
9d98604b 6988/* After reload, just change the convert into a move instruction
6989 or a dead instruction. */
6990void
6991spu_split_convert (rtx ops[])
6992{
6993 if (REGNO (ops[0]) == REGNO (ops[1]))
6994 emit_note (NOTE_INSN_DELETED);
6995 else
6996 {
6997 /* Use TImode always as this might help hard reg copyprop. */
6998 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6999 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7000 emit_insn (gen_move_insn (op0, op1));
7001 }
7002}
7003
b3878a6c 7004void
4cbad5bb 7005spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7006{
7007 fprintf (file, "# profile\n");
7008 fprintf (file, "brsl $75, _mcount\n");
7009}
7010
329c1e4e 7011/* Implement targetm.ref_may_alias_errno. */
7012static bool
7013spu_ref_may_alias_errno (ao_ref *ref)
7014{
7015 tree base = ao_ref_base (ref);
7016
 7017 /* With SPU newlib, errno is defined as something like
 7018 _impure_data._errno.
 7019 The default implementation of this target macro does not
 7020 recognize such expressions, so special-case them here. */
7021
7022 if (TREE_CODE (base) == VAR_DECL
7023 && !TREE_STATIC (base)
7024 && DECL_EXTERNAL (base)
7025 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7026 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7027 "_impure_data") == 0
7028 /* _errno is the first member of _impure_data. */
7029 && ref->offset == 0)
7030 return true;
7031
7032 return default_ref_may_alias_errno (ref);
7033}
7034
f17d2d13 7035/* Output thunk to FILE that implements a C++ virtual function call (with
7036 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7037 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7038 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7039 relative to the resulting this pointer. */
7040
7041static void
7042spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7043 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7044 tree function)
7045{
7046 rtx op[8];
7047
7048 /* Make sure unwind info is emitted for the thunk if needed. */
7049 final_start_function (emit_barrier (), file, 1);
7050
7051 /* Operand 0 is the target function. */
7052 op[0] = XEXP (DECL_RTL (function), 0);
7053
7054 /* Operand 1 is the 'this' pointer. */
7055 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7056 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7057 else
7058 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7059
7060 /* Operands 2/3 are the low/high halfwords of delta. */
7061 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7062 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7063
7064 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7065 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7066 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7067
7068 /* Operands 6/7 are temporary registers. */
7069 op[6] = gen_rtx_REG (Pmode, 79);
7070 op[7] = gen_rtx_REG (Pmode, 78);
7071
7072 /* Add DELTA to this pointer. */
7073 if (delta)
7074 {
7075 if (delta >= -0x200 && delta < 0x200)
7076 output_asm_insn ("ai\t%1,%1,%2", op);
7077 else if (delta >= -0x8000 && delta < 0x8000)
7078 {
7079 output_asm_insn ("il\t%6,%2", op);
7080 output_asm_insn ("a\t%1,%1,%6", op);
7081 }
7082 else
7083 {
7084 output_asm_insn ("ilhu\t%6,%3", op);
7085 output_asm_insn ("iohl\t%6,%2", op);
7086 output_asm_insn ("a\t%1,%1,%6", op);
7087 }
7088 }
7089
7090 /* Perform vcall adjustment. */
7091 if (vcall_offset)
7092 {
7093 output_asm_insn ("lqd\t%7,0(%1)", op);
7094 output_asm_insn ("rotqby\t%7,%7,%1", op);
7095
7096 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7097 output_asm_insn ("ai\t%7,%7,%4", op);
7098 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7099 {
7100 output_asm_insn ("il\t%6,%4", op);
7101 output_asm_insn ("a\t%7,%7,%6", op);
7102 }
7103 else
7104 {
7105 output_asm_insn ("ilhu\t%6,%5", op);
7106 output_asm_insn ("iohl\t%6,%4", op);
7107 output_asm_insn ("a\t%7,%7,%6", op);
7108 }
7109
7110 output_asm_insn ("lqd\t%6,0(%7)", op);
7111 output_asm_insn ("rotqby\t%6,%6,%7", op);
7112 output_asm_insn ("a\t%1,%1,%6", op);
7113 }
7114
7115 /* Jump to target. */
7116 output_asm_insn ("br\t%0", op);
7117
7118 final_end_function ();
7119}
7120
d5065e6e 7121/* Canonicalize a comparison from one we don't have to one we do have. */
7122static void
7123spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7124 bool op0_preserve_value)
7125{
7126 if (!op0_preserve_value
7127 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7128 {
7129 rtx tem = *op0;
7130 *op0 = *op1;
7131 *op1 = tem;
7132 *code = (int)swap_condition ((enum rtx_code)*code);
7133 }
7134}
39c0ba8b 7135
7136/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7137 to perform. MEM is the memory on which to operate. VAL is the second
7138 operand of the binary operator. BEFORE and AFTER are optional locations to
 7139 return the value of MEM either before or after the operation. */
7140void
7141spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7142 rtx orig_before, rtx orig_after)
7143{
7144 machine_mode mode = GET_MODE (mem);
7145 rtx before = orig_before, after = orig_after;
7146
7147 if (before == NULL_RTX)
7148 before = gen_reg_rtx (mode);
7149
7150 emit_move_insn (before, mem);
7151
7152 if (code == MULT) /* NAND operation */
7153 {
7154 rtx x = expand_simple_binop (mode, AND, before, val,
7155 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7156 after = expand_simple_unop (mode, NOT, x, after, 1);
7157 }
7158 else
7159 {
7160 after = expand_simple_binop (mode, code, before, val,
7161 after, 1, OPTAB_LIB_WIDEN);
7162 }
7163
7164 emit_move_insn (mem, after);
7165
7166 if (orig_after && after != orig_after)
7167 emit_move_insn (orig_after, after);
7168}
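/* The NAND case above is equivalent to this scalar C sketch (illustrative):

       old  = *mem;            -- returned via BEFORE
       *mem = ~(old & val);    -- returned via AFTER

   while every other CODE maps directly onto expand_simple_binop.  */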
7169
5f6dcf1a 7170/* Implement TARGET_MODES_TIEABLE_P. */
7171
7172static bool
7173spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7174{
7175 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7176 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7177}
b56a9dbc 7178
7179/* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7180 in the lowpart of a register, which is only true for SPU. */
7181
7182static bool
7183spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7184{
7185 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7186 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7187 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7188}
050dd610 7189
7190/* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7191
7192static bool
e524465a 7193spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
050dd610 7194{
7195 return inprec <= 32 && outprec <= inprec;
7196}
579d67ba 7197
1cdbc719 7198/* Implement TARGET_STATIC_RTX_ALIGNMENT.
7199
7200 Make all static objects 16-byte aligned. This allows us to assume
7201 they are also padded to 16 bytes, which means we can use a single
7202 load or store instruction to access them. */
7203
7204static HOST_WIDE_INT
7205spu_static_rtx_alignment (machine_mode mode)
7206{
7207 return MAX (GET_MODE_ALIGNMENT (mode), 128);
7208}
7209
579d67ba 7210/* Implement TARGET_CONSTANT_ALIGNMENT.
7211
7212 Make all static objects 16-byte aligned. This allows us to assume
7213 they are also padded to 16 bytes, which means we can use a single
7214 load or store instruction to access them. */
7215
7216static HOST_WIDE_INT
7217spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7218{
7219 return MAX (align, 128);
7220}
3defb88e 7221\f
7222/* Table of machine attributes. */
7223static const struct attribute_spec spu_attribute_table[] =
7224{
672bc44d 7225 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7226 affects_type_identity, handler, exclude } */
7227 { "naked", 0, 0, true, false, false, false,
7228 spu_handle_fndecl_attribute, NULL },
7229 { "spu_vector", 0, 0, false, true, false, false,
7230 spu_handle_vector_attribute, NULL },
7231 { NULL, 0, 0, false, false, false, false, NULL, NULL }
3defb88e 7232};
7233
7234/* TARGET overrides. */
7235
e46fbef5 7236#undef TARGET_LRA_P
7237#define TARGET_LRA_P hook_bool_void_false
7238
3defb88e 7239#undef TARGET_ADDR_SPACE_POINTER_MODE
7240#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7241
7242#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7243#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7244
7245#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7246#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7247 spu_addr_space_legitimate_address_p
7248
7249#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7250#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7251
7252#undef TARGET_ADDR_SPACE_SUBSET_P
7253#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7254
7255#undef TARGET_ADDR_SPACE_CONVERT
7256#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7257
7258#undef TARGET_INIT_BUILTINS
7259#define TARGET_INIT_BUILTINS spu_init_builtins
7260#undef TARGET_BUILTIN_DECL
7261#define TARGET_BUILTIN_DECL spu_builtin_decl
7262
7263#undef TARGET_EXPAND_BUILTIN
7264#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7265
7266#undef TARGET_UNWIND_WORD_MODE
7267#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7268
7269#undef TARGET_LEGITIMIZE_ADDRESS
7270#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7271
7272/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7273   and .quad for the debugger.  Once the assembler is known to be fixed,
7274   these overrides can be removed. */
7275#undef TARGET_ASM_UNALIGNED_SI_OP
7276#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7277
7278#undef TARGET_ASM_ALIGNED_DI_OP
7279#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7280
7281/* The .8byte directive doesn't seem to work well for a 32-bit
7282   architecture. */
7283#undef TARGET_ASM_UNALIGNED_DI_OP
7284#define TARGET_ASM_UNALIGNED_DI_OP NULL
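
/* Net effect (illustrative): assemble_integer emits "\t.long\t" for
   4-byte values regardless of alignment and "\t.quad\t" only for aligned
   8-byte values; with the unaligned DI directive left NULL, the generic
   code is expected to fall back to splitting an unaligned 8-byte value
   into smaller pieces rather than using .8byte.  */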
7285
7286#undef TARGET_RTX_COSTS
7287#define TARGET_RTX_COSTS spu_rtx_costs
7288
7289#undef TARGET_ADDRESS_COST
d9c5e5f4 7290#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
3defb88e 7291
7292#undef TARGET_SCHED_ISSUE_RATE
7293#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7294
7295#undef TARGET_SCHED_INIT_GLOBAL
7296#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7297
7298#undef TARGET_SCHED_INIT
7299#define TARGET_SCHED_INIT spu_sched_init
7300
7301#undef TARGET_SCHED_VARIABLE_ISSUE
7302#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7303
7304#undef TARGET_SCHED_REORDER
7305#define TARGET_SCHED_REORDER spu_sched_reorder
7306
7307#undef TARGET_SCHED_REORDER2
7308#define TARGET_SCHED_REORDER2 spu_sched_reorder
7309
7310#undef TARGET_SCHED_ADJUST_COST
7311#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7312
7313#undef TARGET_ATTRIBUTE_TABLE
7314#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7315
7316#undef TARGET_ASM_INTEGER
7317#define TARGET_ASM_INTEGER spu_assemble_integer
7318
7319#undef TARGET_SCALAR_MODE_SUPPORTED_P
7320#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7321
7322#undef TARGET_VECTOR_MODE_SUPPORTED_P
7323#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7324
7325#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7326#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7327
7328#undef TARGET_ASM_GLOBALIZE_LABEL
7329#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7330
7331#undef TARGET_PASS_BY_REFERENCE
7332#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7333
7334#undef TARGET_FUNCTION_ARG
7335#define TARGET_FUNCTION_ARG spu_function_arg
7336
7337#undef TARGET_FUNCTION_ARG_ADVANCE
7338#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7339
8adb95eb 7340#undef TARGET_FUNCTION_ARG_OFFSET
7341#define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7342
d7ab0e3d 7343#undef TARGET_FUNCTION_ARG_PADDING
7344#define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7345
3defb88e 7346#undef TARGET_MUST_PASS_IN_STACK
7347#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7348
7349#undef TARGET_BUILD_BUILTIN_VA_LIST
7350#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7351
7352#undef TARGET_EXPAND_BUILTIN_VA_START
7353#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7354
7355#undef TARGET_SETUP_INCOMING_VARARGS
7356#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7357
7358#undef TARGET_MACHINE_DEPENDENT_REORG
7359#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7360
7361#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7362#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7363
7364#undef TARGET_INIT_LIBFUNCS
7365#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7366
7367#undef TARGET_RETURN_IN_MEMORY
7368#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7369
7370#undef TARGET_ENCODE_SECTION_INFO
7371#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7372
7373#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7374#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7375
7376#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7377#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7378
7379#undef TARGET_VECTORIZE_INIT_COST
7380#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7381
7382#undef TARGET_VECTORIZE_ADD_STMT_COST
7383#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7384
7385#undef TARGET_VECTORIZE_FINISH_COST
7386#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7387
7388#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7389#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7390
7391#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7392#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7393
7394#undef TARGET_LIBGCC_CMP_RETURN_MODE
7395#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7396
7397#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7398#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7399
7400#undef TARGET_SCHED_SMS_RES_MII
7401#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7402
7403#undef TARGET_SECTION_TYPE_FLAGS
7404#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7405
7406#undef TARGET_ASM_SELECT_SECTION
7407#define TARGET_ASM_SELECT_SECTION spu_select_section
7408
7409#undef TARGET_ASM_UNIQUE_SECTION
7410#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7411
7412#undef TARGET_LEGITIMATE_ADDRESS_P
7413#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7414
7415#undef TARGET_LEGITIMATE_CONSTANT_P
7416#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7417
7418#undef TARGET_TRAMPOLINE_INIT
7419#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7420
08c6cbd2 7421#undef TARGET_WARN_FUNC_RETURN
7422#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7423
3defb88e 7424#undef TARGET_OPTION_OVERRIDE
7425#define TARGET_OPTION_OVERRIDE spu_option_override
7426
7427#undef TARGET_CONDITIONAL_REGISTER_USAGE
7428#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7429
7430#undef TARGET_REF_MAY_ALIAS_ERRNO
7431#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7432
7433#undef TARGET_ASM_OUTPUT_MI_THUNK
7434#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7435#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7436#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7437
7438/* Variable tracking should be run after all optimizations that change
7439   the order of insns.  It also needs a valid CFG.  */
7440#undef TARGET_DELAY_VARTRACK
7441#define TARGET_DELAY_VARTRACK true
7442
d5065e6e 7443#undef TARGET_CANONICALIZE_COMPARISON
7444#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7445
5f35dd0e 7446#undef TARGET_CAN_USE_DOLOOP_P
7447#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7448
5f6dcf1a 7449#undef TARGET_MODES_TIEABLE_P
7450#define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7451
74f68e49 7452#undef TARGET_HARD_REGNO_NREGS
7453#define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7454
b56a9dbc 7455#undef TARGET_CAN_CHANGE_MODE_CLASS
7456#define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7457
050dd610 7458#undef TARGET_TRULY_NOOP_TRUNCATION
7459#define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7460
1cdbc719 7461#undef TARGET_STATIC_RTX_ALIGNMENT
7462#define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
579d67ba 7463#undef TARGET_CONSTANT_ALIGNMENT
7464#define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7465
41af5cf4 7466#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
7467#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
7468
3defb88e 7469struct gcc_target targetm = TARGET_INITIALIZER;
7470
c2233b46 7471#include "gt-spu.h"