]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/spu/spu.c
Adjust scan string for PIE
[thirdparty/gcc.git] / gcc / config / spu / spu.c
CommitLineData
d353bf18 1/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
b20a8bb4 30#include "hash-set.h"
31#include "machmode.h"
32#include "vec.h"
33#include "double-int.h"
34#include "input.h"
35#include "alias.h"
36#include "symtab.h"
37#include "wide-int.h"
38#include "inchash.h"
644459d0 39#include "tree.h"
b20a8bb4 40#include "fold-const.h"
9ed99284 41#include "stringpool.h"
42#include "stor-layout.h"
43#include "calls.h"
44#include "varasm.h"
644459d0 45#include "expr.h"
34517c64 46#include "insn-codes.h"
644459d0 47#include "optabs.h"
48#include "except.h"
a3020f2f 49#include "input.h"
644459d0 50#include "function.h"
51#include "output.h"
94ea8568 52#include "predict.h"
53#include "dominance.h"
54#include "cfg.h"
55#include "cfgrtl.h"
56#include "cfganal.h"
57#include "lcm.h"
58#include "cfgbuild.h"
59#include "cfgcleanup.h"
644459d0 60#include "basic-block.h"
0b205f4c 61#include "diagnostic-core.h"
644459d0 62#include "ggc.h"
644459d0 63#include "tm_p.h"
64#include "target.h"
65#include "target-def.h"
66#include "langhooks.h"
67#include "reload.h"
644459d0 68#include "sched-int.h"
69#include "params.h"
bc61cadb 70#include "hash-table.h"
71#include "tree-ssa-alias.h"
72#include "internal-fn.h"
73#include "gimple-fold.h"
74#include "tree-eh.h"
75#include "gimple-expr.h"
76#include "is-a.h"
e795d6e1 77#include "gimple.h"
a8783bee 78#include "gimplify.h"
644459d0 79#include "tm-constrs.h"
5a976006 80#include "sbitmap.h"
5a976006 81#include "df.h"
94ea8568 82#include "ddg.h"
83#include "timevar.h"
b9ed1410 84#include "dumpfile.h"
a7a0184d 85#include "cfgloop.h"
f7715905 86#include "builtins.h"
6f4e40cd 87#include "rtl-iter.h"
6352eedf 88
89/* Builtin types, data and prototypes. */
c2233b46 90
91enum spu_builtin_type_index
92{
93 SPU_BTI_END_OF_PARAMS,
94
95 /* We create new type nodes for these. */
96 SPU_BTI_V16QI,
97 SPU_BTI_V8HI,
98 SPU_BTI_V4SI,
99 SPU_BTI_V2DI,
100 SPU_BTI_V4SF,
101 SPU_BTI_V2DF,
102 SPU_BTI_UV16QI,
103 SPU_BTI_UV8HI,
104 SPU_BTI_UV4SI,
105 SPU_BTI_UV2DI,
106
107 /* A 16-byte type. (Implemented with V16QI_type_node) */
108 SPU_BTI_QUADWORD,
109
110 /* These all correspond to intSI_type_node */
111 SPU_BTI_7,
112 SPU_BTI_S7,
113 SPU_BTI_U7,
114 SPU_BTI_S10,
115 SPU_BTI_S10_4,
116 SPU_BTI_U14,
117 SPU_BTI_16,
118 SPU_BTI_S16,
119 SPU_BTI_S16_2,
120 SPU_BTI_U16,
121 SPU_BTI_U16_2,
122 SPU_BTI_U18,
123
124 /* These correspond to the standard types */
125 SPU_BTI_INTQI,
126 SPU_BTI_INTHI,
127 SPU_BTI_INTSI,
128 SPU_BTI_INTDI,
129
130 SPU_BTI_UINTQI,
131 SPU_BTI_UINTHI,
132 SPU_BTI_UINTSI,
133 SPU_BTI_UINTDI,
134
135 SPU_BTI_FLOAT,
136 SPU_BTI_DOUBLE,
137
138 SPU_BTI_VOID,
139 SPU_BTI_PTR,
140
141 SPU_BTI_MAX
142};
143
144#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
145#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
146#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
147#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
148#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
149#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
150#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
151#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
152#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
153#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
154
155static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
156
6352eedf 157struct spu_builtin_range
158{
159 int low, high;
160};
161
162static struct spu_builtin_range spu_builtin_range[] = {
163 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
164 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
165 {0ll, 0x7fll}, /* SPU_BTI_U7 */
166 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
167 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
168 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
169 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
170 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
171 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
172 {0ll, 0xffffll}, /* SPU_BTI_U16 */
173 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
174 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
175};
176
644459d0 177\f
178/* Target specific attribute specifications. */
179char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
180
181/* Prototypes and external defs. */
0af56f80 182static int get_pipe (rtx_insn *insn);
644459d0 183static int spu_naked_function_p (tree func);
644459d0 184static int mem_is_padded_component_ref (rtx x);
c7b91b14 185static void fix_range (const char *);
9d98604b 186static rtx spu_expand_load (rtx, rtx, rtx, int);
644459d0 187
5474166e 188/* Which instruction set architecture to use. */
189int spu_arch;
190/* Which cpu are we tuning for. */
191int spu_tune;
192
5a976006 193/* The hardware requires 8 insns between a hint and the branch it
194 effects. This variable describes how many rtl instructions the
195 compiler needs to see before inserting a hint, and then the compiler
196 will insert enough nops to make it at least 8 insns. The default is
197 for the compiler to allow up to 2 nops be emitted. The nops are
198 inserted in pairs, so we round down. */
199int spu_hint_dist = (8*4) - (2*4);
200
644459d0 201enum spu_immediate {
202 SPU_NONE,
203 SPU_IL,
204 SPU_ILA,
205 SPU_ILH,
206 SPU_ILHU,
207 SPU_ORI,
208 SPU_ORHI,
209 SPU_ORBI,
99369027 210 SPU_IOHL
644459d0 211};
dea01258 212enum immediate_class
213{
214 IC_POOL, /* constant pool */
215 IC_IL1, /* one il* instruction */
216 IC_IL2, /* both ilhu and iohl instructions */
217 IC_IL1s, /* one il* instruction */
218 IC_IL2s, /* both ilhu and iohl instructions */
219 IC_FSMBI, /* the fsmbi instruction */
220 IC_CPAT, /* one of the c*d instructions */
5df189be 221 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 222};
644459d0 223
224static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
225static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 226static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
227static enum immediate_class classify_immediate (rtx op,
3754d046 228 machine_mode mode);
644459d0 229
6cf5579e 230/* Pointer mode for __ea references. */
231#define EAmode (spu_ea_model != 32 ? DImode : SImode)
232
ef51d1e3 233\f
5eb28709 234/* Define the structure for the machine field in struct function. */
235struct GTY(()) machine_function
236{
237 /* Register to use for PIC accesses. */
238 rtx pic_reg;
239};
240
241/* How to allocate a 'struct machine_function'. */
242static struct machine_function *
243spu_init_machine_status (void)
244{
25a27413 245 return ggc_cleared_alloc<machine_function> ();
5eb28709 246}
247
4c834714 248/* Implement TARGET_OPTION_OVERRIDE. */
249static void
250spu_option_override (void)
644459d0 251{
5eb28709 252 /* Set up function hooks. */
253 init_machine_status = spu_init_machine_status;
254
14d408d9 255 /* Small loops will be unpeeled at -O3. For SPU it is more important
256 to keep code small by default. */
686e2769 257 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 258 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 259 global_options.x_param_values,
260 global_options_set.x_param_values);
14d408d9 261
644459d0 262 flag_omit_frame_pointer = 1;
263
5a976006 264 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 265 if (align_functions < 8)
266 align_functions = 8;
c7b91b14 267
5a976006 268 spu_hint_dist = 8*4 - spu_max_nops*4;
269 if (spu_hint_dist < 0)
270 spu_hint_dist = 0;
271
c7b91b14 272 if (spu_fixed_range_string)
273 fix_range (spu_fixed_range_string);
5474166e 274
275 /* Determine processor architectural level. */
276 if (spu_arch_string)
277 {
278 if (strcmp (&spu_arch_string[0], "cell") == 0)
279 spu_arch = PROCESSOR_CELL;
280 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
281 spu_arch = PROCESSOR_CELLEDP;
282 else
8e181c9d 283 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 284 }
285
286 /* Determine processor to tune for. */
287 if (spu_tune_string)
288 {
289 if (strcmp (&spu_tune_string[0], "cell") == 0)
290 spu_tune = PROCESSOR_CELL;
291 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
292 spu_tune = PROCESSOR_CELLEDP;
293 else
8e181c9d 294 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 295 }
98bbec1e 296
13684256 297 /* Change defaults according to the processor architecture. */
298 if (spu_arch == PROCESSOR_CELLEDP)
299 {
300 /* If no command line option has been otherwise specified, change
301 the default to -mno-safe-hints on celledp -- only the original
302 Cell/B.E. processors require this workaround. */
303 if (!(target_flags_explicit & MASK_SAFE_HINTS))
304 target_flags &= ~MASK_SAFE_HINTS;
305 }
306
98bbec1e 307 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 308}
309\f
310/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
311 struct attribute_spec.handler. */
312
644459d0 313/* True if MODE is valid for the target. By "valid", we mean able to
314 be manipulated in non-trivial ways. In particular, this means all
315 the arithmetic is supported. */
316static bool
3754d046 317spu_scalar_mode_supported_p (machine_mode mode)
644459d0 318{
319 switch (mode)
320 {
321 case QImode:
322 case HImode:
323 case SImode:
324 case SFmode:
325 case DImode:
326 case TImode:
327 case DFmode:
328 return true;
329
330 default:
331 return false;
332 }
333}
334
335/* Similarly for vector modes. "Supported" here is less strict. At
336 least some operations are supported; need to check optabs or builtins
337 for further details. */
338static bool
3754d046 339spu_vector_mode_supported_p (machine_mode mode)
644459d0 340{
341 switch (mode)
342 {
343 case V16QImode:
344 case V8HImode:
345 case V4SImode:
346 case V2DImode:
347 case V4SFmode:
348 case V2DFmode:
349 return true;
350
351 default:
352 return false;
353 }
354}
355
356/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
357 least significant bytes of the outer mode. This function returns
358 TRUE for the SUBREG's where this is correct. */
359int
360valid_subreg (rtx op)
361{
3754d046 362 machine_mode om = GET_MODE (op);
363 machine_mode im = GET_MODE (SUBREG_REG (op));
644459d0 364 return om != VOIDmode && im != VOIDmode
365 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 366 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
367 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 368}
369
370/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
fa7637bd 371 and adjust the start offset. */
644459d0 372static rtx
373adjust_operand (rtx op, HOST_WIDE_INT * start)
374{
3754d046 375 machine_mode mode;
644459d0 376 int op_size;
38aca5eb 377 /* Strip any paradoxical SUBREG. */
378 if (GET_CODE (op) == SUBREG
379 && (GET_MODE_BITSIZE (GET_MODE (op))
380 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 381 {
382 if (start)
383 *start -=
384 GET_MODE_BITSIZE (GET_MODE (op)) -
385 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
386 op = SUBREG_REG (op);
387 }
388 /* If it is smaller than SI, assure a SUBREG */
389 op_size = GET_MODE_BITSIZE (GET_MODE (op));
390 if (op_size < 32)
391 {
392 if (start)
393 *start += 32 - op_size;
394 op_size = 32;
395 }
396 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
397 mode = mode_for_size (op_size, MODE_INT, 0);
398 if (mode != GET_MODE (op))
399 op = gen_rtx_SUBREG (mode, op, 0);
400 return op;
401}
402
403void
404spu_expand_extv (rtx ops[], int unsignedp)
405{
9d98604b 406 rtx dst = ops[0], src = ops[1];
644459d0 407 HOST_WIDE_INT width = INTVAL (ops[2]);
408 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 409 HOST_WIDE_INT align_mask;
410 rtx s0, s1, mask, r0;
644459d0 411
9d98604b 412 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 413
9d98604b 414 if (MEM_P (src))
644459d0 415 {
9d98604b 416 /* First, determine if we need 1 TImode load or 2. We need only 1
417 if the bits being extracted do not cross the alignment boundary
418 as determined by the MEM and its address. */
419
420 align_mask = -MEM_ALIGN (src);
421 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 422 {
9d98604b 423 /* Alignment is sufficient for 1 load. */
424 s0 = gen_reg_rtx (TImode);
425 r0 = spu_expand_load (s0, 0, src, start / 8);
426 start &= 7;
427 if (r0)
428 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 429 }
9d98604b 430 else
431 {
432 /* Need 2 loads. */
433 s0 = gen_reg_rtx (TImode);
434 s1 = gen_reg_rtx (TImode);
435 r0 = spu_expand_load (s0, s1, src, start / 8);
436 start &= 7;
437
438 gcc_assert (start + width <= 128);
439 if (r0)
440 {
441 rtx r1 = gen_reg_rtx (SImode);
442 mask = gen_reg_rtx (TImode);
443 emit_move_insn (mask, GEN_INT (-1));
444 emit_insn (gen_rotqby_ti (s0, s0, r0));
445 emit_insn (gen_rotqby_ti (s1, s1, r0));
446 if (GET_CODE (r0) == CONST_INT)
447 r1 = GEN_INT (INTVAL (r0) & 15);
448 else
449 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
450 emit_insn (gen_shlqby_ti (mask, mask, r1));
451 emit_insn (gen_selb (s0, s1, s0, mask));
452 }
453 }
454
455 }
456 else if (GET_CODE (src) == SUBREG)
457 {
458 rtx r = SUBREG_REG (src);
459 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
460 s0 = gen_reg_rtx (TImode);
461 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
462 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
463 else
464 emit_move_insn (s0, src);
465 }
466 else
467 {
468 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
469 s0 = gen_reg_rtx (TImode);
470 emit_move_insn (s0, src);
644459d0 471 }
472
9d98604b 473 /* Now s0 is TImode and contains the bits to extract at start. */
474
475 if (start)
476 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
477
478 if (128 - width)
f5ff0b21 479 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 480
9d98604b 481 emit_move_insn (dst, s0);
644459d0 482}
483
484void
485spu_expand_insv (rtx ops[])
486{
487 HOST_WIDE_INT width = INTVAL (ops[1]);
488 HOST_WIDE_INT start = INTVAL (ops[2]);
489 HOST_WIDE_INT maskbits;
3754d046 490 machine_mode dst_mode;
644459d0 491 rtx dst = ops[0], src = ops[3];
4cbad5bb 492 int dst_size;
644459d0 493 rtx mask;
494 rtx shift_reg;
495 int shift;
496
497
498 if (GET_CODE (ops[0]) == MEM)
499 dst = gen_reg_rtx (TImode);
500 else
501 dst = adjust_operand (dst, &start);
502 dst_mode = GET_MODE (dst);
503 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
504
505 if (CONSTANT_P (src))
506 {
3754d046 507 machine_mode m =
644459d0 508 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
509 src = force_reg (m, convert_to_mode (m, src, 0));
510 }
511 src = adjust_operand (src, 0);
644459d0 512
513 mask = gen_reg_rtx (dst_mode);
514 shift_reg = gen_reg_rtx (dst_mode);
515 shift = dst_size - start - width;
516
517 /* It's not safe to use subreg here because the compiler assumes
518 that the SUBREG_REG is right justified in the SUBREG. */
519 convert_move (shift_reg, src, 1);
520
521 if (shift > 0)
522 {
523 switch (dst_mode)
524 {
525 case SImode:
526 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
527 break;
528 case DImode:
529 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
530 break;
531 case TImode:
532 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
533 break;
534 default:
535 abort ();
536 }
537 }
538 else if (shift < 0)
539 abort ();
540
541 switch (dst_size)
542 {
543 case 32:
544 maskbits = (-1ll << (32 - width - start));
545 if (start)
546 maskbits += (1ll << (32 - start));
547 emit_move_insn (mask, GEN_INT (maskbits));
548 break;
549 case 64:
550 maskbits = (-1ll << (64 - width - start));
551 if (start)
552 maskbits += (1ll << (64 - start));
553 emit_move_insn (mask, GEN_INT (maskbits));
554 break;
555 case 128:
556 {
557 unsigned char arr[16];
558 int i = start / 8;
559 memset (arr, 0, sizeof (arr));
560 arr[i] = 0xff >> (start & 7);
561 for (i++; i <= (start + width - 1) / 8; i++)
562 arr[i] = 0xff;
563 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
564 emit_move_insn (mask, array_to_constant (TImode, arr));
565 }
566 break;
567 default:
568 abort ();
569 }
570 if (GET_CODE (ops[0]) == MEM)
571 {
644459d0 572 rtx low = gen_reg_rtx (SImode);
644459d0 573 rtx rotl = gen_reg_rtx (SImode);
574 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 575 rtx addr;
576 rtx addr0;
577 rtx addr1;
644459d0 578 rtx mem;
579
9d98604b 580 addr = force_reg (Pmode, XEXP (ops[0], 0));
581 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 582 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
583 emit_insn (gen_negsi2 (rotl, low));
584 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
585 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 586 mem = change_address (ops[0], TImode, addr0);
644459d0 587 set_mem_alias_set (mem, 0);
588 emit_move_insn (dst, mem);
589 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 590 if (start + width > MEM_ALIGN (ops[0]))
591 {
592 rtx shl = gen_reg_rtx (SImode);
593 rtx mask1 = gen_reg_rtx (TImode);
594 rtx dst1 = gen_reg_rtx (TImode);
595 rtx mem1;
29c05e22 596 addr1 = plus_constant (Pmode, addr, 16);
9d98604b 597 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 598 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
599 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 600 mem1 = change_address (ops[0], TImode, addr1);
644459d0 601 set_mem_alias_set (mem1, 0);
602 emit_move_insn (dst1, mem1);
603 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
604 emit_move_insn (mem1, dst1);
605 }
9d98604b 606 emit_move_insn (mem, dst);
644459d0 607 }
608 else
71cd778d 609 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 610}
611
612
613int
614spu_expand_block_move (rtx ops[])
615{
616 HOST_WIDE_INT bytes, align, offset;
617 rtx src, dst, sreg, dreg, target;
618 int i;
619 if (GET_CODE (ops[2]) != CONST_INT
620 || GET_CODE (ops[3]) != CONST_INT
48eb4342 621 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 622 return 0;
623
624 bytes = INTVAL (ops[2]);
625 align = INTVAL (ops[3]);
626
627 if (bytes <= 0)
628 return 1;
629
630 dst = ops[0];
631 src = ops[1];
632
633 if (align == 16)
634 {
635 for (offset = 0; offset + 16 <= bytes; offset += 16)
636 {
637 dst = adjust_address (ops[0], V16QImode, offset);
638 src = adjust_address (ops[1], V16QImode, offset);
639 emit_move_insn (dst, src);
640 }
641 if (offset < bytes)
642 {
643 rtx mask;
644 unsigned char arr[16] = { 0 };
645 for (i = 0; i < bytes - offset; i++)
646 arr[i] = 0xff;
647 dst = adjust_address (ops[0], V16QImode, offset);
648 src = adjust_address (ops[1], V16QImode, offset);
649 mask = gen_reg_rtx (V16QImode);
650 sreg = gen_reg_rtx (V16QImode);
651 dreg = gen_reg_rtx (V16QImode);
652 target = gen_reg_rtx (V16QImode);
653 emit_move_insn (mask, array_to_constant (V16QImode, arr));
654 emit_move_insn (dreg, dst);
655 emit_move_insn (sreg, src);
656 emit_insn (gen_selb (target, dreg, sreg, mask));
657 emit_move_insn (dst, target);
658 }
659 return 1;
660 }
661 return 0;
662}
663
664enum spu_comp_code
665{ SPU_EQ, SPU_GT, SPU_GTU };
666
5474166e 667int spu_comp_icode[12][3] = {
668 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
669 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
670 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
671 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
672 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
673 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
674 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
675 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
676 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
677 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
678 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
679 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 680};
681
682/* Generate a compare for CODE. Return a brand-new rtx that represents
683 the result of the compare. GCC can figure this out too if we don't
684 provide all variations of compares, but GCC always wants to use
685 WORD_MODE, we can generate better code in most cases if we do it
686 ourselves. */
687void
74f4459c 688spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 689{
690 int reverse_compare = 0;
691 int reverse_test = 0;
5d70b918 692 rtx compare_result, eq_result;
693 rtx comp_rtx, eq_rtx;
3754d046 694 machine_mode comp_mode;
695 machine_mode op_mode;
b9c74b4d 696 enum spu_comp_code scode, eq_code;
697 enum insn_code ior_code;
74f4459c 698 enum rtx_code code = GET_CODE (cmp);
699 rtx op0 = XEXP (cmp, 0);
700 rtx op1 = XEXP (cmp, 1);
644459d0 701 int index;
5d70b918 702 int eq_test = 0;
644459d0 703
74f4459c 704 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 705 and so on, to keep the constant in operand 1. */
74f4459c 706 if (GET_CODE (op1) == CONST_INT)
644459d0 707 {
74f4459c 708 HOST_WIDE_INT val = INTVAL (op1) - 1;
709 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 710 switch (code)
711 {
712 case GE:
74f4459c 713 op1 = GEN_INT (val);
644459d0 714 code = GT;
715 break;
716 case LT:
74f4459c 717 op1 = GEN_INT (val);
644459d0 718 code = LE;
719 break;
720 case GEU:
74f4459c 721 op1 = GEN_INT (val);
644459d0 722 code = GTU;
723 break;
724 case LTU:
74f4459c 725 op1 = GEN_INT (val);
644459d0 726 code = LEU;
727 break;
728 default:
729 break;
730 }
731 }
732
686195ea 733 /* However, if we generate an integer result, performing a reverse test
734 would require an extra negation, so avoid that where possible. */
735 if (GET_CODE (op1) == CONST_INT && is_set == 1)
736 {
737 HOST_WIDE_INT val = INTVAL (op1) + 1;
738 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
739 switch (code)
740 {
741 case LE:
742 op1 = GEN_INT (val);
743 code = LT;
744 break;
745 case LEU:
746 op1 = GEN_INT (val);
747 code = LTU;
748 break;
749 default:
750 break;
751 }
752 }
753
5d70b918 754 comp_mode = SImode;
74f4459c 755 op_mode = GET_MODE (op0);
5d70b918 756
644459d0 757 switch (code)
758 {
759 case GE:
644459d0 760 scode = SPU_GT;
07027691 761 if (HONOR_NANS (op_mode))
5d70b918 762 {
763 reverse_compare = 0;
764 reverse_test = 0;
765 eq_test = 1;
766 eq_code = SPU_EQ;
767 }
768 else
769 {
770 reverse_compare = 1;
771 reverse_test = 1;
772 }
644459d0 773 break;
774 case LE:
644459d0 775 scode = SPU_GT;
07027691 776 if (HONOR_NANS (op_mode))
5d70b918 777 {
778 reverse_compare = 1;
779 reverse_test = 0;
780 eq_test = 1;
781 eq_code = SPU_EQ;
782 }
783 else
784 {
785 reverse_compare = 0;
786 reverse_test = 1;
787 }
644459d0 788 break;
789 case LT:
790 reverse_compare = 1;
791 reverse_test = 0;
792 scode = SPU_GT;
793 break;
794 case GEU:
795 reverse_compare = 1;
796 reverse_test = 1;
797 scode = SPU_GTU;
798 break;
799 case LEU:
800 reverse_compare = 0;
801 reverse_test = 1;
802 scode = SPU_GTU;
803 break;
804 case LTU:
805 reverse_compare = 1;
806 reverse_test = 0;
807 scode = SPU_GTU;
808 break;
809 case NE:
810 reverse_compare = 0;
811 reverse_test = 1;
812 scode = SPU_EQ;
813 break;
814
815 case EQ:
816 scode = SPU_EQ;
817 break;
818 case GT:
819 scode = SPU_GT;
820 break;
821 case GTU:
822 scode = SPU_GTU;
823 break;
824 default:
825 scode = SPU_EQ;
826 break;
827 }
828
644459d0 829 switch (op_mode)
830 {
831 case QImode:
832 index = 0;
833 comp_mode = QImode;
834 break;
835 case HImode:
836 index = 1;
837 comp_mode = HImode;
838 break;
839 case SImode:
840 index = 2;
841 break;
842 case DImode:
843 index = 3;
844 break;
845 case TImode:
846 index = 4;
847 break;
848 case SFmode:
849 index = 5;
850 break;
851 case DFmode:
852 index = 6;
853 break;
854 case V16QImode:
5474166e 855 index = 7;
856 comp_mode = op_mode;
857 break;
644459d0 858 case V8HImode:
5474166e 859 index = 8;
860 comp_mode = op_mode;
861 break;
644459d0 862 case V4SImode:
5474166e 863 index = 9;
864 comp_mode = op_mode;
865 break;
644459d0 866 case V4SFmode:
5474166e 867 index = 10;
868 comp_mode = V4SImode;
869 break;
644459d0 870 case V2DFmode:
5474166e 871 index = 11;
872 comp_mode = V2DImode;
644459d0 873 break;
5474166e 874 case V2DImode:
644459d0 875 default:
876 abort ();
877 }
878
74f4459c 879 if (GET_MODE (op1) == DFmode
07027691 880 && (scode != SPU_GT && scode != SPU_EQ))
881 abort ();
644459d0 882
74f4459c 883 if (is_set == 0 && op1 == const0_rtx
884 && (GET_MODE (op0) == SImode
686195ea 885 || GET_MODE (op0) == HImode
886 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
644459d0 887 {
888 /* Don't need to set a register with the result when we are
889 comparing against zero and branching. */
890 reverse_test = !reverse_test;
74f4459c 891 compare_result = op0;
644459d0 892 }
893 else
894 {
895 compare_result = gen_reg_rtx (comp_mode);
896
897 if (reverse_compare)
898 {
74f4459c 899 rtx t = op1;
900 op1 = op0;
901 op0 = t;
644459d0 902 }
903
904 if (spu_comp_icode[index][scode] == 0)
905 abort ();
906
907 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 908 (op0, op_mode))
909 op0 = force_reg (op_mode, op0);
644459d0 910 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 911 (op1, op_mode))
912 op1 = force_reg (op_mode, op1);
644459d0 913 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 914 op0, op1);
644459d0 915 if (comp_rtx == 0)
916 abort ();
917 emit_insn (comp_rtx);
918
5d70b918 919 if (eq_test)
920 {
921 eq_result = gen_reg_rtx (comp_mode);
922 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 923 op0, op1);
5d70b918 924 if (eq_rtx == 0)
925 abort ();
926 emit_insn (eq_rtx);
d6bf3b14 927 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 928 gcc_assert (ior_code != CODE_FOR_nothing);
929 emit_insn (GEN_FCN (ior_code)
930 (compare_result, compare_result, eq_result));
931 }
644459d0 932 }
933
934 if (is_set == 0)
935 {
936 rtx bcomp;
937 rtx loc_ref;
938
939 /* We don't have branch on QI compare insns, so we convert the
940 QI compare result to a HI result. */
941 if (comp_mode == QImode)
942 {
943 rtx old_res = compare_result;
944 compare_result = gen_reg_rtx (HImode);
945 comp_mode = HImode;
946 emit_insn (gen_extendqihi2 (compare_result, old_res));
947 }
948
949 if (reverse_test)
950 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
951 else
952 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
953
74f4459c 954 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 955 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
956 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
957 loc_ref, pc_rtx)));
958 }
959 else if (is_set == 2)
960 {
74f4459c 961 rtx target = operands[0];
644459d0 962 int compare_size = GET_MODE_BITSIZE (comp_mode);
963 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
3754d046 964 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
644459d0 965 rtx select_mask;
966 rtx op_t = operands[2];
967 rtx op_f = operands[3];
968
969 /* The result of the comparison can be SI, HI or QI mode. Create a
970 mask based on that result. */
971 if (target_size > compare_size)
972 {
973 select_mask = gen_reg_rtx (mode);
974 emit_insn (gen_extend_compare (select_mask, compare_result));
975 }
976 else if (target_size < compare_size)
977 select_mask =
978 gen_rtx_SUBREG (mode, compare_result,
979 (compare_size - target_size) / BITS_PER_UNIT);
980 else if (comp_mode != mode)
981 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
982 else
983 select_mask = compare_result;
984
985 if (GET_MODE (target) != GET_MODE (op_t)
986 || GET_MODE (target) != GET_MODE (op_f))
987 abort ();
988
989 if (reverse_test)
990 emit_insn (gen_selb (target, op_t, op_f, select_mask));
991 else
992 emit_insn (gen_selb (target, op_f, op_t, select_mask));
993 }
994 else
995 {
74f4459c 996 rtx target = operands[0];
644459d0 997 if (reverse_test)
998 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
999 gen_rtx_NOT (comp_mode, compare_result)));
1000 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1001 emit_insn (gen_extendhisi2 (target, compare_result));
1002 else if (GET_MODE (target) == SImode
1003 && GET_MODE (compare_result) == QImode)
1004 emit_insn (gen_extend_compare (target, compare_result));
1005 else
1006 emit_move_insn (target, compare_result);
1007 }
1008}
1009
1010HOST_WIDE_INT
1011const_double_to_hwint (rtx x)
1012{
1013 HOST_WIDE_INT val;
1014 REAL_VALUE_TYPE rv;
1015 if (GET_MODE (x) == SFmode)
1016 {
1017 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1018 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1019 }
1020 else if (GET_MODE (x) == DFmode)
1021 {
1022 long l[2];
1023 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1024 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1025 val = l[0];
1026 val = (val << 32) | (l[1] & 0xffffffff);
1027 }
1028 else
1029 abort ();
1030 return val;
1031}
1032
1033rtx
3754d046 1034hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
644459d0 1035{
1036 long tv[2];
1037 REAL_VALUE_TYPE rv;
1038 gcc_assert (mode == SFmode || mode == DFmode);
1039
1040 if (mode == SFmode)
1041 tv[0] = (v << 32) >> 32;
1042 else if (mode == DFmode)
1043 {
1044 tv[1] = (v << 32) >> 32;
1045 tv[0] = v >> 32;
1046 }
1047 real_from_target (&rv, tv, mode);
1048 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1049}
1050
1051void
1052print_operand_address (FILE * file, register rtx addr)
1053{
1054 rtx reg;
1055 rtx offset;
1056
e04cf423 1057 if (GET_CODE (addr) == AND
1058 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1059 && INTVAL (XEXP (addr, 1)) == -16)
1060 addr = XEXP (addr, 0);
1061
644459d0 1062 switch (GET_CODE (addr))
1063 {
1064 case REG:
1065 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1066 break;
1067
1068 case PLUS:
1069 reg = XEXP (addr, 0);
1070 offset = XEXP (addr, 1);
1071 if (GET_CODE (offset) == REG)
1072 {
1073 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1074 reg_names[REGNO (offset)]);
1075 }
1076 else if (GET_CODE (offset) == CONST_INT)
1077 {
1078 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1079 INTVAL (offset), reg_names[REGNO (reg)]);
1080 }
1081 else
1082 abort ();
1083 break;
1084
1085 case CONST:
1086 case LABEL_REF:
1087 case SYMBOL_REF:
1088 case CONST_INT:
1089 output_addr_const (file, addr);
1090 break;
1091
1092 default:
1093 debug_rtx (addr);
1094 abort ();
1095 }
1096}
1097
1098void
1099print_operand (FILE * file, rtx x, int code)
1100{
3754d046 1101 machine_mode mode = GET_MODE (x);
644459d0 1102 HOST_WIDE_INT val;
1103 unsigned char arr[16];
1104 int xcode = GET_CODE (x);
dea01258 1105 int i, info;
644459d0 1106 if (GET_MODE (x) == VOIDmode)
1107 switch (code)
1108 {
644459d0 1109 case 'L': /* 128 bits, signed */
1110 case 'm': /* 128 bits, signed */
1111 case 'T': /* 128 bits, signed */
1112 case 't': /* 128 bits, signed */
1113 mode = TImode;
1114 break;
644459d0 1115 case 'K': /* 64 bits, signed */
1116 case 'k': /* 64 bits, signed */
1117 case 'D': /* 64 bits, signed */
1118 case 'd': /* 64 bits, signed */
1119 mode = DImode;
1120 break;
644459d0 1121 case 'J': /* 32 bits, signed */
1122 case 'j': /* 32 bits, signed */
1123 case 's': /* 32 bits, signed */
1124 case 'S': /* 32 bits, signed */
1125 mode = SImode;
1126 break;
1127 }
1128 switch (code)
1129 {
1130
1131 case 'j': /* 32 bits, signed */
1132 case 'k': /* 64 bits, signed */
1133 case 'm': /* 128 bits, signed */
1134 if (xcode == CONST_INT
1135 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1136 {
1137 gcc_assert (logical_immediate_p (x, mode));
1138 constant_to_array (mode, x, arr);
1139 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1140 val = trunc_int_for_mode (val, SImode);
1141 switch (which_logical_immediate (val))
1142 {
1143 case SPU_ORI:
1144 break;
1145 case SPU_ORHI:
1146 fprintf (file, "h");
1147 break;
1148 case SPU_ORBI:
1149 fprintf (file, "b");
1150 break;
1151 default:
1152 gcc_unreachable();
1153 }
1154 }
1155 else
1156 gcc_unreachable();
1157 return;
1158
1159 case 'J': /* 32 bits, signed */
1160 case 'K': /* 64 bits, signed */
1161 case 'L': /* 128 bits, signed */
1162 if (xcode == CONST_INT
1163 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1164 {
1165 gcc_assert (logical_immediate_p (x, mode)
1166 || iohl_immediate_p (x, mode));
1167 constant_to_array (mode, x, arr);
1168 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1169 val = trunc_int_for_mode (val, SImode);
1170 switch (which_logical_immediate (val))
1171 {
1172 case SPU_ORI:
1173 case SPU_IOHL:
1174 break;
1175 case SPU_ORHI:
1176 val = trunc_int_for_mode (val, HImode);
1177 break;
1178 case SPU_ORBI:
1179 val = trunc_int_for_mode (val, QImode);
1180 break;
1181 default:
1182 gcc_unreachable();
1183 }
1184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1185 }
1186 else
1187 gcc_unreachable();
1188 return;
1189
1190 case 't': /* 128 bits, signed */
1191 case 'd': /* 64 bits, signed */
1192 case 's': /* 32 bits, signed */
dea01258 1193 if (CONSTANT_P (x))
644459d0 1194 {
dea01258 1195 enum immediate_class c = classify_immediate (x, mode);
1196 switch (c)
1197 {
1198 case IC_IL1:
1199 constant_to_array (mode, x, arr);
1200 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1201 val = trunc_int_for_mode (val, SImode);
1202 switch (which_immediate_load (val))
1203 {
1204 case SPU_IL:
1205 break;
1206 case SPU_ILA:
1207 fprintf (file, "a");
1208 break;
1209 case SPU_ILH:
1210 fprintf (file, "h");
1211 break;
1212 case SPU_ILHU:
1213 fprintf (file, "hu");
1214 break;
1215 default:
1216 gcc_unreachable ();
1217 }
1218 break;
1219 case IC_CPAT:
1220 constant_to_array (mode, x, arr);
1221 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1222 if (info == 1)
1223 fprintf (file, "b");
1224 else if (info == 2)
1225 fprintf (file, "h");
1226 else if (info == 4)
1227 fprintf (file, "w");
1228 else if (info == 8)
1229 fprintf (file, "d");
1230 break;
1231 case IC_IL1s:
1232 if (xcode == CONST_VECTOR)
1233 {
1234 x = CONST_VECTOR_ELT (x, 0);
1235 xcode = GET_CODE (x);
1236 }
1237 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1238 fprintf (file, "a");
1239 else if (xcode == HIGH)
1240 fprintf (file, "hu");
1241 break;
1242 case IC_FSMBI:
5df189be 1243 case IC_FSMBI2:
dea01258 1244 case IC_IL2:
1245 case IC_IL2s:
1246 case IC_POOL:
1247 abort ();
1248 }
644459d0 1249 }
644459d0 1250 else
1251 gcc_unreachable ();
1252 return;
1253
1254 case 'T': /* 128 bits, signed */
1255 case 'D': /* 64 bits, signed */
1256 case 'S': /* 32 bits, signed */
dea01258 1257 if (CONSTANT_P (x))
644459d0 1258 {
dea01258 1259 enum immediate_class c = classify_immediate (x, mode);
1260 switch (c)
644459d0 1261 {
dea01258 1262 case IC_IL1:
1263 constant_to_array (mode, x, arr);
1264 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1265 val = trunc_int_for_mode (val, SImode);
1266 switch (which_immediate_load (val))
1267 {
1268 case SPU_IL:
1269 case SPU_ILA:
1270 break;
1271 case SPU_ILH:
1272 case SPU_ILHU:
1273 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1274 break;
1275 default:
1276 gcc_unreachable ();
1277 }
1278 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1279 break;
1280 case IC_FSMBI:
1281 constant_to_array (mode, x, arr);
1282 val = 0;
1283 for (i = 0; i < 16; i++)
1284 {
1285 val <<= 1;
1286 val |= arr[i] & 1;
1287 }
1288 print_operand (file, GEN_INT (val), 0);
1289 break;
1290 case IC_CPAT:
1291 constant_to_array (mode, x, arr);
1292 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1293 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1294 break;
dea01258 1295 case IC_IL1s:
dea01258 1296 if (xcode == HIGH)
5df189be 1297 x = XEXP (x, 0);
1298 if (GET_CODE (x) == CONST_VECTOR)
1299 x = CONST_VECTOR_ELT (x, 0);
1300 output_addr_const (file, x);
1301 if (xcode == HIGH)
1302 fprintf (file, "@h");
644459d0 1303 break;
dea01258 1304 case IC_IL2:
1305 case IC_IL2s:
5df189be 1306 case IC_FSMBI2:
dea01258 1307 case IC_POOL:
1308 abort ();
644459d0 1309 }
c8befdb9 1310 }
644459d0 1311 else
1312 gcc_unreachable ();
1313 return;
1314
644459d0 1315 case 'C':
1316 if (xcode == CONST_INT)
1317 {
1318 /* Only 4 least significant bits are relevant for generate
1319 control word instructions. */
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1321 return;
1322 }
1323 break;
1324
1325 case 'M': /* print code for c*d */
1326 if (GET_CODE (x) == CONST_INT)
1327 switch (INTVAL (x))
1328 {
1329 case 1:
1330 fprintf (file, "b");
1331 break;
1332 case 2:
1333 fprintf (file, "h");
1334 break;
1335 case 4:
1336 fprintf (file, "w");
1337 break;
1338 case 8:
1339 fprintf (file, "d");
1340 break;
1341 default:
1342 gcc_unreachable();
1343 }
1344 else
1345 gcc_unreachable();
1346 return;
1347
1348 case 'N': /* Negate the operand */
1349 if (xcode == CONST_INT)
1350 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1351 else if (xcode == CONST_VECTOR)
1352 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1353 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1354 return;
1355
1356 case 'I': /* enable/disable interrupts */
1357 if (xcode == CONST_INT)
1358 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1359 return;
1360
1361 case 'b': /* branch modifiers */
1362 if (xcode == REG)
1363 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1364 else if (COMPARISON_P (x))
1365 fprintf (file, "%s", xcode == NE ? "n" : "");
1366 return;
1367
1368 case 'i': /* indirect call */
1369 if (xcode == MEM)
1370 {
1371 if (GET_CODE (XEXP (x, 0)) == REG)
1372 /* Used in indirect function calls. */
1373 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1374 else
1375 output_address (XEXP (x, 0));
1376 }
1377 return;
1378
1379 case 'p': /* load/store */
1380 if (xcode == MEM)
1381 {
1382 x = XEXP (x, 0);
1383 xcode = GET_CODE (x);
1384 }
e04cf423 1385 if (xcode == AND)
1386 {
1387 x = XEXP (x, 0);
1388 xcode = GET_CODE (x);
1389 }
644459d0 1390 if (xcode == REG)
1391 fprintf (file, "d");
1392 else if (xcode == CONST_INT)
1393 fprintf (file, "a");
1394 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1395 fprintf (file, "r");
1396 else if (xcode == PLUS || xcode == LO_SUM)
1397 {
1398 if (GET_CODE (XEXP (x, 1)) == REG)
1399 fprintf (file, "x");
1400 else
1401 fprintf (file, "d");
1402 }
1403 return;
1404
5df189be 1405 case 'e':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val &= 0x7;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'f':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val &= 0x1f;
1414 output_addr_const (file, GEN_INT (val));
1415 return;
1416
1417 case 'g':
1418 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1419 val &= 0x3f;
1420 output_addr_const (file, GEN_INT (val));
1421 return;
1422
1423 case 'h':
1424 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1425 val = (val >> 3) & 0x1f;
1426 output_addr_const (file, GEN_INT (val));
1427 return;
1428
1429 case 'E':
1430 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431 val = -val;
1432 val &= 0x7;
1433 output_addr_const (file, GEN_INT (val));
1434 return;
1435
1436 case 'F':
1437 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1438 val = -val;
1439 val &= 0x1f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
1442
1443 case 'G':
1444 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1445 val = -val;
1446 val &= 0x3f;
1447 output_addr_const (file, GEN_INT (val));
1448 return;
1449
1450 case 'H':
1451 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1452 val = -(val & -8ll);
1453 val = (val >> 3) & 0x1f;
1454 output_addr_const (file, GEN_INT (val));
1455 return;
1456
56c7bfc2 1457 case 'v':
1458 case 'w':
1459 constant_to_array (mode, x, arr);
1460 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1461 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1462 return;
1463
644459d0 1464 case 0:
1465 if (xcode == REG)
1466 fprintf (file, "%s", reg_names[REGNO (x)]);
1467 else if (xcode == MEM)
1468 output_address (XEXP (x, 0));
1469 else if (xcode == CONST_VECTOR)
dea01258 1470 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1471 else
1472 output_addr_const (file, x);
1473 return;
1474
f6a0d06f 1475 /* unused letters
56c7bfc2 1476 o qr u yz
5df189be 1477 AB OPQR UVWXYZ */
644459d0 1478 default:
1479 output_operand_lossage ("invalid %%xn code");
1480 }
1481 gcc_unreachable ();
1482}
1483
644459d0 1484/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1485 caller saved register. For leaf functions it is more efficient to
1486 use a volatile register because we won't need to save and restore the
1487 pic register. This routine is only valid after register allocation
1488 is completed, so we can pick an unused register. */
1489static rtx
1490get_pic_reg (void)
1491{
644459d0 1492 if (!reload_completed && !reload_in_progress)
1493 abort ();
5eb28709 1494
1495 /* If we've already made the decision, we need to keep with it. Once we've
1496 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1497 return true since the register is now live; this should not cause us to
1498 "switch back" to using pic_offset_table_rtx. */
1499 if (!cfun->machine->pic_reg)
1500 {
d5bf7b64 1501 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
5eb28709 1502 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1503 else
1504 cfun->machine->pic_reg = pic_offset_table_rtx;
1505 }
1506
1507 return cfun->machine->pic_reg;
644459d0 1508}
1509
5df189be 1510/* Split constant addresses to handle cases that are too large.
1511 Add in the pic register when in PIC mode.
1512 Split immediates that require more than 1 instruction. */
dea01258 1513int
1514spu_split_immediate (rtx * ops)
c8befdb9 1515{
3754d046 1516 machine_mode mode = GET_MODE (ops[0]);
dea01258 1517 enum immediate_class c = classify_immediate (ops[1], mode);
1518
1519 switch (c)
c8befdb9 1520 {
dea01258 1521 case IC_IL2:
1522 {
1523 unsigned char arrhi[16];
1524 unsigned char arrlo[16];
98bbec1e 1525 rtx to, temp, hi, lo;
dea01258 1526 int i;
3754d046 1527 machine_mode imode = mode;
98bbec1e 1528 /* We need to do reals as ints because the constant used in the
1529 IOR might not be a legitimate real constant. */
1530 imode = int_mode_for_mode (mode);
dea01258 1531 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1532 if (imode != mode)
1533 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1534 else
1535 to = ops[0];
1536 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1537 for (i = 0; i < 16; i += 4)
1538 {
1539 arrlo[i + 2] = arrhi[i + 2];
1540 arrlo[i + 3] = arrhi[i + 3];
1541 arrlo[i + 0] = arrlo[i + 1] = 0;
1542 arrhi[i + 2] = arrhi[i + 3] = 0;
1543 }
98bbec1e 1544 hi = array_to_constant (imode, arrhi);
1545 lo = array_to_constant (imode, arrlo);
1546 emit_move_insn (temp, hi);
dea01258 1547 emit_insn (gen_rtx_SET
98bbec1e 1548 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1549 return 1;
1550 }
5df189be 1551 case IC_FSMBI2:
1552 {
1553 unsigned char arr_fsmbi[16];
1554 unsigned char arr_andbi[16];
1555 rtx to, reg_fsmbi, reg_and;
1556 int i;
3754d046 1557 machine_mode imode = mode;
5df189be 1558 /* We need to do reals as ints because the constant used in the
1559 * AND might not be a legitimate real constant. */
1560 imode = int_mode_for_mode (mode);
1561 constant_to_array (mode, ops[1], arr_fsmbi);
1562 if (imode != mode)
1563 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1564 else
1565 to = ops[0];
1566 for (i = 0; i < 16; i++)
1567 if (arr_fsmbi[i] != 0)
1568 {
1569 arr_andbi[0] = arr_fsmbi[i];
1570 arr_fsmbi[i] = 0xff;
1571 }
1572 for (i = 1; i < 16; i++)
1573 arr_andbi[i] = arr_andbi[0];
1574 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1575 reg_and = array_to_constant (imode, arr_andbi);
1576 emit_move_insn (to, reg_fsmbi);
1577 emit_insn (gen_rtx_SET
1578 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1579 return 1;
1580 }
dea01258 1581 case IC_POOL:
1582 if (reload_in_progress || reload_completed)
1583 {
1584 rtx mem = force_const_mem (mode, ops[1]);
1585 if (TARGET_LARGE_MEM)
1586 {
1587 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1588 emit_move_insn (addr, XEXP (mem, 0));
1589 mem = replace_equiv_address (mem, addr);
1590 }
1591 emit_move_insn (ops[0], mem);
1592 return 1;
1593 }
1594 break;
1595 case IC_IL1s:
1596 case IC_IL2s:
1597 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1598 {
1599 if (c == IC_IL2s)
1600 {
5df189be 1601 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1602 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1603 }
1604 else if (flag_pic)
1605 emit_insn (gen_pic (ops[0], ops[1]));
1606 if (flag_pic)
1607 {
1608 rtx pic_reg = get_pic_reg ();
1609 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
dea01258 1610 }
1611 return flag_pic || c == IC_IL2s;
1612 }
1613 break;
1614 case IC_IL1:
1615 case IC_FSMBI:
1616 case IC_CPAT:
1617 break;
c8befdb9 1618 }
dea01258 1619 return 0;
c8befdb9 1620}
1621
644459d0 1622/* SAVING is TRUE when we are generating the actual load and store
1623 instructions for REGNO. When determining the size of the stack
1624 needed for saving register we must allocate enough space for the
1625 worst case, because we don't always have the information early enough
1626 to not allocate it. But we can at least eliminate the actual loads
1627 and stores during the prologue/epilogue. */
1628static int
1629need_to_save_reg (int regno, int saving)
1630{
3072d30e 1631 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1632 return 1;
1633 if (flag_pic
1634 && regno == PIC_OFFSET_TABLE_REGNUM
5eb28709 1635 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
644459d0 1636 return 1;
1637 return 0;
1638}
1639
1640/* This function is only correct starting with local register
1641 allocation */
1642int
1643spu_saved_regs_size (void)
1644{
1645 int reg_save_size = 0;
1646 int regno;
1647
1648 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1649 if (need_to_save_reg (regno, 0))
1650 reg_save_size += 0x10;
1651 return reg_save_size;
1652}
1653
0af56f80 1654static rtx_insn *
644459d0 1655frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1656{
1657 rtx reg = gen_rtx_REG (V4SImode, regno);
1658 rtx mem =
1659 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1660 return emit_insn (gen_movv4si (mem, reg));
1661}
1662
0af56f80 1663static rtx_insn *
644459d0 1664frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1665{
1666 rtx reg = gen_rtx_REG (V4SImode, regno);
1667 rtx mem =
1668 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1669 return emit_insn (gen_movv4si (reg, mem));
1670}
1671
1672/* This happens after reload, so we need to expand it. */
0af56f80 1673static rtx_insn *
644459d0 1674frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1675{
0af56f80 1676 rtx_insn *insn;
644459d0 1677 if (satisfies_constraint_K (GEN_INT (imm)))
1678 {
1679 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1680 }
1681 else
1682 {
3072d30e 1683 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1684 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1685 if (REGNO (src) == REGNO (scratch))
1686 abort ();
1687 }
644459d0 1688 return insn;
1689}
1690
1691/* Return nonzero if this function is known to have a null epilogue. */
1692
1693int
1694direct_return (void)
1695{
1696 if (reload_completed)
1697 {
1698 if (cfun->static_chain_decl == 0
1699 && (spu_saved_regs_size ()
1700 + get_frame_size ()
abe32cce 1701 + crtl->outgoing_args_size
1702 + crtl->args.pretend_args_size == 0)
d5bf7b64 1703 && crtl->is_leaf)
644459d0 1704 return 1;
1705 }
1706 return 0;
1707}
1708
1709/*
1710 The stack frame looks like this:
1711 +-------------+
1712 | incoming |
a8e019fa 1713 | args |
1714 AP -> +-------------+
644459d0 1715 | $lr save |
1716 +-------------+
1717 prev SP | back chain |
1718 +-------------+
1719 | var args |
abe32cce 1720 | reg save | crtl->args.pretend_args_size bytes
644459d0 1721 +-------------+
1722 | ... |
1723 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1724 FP -> +-------------+
644459d0 1725 | ... |
a8e019fa 1726 | vars | get_frame_size() bytes
1727 HFP -> +-------------+
644459d0 1728 | ... |
1729 | outgoing |
abe32cce 1730 | args | crtl->outgoing_args_size bytes
644459d0 1731 +-------------+
1732 | $lr of next |
1733 | frame |
1734 +-------------+
a8e019fa 1735 | back chain |
1736 SP -> +-------------+
644459d0 1737
1738*/
1739void
1740spu_expand_prologue (void)
1741{
1742 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1743 HOST_WIDE_INT total_size;
1744 HOST_WIDE_INT saved_regs_size;
1745 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1746 rtx scratch_reg_0, scratch_reg_1;
0af56f80 1747 rtx_insn *insn;
1748 rtx real;
644459d0 1749
5eb28709 1750 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1751 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 1752
1753 if (spu_naked_function_p (current_function_decl))
1754 return;
1755
1756 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1757 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1758
1759 saved_regs_size = spu_saved_regs_size ();
1760 total_size = size + saved_regs_size
abe32cce 1761 + crtl->outgoing_args_size
1762 + crtl->args.pretend_args_size;
644459d0 1763
d5bf7b64 1764 if (!crtl->is_leaf
18d50ae6 1765 || cfun->calls_alloca || total_size > 0)
644459d0 1766 total_size += STACK_POINTER_OFFSET;
1767
1768 /* Save this first because code after this might use the link
1769 register as a scratch register. */
d5bf7b64 1770 if (!crtl->is_leaf)
644459d0 1771 {
1772 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1773 RTX_FRAME_RELATED_P (insn) = 1;
1774 }
1775
1776 if (total_size > 0)
1777 {
abe32cce 1778 offset = -crtl->args.pretend_args_size;
644459d0 1779 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1780 if (need_to_save_reg (regno, 1))
1781 {
1782 offset -= 16;
1783 insn = frame_emit_store (regno, sp_reg, offset);
1784 RTX_FRAME_RELATED_P (insn) = 1;
1785 }
1786 }
1787
5eb28709 1788 if (flag_pic && cfun->machine->pic_reg)
644459d0 1789 {
5eb28709 1790 rtx pic_reg = cfun->machine->pic_reg;
644459d0 1791 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1792 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1793 }
1794
1795 if (total_size > 0)
1796 {
1797 if (flag_stack_check)
1798 {
d819917f 1799 /* We compare against total_size-1 because
644459d0 1800 ($sp >= total_size) <=> ($sp > total_size-1) */
1801 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1802 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1803 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1804 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1805 {
1806 emit_move_insn (scratch_v4si, size_v4si);
1807 size_v4si = scratch_v4si;
1808 }
1809 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1810 emit_insn (gen_vec_extractv4si
1811 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1812 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1813 }
1814
1815 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1816 the value of the previous $sp because we save it as the back
1817 chain. */
1818 if (total_size <= 2000)
1819 {
1820 /* In this case we save the back chain first. */
1821 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1822 insn =
1823 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1824 }
644459d0 1825 else
1826 {
1827 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1828 insn =
1829 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1830 }
1831 RTX_FRAME_RELATED_P (insn) = 1;
1832 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1833 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1834
1835 if (total_size > 2000)
1836 {
1837 /* Save the back chain ptr */
1838 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1839 }
1840
1841 if (frame_pointer_needed)
1842 {
1843 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1844 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1845 + crtl->outgoing_args_size;
644459d0 1846 /* Set the new frame_pointer */
d8dfeb55 1847 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1848 RTX_FRAME_RELATED_P (insn) = 1;
1849 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1850 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1851 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1852 }
1853 }
1854
8c0dd614 1855 if (flag_stack_usage_info)
a512540d 1856 current_function_static_stack_size = total_size;
644459d0 1857}
1858
1859void
1860spu_expand_epilogue (bool sibcall_p)
1861{
1862 int size = get_frame_size (), offset, regno;
1863 HOST_WIDE_INT saved_regs_size, total_size;
1864 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 1865 rtx scratch_reg_0;
644459d0 1866
644459d0 1867 if (spu_naked_function_p (current_function_decl))
1868 return;
1869
1870 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1871
1872 saved_regs_size = spu_saved_regs_size ();
1873 total_size = size + saved_regs_size
abe32cce 1874 + crtl->outgoing_args_size
1875 + crtl->args.pretend_args_size;
644459d0 1876
d5bf7b64 1877 if (!crtl->is_leaf
18d50ae6 1878 || cfun->calls_alloca || total_size > 0)
644459d0 1879 total_size += STACK_POINTER_OFFSET;
1880
1881 if (total_size > 0)
1882 {
18d50ae6 1883 if (cfun->calls_alloca)
644459d0 1884 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1885 else
1886 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1887
1888
1889 if (saved_regs_size > 0)
1890 {
abe32cce 1891 offset = -crtl->args.pretend_args_size;
644459d0 1892 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1893 if (need_to_save_reg (regno, 1))
1894 {
1895 offset -= 0x10;
1896 frame_emit_load (regno, sp_reg, offset);
1897 }
1898 }
1899 }
1900
d5bf7b64 1901 if (!crtl->is_leaf)
644459d0 1902 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1903
1904 if (!sibcall_p)
1905 {
18b42941 1906 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 1907 emit_jump_insn (gen__return ());
644459d0 1908 }
644459d0 1909}
1910
1911rtx
1912spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1913{
1914 if (count != 0)
1915 return 0;
1916 /* This is inefficient because it ends up copying to a save-register
1917 which then gets saved even though $lr has already been saved. But
1918 it does generate better code for leaf functions and we don't need
1919 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1920 used for __builtin_return_address anyway, so maybe we don't care if
1921 it's inefficient. */
1922 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1923}
1924\f
1925
1926/* Given VAL, generate a constant appropriate for MODE.
1927 If MODE is a vector mode, every element will be VAL.
1928 For TImode, VAL will be zero extended to 128 bits. */
1929rtx
3754d046 1930spu_const (machine_mode mode, HOST_WIDE_INT val)
644459d0 1931{
1932 rtx inner;
1933 rtvec v;
1934 int units, i;
1935
1936 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1937 || GET_MODE_CLASS (mode) == MODE_FLOAT
1938 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1939 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1940
1941 if (GET_MODE_CLASS (mode) == MODE_INT)
1942 return immed_double_const (val, 0, mode);
1943
1944 /* val is the bit representation of the float */
1945 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1946 return hwint_to_const_double (mode, val);
1947
1948 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1949 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1950 else
1951 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1952
1953 units = GET_MODE_NUNITS (mode);
1954
1955 v = rtvec_alloc (units);
1956
1957 for (i = 0; i < units; ++i)
1958 RTVEC_ELT (v, i) = inner;
1959
1960 return gen_rtx_CONST_VECTOR (mode, v);
1961}
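/* A brief illustrative sketch of typical uses (values chosen only for
   illustration): spu_const (V4SImode, 1) yields
   (const_vector:V4SI [1 1 1 1]), spu_const (SImode, -1) yields
   (const_int -1), and spu_const (V4SFmode, 0x3f000000) yields a vector
   of four 0.5f elements, since for float modes VAL is interpreted as
   the bit pattern.  */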
644459d0 1962
5474166e 1963/* Create a MODE vector constant from 4 ints. */
1964rtx
3754d046 1965spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
5474166e 1966{
1967 unsigned char arr[16];
1968 arr[0] = (a >> 24) & 0xff;
1969 arr[1] = (a >> 16) & 0xff;
1970 arr[2] = (a >> 8) & 0xff;
1971 arr[3] = (a >> 0) & 0xff;
1972 arr[4] = (b >> 24) & 0xff;
1973 arr[5] = (b >> 16) & 0xff;
1974 arr[6] = (b >> 8) & 0xff;
1975 arr[7] = (b >> 0) & 0xff;
1976 arr[8] = (c >> 24) & 0xff;
1977 arr[9] = (c >> 16) & 0xff;
1978 arr[10] = (c >> 8) & 0xff;
1979 arr[11] = (c >> 0) & 0xff;
1980 arr[12] = (d >> 24) & 0xff;
1981 arr[13] = (d >> 16) & 0xff;
1982 arr[14] = (d >> 8) & 0xff;
1983 arr[15] = (d >> 0) & 0xff;
1984 return array_to_constant(mode, arr);
1985}
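/* For illustration, spu_const_from_ints (V16QImode, 0x01020304,
   0x05060708, 0x090a0b0c, 0x0d0e0f10) fills the array with the bytes
   01 02 03 ... 10: each argument contributes four bytes in big-endian
   order, matching the SPU register layout.  */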
5a976006 1986\f
1987/* Branch hint support. */
5474166e 1988
644459d0 1989/* An array of these is used to propagate hints to predecessor blocks. */
1990struct spu_bb_info
1991{
0af56f80 1992 rtx_insn *prop_jump; /* propagated from another block */
5a976006 1993 int bb_index; /* the original block. */
644459d0 1994};
5a976006 1995static struct spu_bb_info *spu_bb_info;
644459d0 1996
5a976006 1997#define STOP_HINT_P(INSN) \
aa90bb35 1998 (CALL_P(INSN) \
5a976006 1999 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2000 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2001
2002/* 1 when RTX is a hinted branch or its target. We keep track of
2003 what has been hinted so the safe-hint code can test it easily. */
2004#define HINTED_P(RTX) \
2005 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2006
2007/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2008#define SCHED_ON_EVEN_P(RTX) \
2009 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2010
2011/* Emit a nop for INSN such that the two will dual issue. This assumes
2012 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2013 We check for TImode to handle a MULTI1 insn which has dual issued its
b1135d9a 2014 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
5a976006 2015static void
0af56f80 2016emit_nop_for_insn (rtx_insn *insn)
644459d0 2017{
5a976006 2018 int p;
0af56f80 2019 rtx_insn *new_insn;
b1135d9a 2020
2021 /* We need to handle JUMP_TABLE_DATA separately. */
2022 if (JUMP_TABLE_DATA_P (insn))
2023 {
2024 new_insn = emit_insn_after (gen_lnop(), insn);
2025 recog_memoized (new_insn);
2026 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2027 return;
2028 }
2029
5a976006 2030 p = get_pipe (insn);
2031 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2032 new_insn = emit_insn_after (gen_lnop (), insn);
2033 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2034 {
5a976006 2035 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2036 PUT_MODE (new_insn, TImode);
2037 PUT_MODE (insn, VOIDmode);
2038 }
2039 else
2040 new_insn = emit_insn_after (gen_lnop (), insn);
2041 recog_memoized (new_insn);
d53c050c 2042 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
5a976006 2043}
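/* In other words: a pipe-1 insn that carries the TImode marking gets a
   pipe-0 nopn emitted in front of it, and the nopn inherits the TImode
   marking, so the original insn now dual issues from the second slot of
   the pair.  Every other case simply gets an lnop emitted after it.  */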
2044
2045/* Insert nops in basic blocks to meet dual issue alignment
2046 requirements. Also make sure hbrp and hint instructions are at least
2047 one cycle apart, possibly inserting a nop. */
2048static void
2049pad_bb(void)
2050{
0af56f80 2051 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
5a976006 2052 int length;
2053 int addr;
2054
2055 /* This sets up INSN_ADDRESSES. */
2056 shorten_branches (get_insns ());
2057
2058 /* Keep track of length added by nops. */
2059 length = 0;
2060
2061 prev_insn = 0;
2062 insn = get_insns ();
2063 if (!active_insn_p (insn))
2064 insn = next_active_insn (insn);
2065 for (; insn; insn = next_insn)
2066 {
2067 next_insn = next_active_insn (insn);
2068 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2069 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2070 {
5a976006 2071 if (hbr_insn)
2072 {
2073 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2074 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2075 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2076 || (a1 - a0 == 4))
2077 {
2078 prev_insn = emit_insn_before (gen_lnop (), insn);
2079 PUT_MODE (prev_insn, GET_MODE (insn));
2080 PUT_MODE (insn, TImode);
d53c050c 2081 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
5a976006 2082 length += 4;
2083 }
2084 }
2085 hbr_insn = insn;
2086 }
4f8e39e2 2087 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
5a976006 2088 {
2089 if (GET_MODE (insn) == TImode)
2090 PUT_MODE (next_insn, TImode);
2091 insn = next_insn;
2092 next_insn = next_active_insn (insn);
2093 }
2094 addr = INSN_ADDRESSES (INSN_UID (insn));
2095 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2096 {
2097 if (((addr + length) & 7) != 0)
2098 {
2099 emit_nop_for_insn (prev_insn);
2100 length += 4;
2101 }
644459d0 2102 }
5a976006 2103 else if (GET_MODE (insn) == TImode
2104 && ((next_insn && GET_MODE (next_insn) != TImode)
2105 || get_attr_type (insn) == TYPE_MULTI0)
2106 && ((addr + length) & 7) != 0)
2107 {
2108 /* prev_insn will always be set because the first insn is
2109 always 8-byte aligned. */
2110 emit_nop_for_insn (prev_insn);
2111 length += 4;
2112 }
2113 prev_insn = insn;
644459d0 2114 }
644459d0 2115}
2116
5a976006 2117\f
2118/* Routines for branch hints. */
2119
644459d0 2120static void
0af56f80 2121spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
5a976006 2122 int distance, sbitmap blocks)
644459d0 2123{
5a976006 2124 rtx branch_label = 0;
0af56f80 2125 rtx_insn *hint;
2126 rtx_insn *insn;
c86d86ff 2127 rtx_jump_table_data *table;
644459d0 2128
2129 if (before == 0 || branch == 0 || target == 0)
2130 return;
2131
5a976006 2132 /* While scheduling we require hints to be no further than 600 bytes
2133 from the branch, so we need to enforce that here too. */
644459d0 2134 if (distance > 600)
2135 return;
2136
5a976006 2137 /* If BEFORE is a basic block note, emit the hint after the note, not before it. */
37534923 2138 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2139 before = NEXT_INSN (before);
644459d0 2140
2141 branch_label = gen_label_rtx ();
2142 LABEL_NUSES (branch_label)++;
2143 LABEL_PRESERVE_P (branch_label) = 1;
2144 insn = emit_label_before (branch_label, branch);
2145 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
08b7917c 2146 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
5a976006 2147
2148 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2149 recog_memoized (hint);
d53c050c 2150 INSN_LOCATION (hint) = INSN_LOCATION (branch);
5a976006 2151 HINTED_P (branch) = 1;
644459d0 2152
5a976006 2153 if (GET_CODE (target) == LABEL_REF)
2154 HINTED_P (XEXP (target, 0)) = 1;
2155 else if (tablejump_p (branch, 0, &table))
644459d0 2156 {
5a976006 2157 rtvec vec;
2158 int j;
2159 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2160 vec = XVEC (PATTERN (table), 0);
2161 else
2162 vec = XVEC (PATTERN (table), 1);
2163 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2164 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2165 }
5a976006 2166
2167 if (distance >= 588)
644459d0 2168 {
5a976006 2169 /* Make sure the hint isn't scheduled any earlier than this point,
2170 which could make it too far for the branch offset to fit. */
2fbdf9ef 2171 insn = emit_insn_before (gen_blockage (), hint);
2172 recog_memoized (insn);
d53c050c 2173 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2174 }
2175 else if (distance <= 8 * 4)
2176 {
2177 /* To guarantee at least 8 insns between the hint and branch we
2178 insert nops. */
2179 int d;
2180 for (d = distance; d < 8 * 4; d += 4)
2181 {
2182 insn =
2183 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2184 recog_memoized (insn);
d53c050c 2185 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2186 }
2187
2188 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2189 insn = emit_insn_after (gen_blockage (), hint);
2190 recog_memoized (insn);
d53c050c 2191 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2192
2193 /* Make sure any nops inserted aren't scheduled after the call. */
2194 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2195 {
2196 insn = emit_insn_before (gen_blockage (), branch);
2197 recog_memoized (insn);
d53c050c 2198 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2fbdf9ef 2199 }
644459d0 2200 }
644459d0 2201}
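/* To make the distances above concrete (numbers for illustration only):
   a branch only 20 bytes (5 insns) past the insertion point gets three
   nopn insns added after the hint to reach the required 8-insn
   (32-byte) separation, while a branch more than 600 bytes past the
   insertion point gets no hint at all.  */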
2202
2203/* Returns 0 if we don't want a hint for this branch. Otherwise return
2204 the rtx for the branch target. */
2205static rtx
0af56f80 2206get_branch_target (rtx_insn *branch)
644459d0 2207{
aa90bb35 2208 if (JUMP_P (branch))
644459d0 2209 {
2210 rtx set, src;
2211
2212 /* Return statements */
2213 if (GET_CODE (PATTERN (branch)) == RETURN)
2214 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2215
fcc31b99 2216 /* ASM GOTOs. */
604157f6 2217 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2218 return NULL;
2219
644459d0 2220 set = single_set (branch);
2221 src = SET_SRC (set);
2222 if (GET_CODE (SET_DEST (set)) != PC)
2223 abort ();
2224
2225 if (GET_CODE (src) == IF_THEN_ELSE)
2226 {
2227 rtx lab = 0;
2228 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2229 if (note)
2230 {
2231 /* If the more probable case is not a fall through, then
2232 try a branch hint. */
9eb946de 2233 int prob = XINT (note, 0);
644459d0 2234 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2235 && GET_CODE (XEXP (src, 1)) != PC)
2236 lab = XEXP (src, 1);
2237 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2238 && GET_CODE (XEXP (src, 2)) != PC)
2239 lab = XEXP (src, 2);
2240 }
2241 if (lab)
2242 {
2243 if (GET_CODE (lab) == RETURN)
2244 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2245 return lab;
2246 }
2247 return 0;
2248 }
2249
2250 return src;
2251 }
aa90bb35 2252 else if (CALL_P (branch))
644459d0 2253 {
2254 rtx call;
2255 /* All of our call patterns are in a PARALLEL and the CALL is
2256 the first pattern in the PARALLEL. */
2257 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2258 abort ();
2259 call = XVECEXP (PATTERN (branch), 0, 0);
2260 if (GET_CODE (call) == SET)
2261 call = SET_SRC (call);
2262 if (GET_CODE (call) != CALL)
2263 abort ();
2264 return XEXP (XEXP (call, 0), 0);
2265 }
2266 return 0;
2267}
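/* Note the thresholds used above: a conditional branch is hinted only
   when its REG_BR_PROB note makes one non-fall-through arm clearly more
   likely (above 6/10 of REG_BR_PROB_BASE for the first arm, or below
   4/10 so the second arm is likely); probabilities in between return 0
   and produce no hint.  */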
2268
5a976006 2269/* The special $hbr register is used to prevent the insn scheduler from
2270 moving hbr insns across instructions which invalidate them. It
2271 should only be used in a clobber, and this function searches for
2272 insns which clobber it. */
2273static bool
0af56f80 2274insn_clobbers_hbr (rtx_insn *insn)
5a976006 2275{
2276 if (INSN_P (insn)
2277 && GET_CODE (PATTERN (insn)) == PARALLEL)
2278 {
2279 rtx parallel = PATTERN (insn);
2280 rtx clobber;
2281 int j;
2282 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2283 {
2284 clobber = XVECEXP (parallel, 0, j);
2285 if (GET_CODE (clobber) == CLOBBER
2286 && GET_CODE (XEXP (clobber, 0)) == REG
2287 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2288 return 1;
2289 }
2290 }
2291 return 0;
2292}
2293
2294/* Search up to 32 insns starting at FIRST:
2295 - at any kind of hinted branch, just return
2296 - at any unconditional branch in the first 15 insns, just return
2297 - at a call or indirect branch, after the first 15 insns, force it to
2298 an even address and return
2299 - at any unconditional branch, after the first 15 insns, force it to
2300 an even address.
2301 At the end of the search, insert an hbrp within 4 insns of FIRST,
2302 and an hbrp within 16 instructions of FIRST.
2303 */
644459d0 2304static void
0af56f80 2305insert_hbrp_for_ilb_runout (rtx_insn *first)
644459d0 2306{
0af56f80 2307 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
5a976006 2308 int addr = 0, length, first_addr = -1;
2309 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2310 int insert_lnop_after = 0;
2311 for (insn = first; insn; insn = NEXT_INSN (insn))
2312 if (INSN_P (insn))
2313 {
2314 if (first_addr == -1)
2315 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2316 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2317 length = get_attr_length (insn);
2318
2319 if (before_4 == 0 && addr + length >= 4 * 4)
2320 before_4 = insn;
2321 /* We test for 14 instructions because the first hbrp will add
2322 up to 2 instructions. */
2323 if (before_16 == 0 && addr + length >= 14 * 4)
2324 before_16 = insn;
2325
2326 if (INSN_CODE (insn) == CODE_FOR_hbr)
2327 {
2328 /* Make sure an hbrp is at least 2 cycles away from a hint.
2329 Insert an lnop after the hbrp when necessary. */
2330 if (before_4 == 0 && addr > 0)
2331 {
2332 before_4 = insn;
2333 insert_lnop_after |= 1;
2334 }
2335 else if (before_4 && addr <= 4 * 4)
2336 insert_lnop_after |= 1;
2337 if (before_16 == 0 && addr > 10 * 4)
2338 {
2339 before_16 = insn;
2340 insert_lnop_after |= 2;
2341 }
2342 else if (before_16 && addr <= 14 * 4)
2343 insert_lnop_after |= 2;
2344 }
644459d0 2345
5a976006 2346 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2347 {
2348 if (addr < hbrp_addr0)
2349 hbrp_addr0 = addr;
2350 else if (addr < hbrp_addr1)
2351 hbrp_addr1 = addr;
2352 }
644459d0 2353
5a976006 2354 if (CALL_P (insn) || JUMP_P (insn))
2355 {
2356 if (HINTED_P (insn))
2357 return;
2358
2359 /* Any branch after the first 15 insns should be on an even
2360 address to avoid a special case branch. There might be
2361 some nops and/or hbrps inserted, so we test after 10
2362 insns. */
2363 if (addr > 10 * 4)
2364 SCHED_ON_EVEN_P (insn) = 1;
2365 }
644459d0 2366
5a976006 2367 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2368 return;
2369
2370
2371 if (addr + length >= 32 * 4)
644459d0 2372 {
5a976006 2373 gcc_assert (before_4 && before_16);
2374 if (hbrp_addr0 > 4 * 4)
644459d0 2375 {
5a976006 2376 insn =
2377 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2378 recog_memoized (insn);
d53c050c 2379 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2380 INSN_ADDRESSES_NEW (insn,
2381 INSN_ADDRESSES (INSN_UID (before_4)));
2382 PUT_MODE (insn, GET_MODE (before_4));
2383 PUT_MODE (before_4, TImode);
2384 if (insert_lnop_after & 1)
644459d0 2385 {
5a976006 2386 insn = emit_insn_before (gen_lnop (), before_4);
2387 recog_memoized (insn);
d53c050c 2388 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2389 INSN_ADDRESSES_NEW (insn,
2390 INSN_ADDRESSES (INSN_UID (before_4)));
2391 PUT_MODE (insn, TImode);
644459d0 2392 }
644459d0 2393 }
5a976006 2394 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2395 && hbrp_addr1 > 16 * 4)
644459d0 2396 {
5a976006 2397 insn =
2398 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2399 recog_memoized (insn);
d53c050c 2400 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2401 INSN_ADDRESSES_NEW (insn,
2402 INSN_ADDRESSES (INSN_UID (before_16)));
2403 PUT_MODE (insn, GET_MODE (before_16));
2404 PUT_MODE (before_16, TImode);
2405 if (insert_lnop_after & 2)
644459d0 2406 {
5a976006 2407 insn = emit_insn_before (gen_lnop (), before_16);
2408 recog_memoized (insn);
d53c050c 2409 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2410 INSN_ADDRESSES_NEW (insn,
2411 INSN_ADDRESSES (INSN_UID
2412 (before_16)));
2413 PUT_MODE (insn, TImode);
644459d0 2414 }
2415 }
5a976006 2416 return;
644459d0 2417 }
644459d0 2418 }
5a976006 2419 else if (BARRIER_P (insn))
2420 return;
644459d0 2421
644459d0 2422}
5a976006 2423
2424/* The SPU might hang when it executes 48 inline instructions after a
2425 hinted branch jumps to its hinted target. The beginning of a
851d9296 2426 function and the return from a call might have been hinted, and
2427 must be handled as well. To prevent a hang we insert 2 hbrps. The
2428 first should be within 6 insns of the branch target. The second
2429 should be within 22 insns of the branch target. When determining
2430 if hbrps are necessary, we look for only 32 inline instructions,
2431 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2432 when inserting new hbrps, we insert them within 4 and 16 insns of
2433 the target. */
644459d0 2434static void
5a976006 2435insert_hbrp (void)
644459d0 2436{
0af56f80 2437 rtx_insn *insn;
5a976006 2438 if (TARGET_SAFE_HINTS)
644459d0 2439 {
5a976006 2440 shorten_branches (get_insns ());
2441 /* Insert hbrp at beginning of function */
2442 insn = next_active_insn (get_insns ());
2443 if (insn)
2444 insert_hbrp_for_ilb_runout (insn);
2445 /* Insert hbrp after hinted targets. */
2446 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2447 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2448 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2449 }
644459d0 2450}
2451
5a976006 2452static int in_spu_reorg;
2453
8a42230a 2454static void
2455spu_var_tracking (void)
2456{
2457 if (flag_var_tracking)
2458 {
2459 df_analyze ();
2460 timevar_push (TV_VAR_TRACKING);
2461 variable_tracking_main ();
2462 timevar_pop (TV_VAR_TRACKING);
2463 df_finish_pass (false);
2464 }
2465}
2466
5a976006 2467/* Insert branch hints. There are no branch optimizations after this
2468 pass, so it's safe to set our branch hints now. */
644459d0 2469static void
5a976006 2470spu_machine_dependent_reorg (void)
644459d0 2471{
5a976006 2472 sbitmap blocks;
2473 basic_block bb;
0af56f80 2474 rtx_insn *branch, *insn;
5a976006 2475 rtx branch_target = 0;
2476 int branch_addr = 0, insn_addr, required_dist = 0;
2477 int i;
2478 unsigned int j;
644459d0 2479
5a976006 2480 if (!TARGET_BRANCH_HINTS || optimize == 0)
2481 {
2482 /* We still do it for unoptimized code because an external
2483 function might have hinted a call or return. */
a54ca889 2484 compute_bb_for_insn ();
5a976006 2485 insert_hbrp ();
2486 pad_bb ();
8a42230a 2487 spu_var_tracking ();
a54ca889 2488 free_bb_for_insn ();
5a976006 2489 return;
2490 }
644459d0 2491
fe672ac0 2492 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
53c5d9d4 2493 bitmap_clear (blocks);
644459d0 2494
5a976006 2495 in_spu_reorg = 1;
2496 compute_bb_for_insn ();
2497
a7a0184d 2498 /* (Re-)discover loops so that bb->loop_father can be used
2499 in the analysis below. */
2500 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2501
5a976006 2502 compact_blocks ();
2503
2504 spu_bb_info =
a28770e1 2505 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
5a976006 2506 sizeof (struct spu_bb_info));
2507
2508 /* We need exact insn addresses and lengths. */
2509 shorten_branches (get_insns ());
2510
a28770e1 2511 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
644459d0 2512 {
f5a6b05f 2513 bb = BASIC_BLOCK_FOR_FN (cfun, i);
5a976006 2514 branch = 0;
2515 if (spu_bb_info[i].prop_jump)
644459d0 2516 {
5a976006 2517 branch = spu_bb_info[i].prop_jump;
2518 branch_target = get_branch_target (branch);
2519 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2520 required_dist = spu_hint_dist;
2521 }
2522 /* Search from end of a block to beginning. In this loop, find
2523 jumps which need a branch hint and emit the hint only when:
2524 - it's an indirect branch and we're at the insn which sets
2525 the register
2526 - we're at an insn that will invalidate the hint. e.g., a
2527 call, another hint insn, inline asm that clobbers $hbr, and
2528 some inlined operations (divmodsi4). Don't consider jumps
2529 because they are only at the end of a block and are
2530 considered when we are deciding whether to propagate
2531 - we're getting too far away from the branch. The hbr insns
2532 only have a signed 10 bit offset
2533 We go back as far as possible so the branch will be considered
2534 for propagation when we get to the beginning of the block. */
2535 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2536 {
2537 if (INSN_P (insn))
2538 {
2539 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2540 if (branch
2541 && ((GET_CODE (branch_target) == REG
2542 && set_of (branch_target, insn) != NULL_RTX)
2543 || insn_clobbers_hbr (insn)
2544 || branch_addr - insn_addr > 600))
2545 {
0af56f80 2546 rtx_insn *next = NEXT_INSN (insn);
5a976006 2547 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2548 if (insn != BB_END (bb)
2549 && branch_addr - next_addr >= required_dist)
2550 {
2551 if (dump_file)
2552 fprintf (dump_file,
2553 "hint for %i in block %i before %i\n",
2554 INSN_UID (branch), bb->index,
2555 INSN_UID (next));
2556 spu_emit_branch_hint (next, branch, branch_target,
2557 branch_addr - next_addr, blocks);
2558 }
2559 branch = 0;
2560 }
2561
2562 /* JUMP_P will only be true at the end of a block. When
2563 branch is already set it means we've previously decided
2564 to propagate a hint for that branch into this block. */
2565 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2566 {
2567 branch = 0;
2568 if ((branch_target = get_branch_target (insn)))
2569 {
2570 branch = insn;
2571 branch_addr = insn_addr;
2572 required_dist = spu_hint_dist;
2573 }
2574 }
2575 }
2576 if (insn == BB_HEAD (bb))
2577 break;
2578 }
2579
2580 if (branch)
2581 {
2582 /* If we haven't emitted a hint for this branch yet, it might
2583 be profitable to emit it in one of the predecessor blocks,
2584 especially for loops. */
0af56f80 2585 rtx_insn *bbend;
5a976006 2586 basic_block prev = 0, prop = 0, prev2 = 0;
2587 int loop_exit = 0, simple_loop = 0;
2588 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2589
2590 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2591 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2592 prev = EDGE_PRED (bb, j)->src;
2593 else
2594 prev2 = EDGE_PRED (bb, j)->src;
2595
2596 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2597 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2598 loop_exit = 1;
2599 else if (EDGE_SUCC (bb, j)->dest == bb)
2600 simple_loop = 1;
2601
2602 /* If this branch is a loop exit then propagate to previous
2603 fallthru block. This catches the cases when it is a simple
2604 loop or when there is an initial branch into the loop. */
2605 if (prev && (loop_exit || simple_loop)
a7a0184d 2606 && bb_loop_depth (prev) <= bb_loop_depth (bb))
5a976006 2607 prop = prev;
2608
2609 /* If there is only one adjacent predecessor, don't propagate
a7a0184d 2610 outside this loop. */
5a976006 2611 else if (prev && single_pred_p (bb)
a7a0184d 2612 && prev->loop_father == bb->loop_father)
5a976006 2613 prop = prev;
2614
2615 /* If this is the JOIN block of a simple IF-THEN then
9d75589a 2616 propagate the hint to the HEADER block. */
5a976006 2617 else if (prev && prev2
2618 && EDGE_COUNT (bb->preds) == 2
2619 && EDGE_COUNT (prev->preds) == 1
2620 && EDGE_PRED (prev, 0)->src == prev2
a7a0184d 2621 && prev2->loop_father == bb->loop_father
5a976006 2622 && GET_CODE (branch_target) != REG)
2623 prop = prev;
2624
2625 /* Don't propagate when:
2626 - this is a simple loop and the hint would be too far
2627 - this is not a simple loop and there are 16 insns in
2628 this block already
2629 - the predecessor block ends in a branch that will be
2630 hinted
2631 - the predecessor block ends in an insn that invalidates
2632 the hint */
2633 if (prop
2634 && prop->index >= 0
2635 && (bbend = BB_END (prop))
2636 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2637 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2638 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2639 {
2640 if (dump_file)
2641 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2642 "for %i (loop_exit %i simple_loop %i dist %i)\n",
a7a0184d 2643 bb->index, prop->index, bb_loop_depth (bb),
5a976006 2644 INSN_UID (branch), loop_exit, simple_loop,
2645 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2646
2647 spu_bb_info[prop->index].prop_jump = branch;
2648 spu_bb_info[prop->index].bb_index = i;
2649 }
2650 else if (branch_addr - next_addr >= required_dist)
2651 {
2652 if (dump_file)
2653 fprintf (dump_file, "hint for %i in block %i before %i\n",
2654 INSN_UID (branch), bb->index,
2655 INSN_UID (NEXT_INSN (insn)));
2656 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2657 branch_addr - next_addr, blocks);
2658 }
2659 branch = 0;
644459d0 2660 }
644459d0 2661 }
5a976006 2662 free (spu_bb_info);
644459d0 2663
53c5d9d4 2664 if (!bitmap_empty_p (blocks))
5a976006 2665 find_many_sub_basic_blocks (blocks);
2666
2667 /* We have to schedule to make sure alignment is ok. */
fc00614f 2668 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
5a976006 2669
2670 /* The hints need to be scheduled, so call it again. */
2671 schedule_insns ();
2fbdf9ef 2672 df_finish_pass (true);
5a976006 2673
2674 insert_hbrp ();
2675
2676 pad_bb ();
2677
8f1d58ad 2678 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2679 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2680 {
2681 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2682 between its branch label and the branch. We don't move the
2683 label because GCC expects it at the beginning of the block. */
2684 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2685 rtx label_ref = XVECEXP (unspec, 0, 0);
4cd001d5 2686 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2687 rtx_insn *branch;
8f1d58ad 2688 int offset = 0;
2689 for (branch = NEXT_INSN (label);
2690 !JUMP_P (branch) && !CALL_P (branch);
2691 branch = NEXT_INSN (branch))
2692 if (NONJUMP_INSN_P (branch))
2693 offset += get_attr_length (branch);
2694 if (offset > 0)
29c05e22 2695 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
8f1d58ad 2696 }
5a976006 2697
8a42230a 2698 spu_var_tracking ();
5a976006 2699
a7a0184d 2700 loop_optimizer_finalize ();
2701
5a976006 2702 free_bb_for_insn ();
2703
2704 in_spu_reorg = 0;
644459d0 2705}
2706\f
2707
2708/* Insn scheduling routines, primarily for dual issue. */
2709static int
2710spu_sched_issue_rate (void)
2711{
2712 return 2;
2713}
2714
2715static int
0af56f80 2716uses_ls_unit(rtx_insn *insn)
644459d0 2717{
5a976006 2718 rtx set = single_set (insn);
2719 if (set != 0
2720 && (GET_CODE (SET_DEST (set)) == MEM
2721 || GET_CODE (SET_SRC (set)) == MEM))
2722 return 1;
2723 return 0;
644459d0 2724}
2725
2726static int
0af56f80 2727get_pipe (rtx_insn *insn)
644459d0 2728{
2729 enum attr_type t;
2730 /* Handle inline asm */
2731 if (INSN_CODE (insn) == -1)
2732 return -1;
2733 t = get_attr_type (insn);
2734 switch (t)
2735 {
2736 case TYPE_CONVERT:
2737 return -2;
2738 case TYPE_MULTI0:
2739 return -1;
2740
2741 case TYPE_FX2:
2742 case TYPE_FX3:
2743 case TYPE_SPR:
2744 case TYPE_NOP:
2745 case TYPE_FXB:
2746 case TYPE_FPD:
2747 case TYPE_FP6:
2748 case TYPE_FP7:
644459d0 2749 return 0;
2750
2751 case TYPE_LNOP:
2752 case TYPE_SHUF:
2753 case TYPE_LOAD:
2754 case TYPE_STORE:
2755 case TYPE_BR:
2756 case TYPE_MULTI1:
2757 case TYPE_HBR:
5a976006 2758 case TYPE_IPREFETCH:
644459d0 2759 return 1;
2760 default:
2761 abort ();
2762 }
2763}
2764
5a976006 2765
2766/* haifa-sched.c has a static variable that keeps track of the current
2767 cycle. It is passed to spu_sched_reorder, and we record it here for
2768 use by spu_sched_variable_issue. It won't be accurate if the
2769 scheduler updates its clock_var between the two calls. */
2770static int clock_var;
2771
2772/* This is used to keep track of insn alignment. Set to 0 at the
2773 beginning of each block and increased by the "length" attr of each
2774 insn scheduled. */
2775static int spu_sched_length;
2776
2777/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2778 ready list appropriately in spu_sched_reorder(). */
2779static int pipe0_clock;
2780static int pipe1_clock;
2781
2782static int prev_clock_var;
2783
2784static int prev_priority;
2785
2786/* The SPU needs to load the next ilb sometime during the execution of
2787 the previous ilb. There is a potential conflict if every cycle has a
2788 load or store. To avoid the conflict we make sure the load/store
2789 unit is free for at least one cycle during the execution of insns in
2790 the previous ilb. */
2791static int spu_ls_first;
2792static int prev_ls_clock;
2793
2794static void
2795spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2796 int max_ready ATTRIBUTE_UNUSED)
2797{
2798 spu_sched_length = 0;
2799}
2800
2801static void
2802spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2803 int max_ready ATTRIBUTE_UNUSED)
2804{
2805 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2806 {
2807 /* When any block might be at least 8-byte aligned, assume they
2808 will all be at least 8-byte aligned to make sure dual issue
2809 works out correctly. */
2810 spu_sched_length = 0;
2811 }
2812 spu_ls_first = INT_MAX;
2813 clock_var = -1;
2814 prev_ls_clock = -1;
2815 pipe0_clock = -1;
2816 pipe1_clock = -1;
2817 prev_clock_var = -1;
2818 prev_priority = -1;
2819}
2820
644459d0 2821static int
5a976006 2822spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
0af56f80 2823 int verbose ATTRIBUTE_UNUSED,
18282db0 2824 rtx_insn *insn, int more)
644459d0 2825{
5a976006 2826 int len;
2827 int p;
644459d0 2828 if (GET_CODE (PATTERN (insn)) == USE
2829 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2830 || (len = get_attr_length (insn)) == 0)
2831 return more;
2832
2833 spu_sched_length += len;
2834
2835 /* Reset on inline asm */
2836 if (INSN_CODE (insn) == -1)
2837 {
2838 spu_ls_first = INT_MAX;
2839 pipe0_clock = -1;
2840 pipe1_clock = -1;
2841 return 0;
2842 }
2843 p = get_pipe (insn);
2844 if (p == 0)
2845 pipe0_clock = clock_var;
2846 else
2847 pipe1_clock = clock_var;
2848
2849 if (in_spu_reorg)
2850 {
2851 if (clock_var - prev_ls_clock > 1
2852 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2853 spu_ls_first = INT_MAX;
2854 if (uses_ls_unit (insn))
2855 {
2856 if (spu_ls_first == INT_MAX)
2857 spu_ls_first = spu_sched_length;
2858 prev_ls_clock = clock_var;
2859 }
2860
2861 /* The scheduler hasn't inserted the nop, but we will later on.
2862 Include those nops in spu_sched_length. */
2863 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2864 spu_sched_length += 4;
2865 prev_clock_var = clock_var;
2866
2867 /* more is -1 when called from spu_sched_reorder for new insns
2868 that don't have INSN_PRIORITY */
2869 if (more >= 0)
2870 prev_priority = INSN_PRIORITY (insn);
2871 }
2872
9d75589a 2873 /* Always try issuing more insns. spu_sched_reorder will decide
5a976006 2874 when the cycle should be advanced. */
2875 return 1;
2876}
2877
2878/* This function is called for both TARGET_SCHED_REORDER and
2879 TARGET_SCHED_REORDER2. */
2880static int
2881spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
b24ef467 2882 rtx_insn **ready, int *nreadyp, int clock)
5a976006 2883{
2884 int i, nready = *nreadyp;
2885 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
b24ef467 2886 rtx_insn *insn;
5a976006 2887
2888 clock_var = clock;
2889
2890 if (nready <= 0 || pipe1_clock >= clock)
2891 return 0;
2892
2893 /* Find any rtl insns that don't generate assembly insns and schedule
2894 them first. */
2895 for (i = nready - 1; i >= 0; i--)
2896 {
2897 insn = ready[i];
2898 if (INSN_CODE (insn) == -1
2899 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 2900 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 2901 {
2902 ready[i] = ready[nready - 1];
2903 ready[nready - 1] = insn;
2904 return 1;
2905 }
2906 }
2907
2908 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2909 for (i = 0; i < nready; i++)
2910 if (INSN_CODE (ready[i]) != -1)
2911 {
2912 insn = ready[i];
2913 switch (get_attr_type (insn))
2914 {
2915 default:
2916 case TYPE_MULTI0:
2917 case TYPE_CONVERT:
2918 case TYPE_FX2:
2919 case TYPE_FX3:
2920 case TYPE_SPR:
2921 case TYPE_NOP:
2922 case TYPE_FXB:
2923 case TYPE_FPD:
2924 case TYPE_FP6:
2925 case TYPE_FP7:
2926 pipe_0 = i;
2927 break;
2928 case TYPE_LOAD:
2929 case TYPE_STORE:
2930 pipe_ls = i;
2931 case TYPE_LNOP:
2932 case TYPE_SHUF:
2933 case TYPE_BR:
2934 case TYPE_MULTI1:
2935 case TYPE_HBR:
2936 pipe_1 = i;
2937 break;
2938 case TYPE_IPREFETCH:
2939 pipe_hbrp = i;
2940 break;
2941 }
2942 }
2943
2944 /* In the first scheduling phase, schedule loads and stores together
2945 to increase the chance they will get merged during postreload CSE. */
2946 if (!reload_completed && pipe_ls >= 0)
2947 {
2948 insn = ready[pipe_ls];
2949 ready[pipe_ls] = ready[nready - 1];
2950 ready[nready - 1] = insn;
2951 return 1;
2952 }
2953
2954 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2955 if (pipe_hbrp >= 0)
2956 pipe_1 = pipe_hbrp;
2957
2958 /* When we have loads/stores in every cycle of the last 15 insns and
2959 we are about to schedule another load/store, emit an hbrp insn
2960 instead. */
2961 if (in_spu_reorg
2962 && spu_sched_length - spu_ls_first >= 4 * 15
2963 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2964 {
2965 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2966 recog_memoized (insn);
2967 if (pipe0_clock < clock)
2968 PUT_MODE (insn, TImode);
2969 spu_sched_variable_issue (file, verbose, insn, -1);
2970 return 0;
2971 }
2972
2973 /* In general, we want to emit nops to increase dual issue, but dual
2974 issue isn't faster when one of the insns could be scheduled later
2975 without effecting the critical path. We look at INSN_PRIORITY to
2976 make a good guess, but it isn't perfect so -mdual-nops=n can be
2977 used to effect it. */
2978 if (in_spu_reorg && spu_dual_nops < 10)
2979 {
9d75589a 2980 /* When we are at an even address and we are not issuing nops to
5a976006 2981 improve scheduling then we need to advance the cycle. */
2982 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2983 && (spu_dual_nops == 0
2984 || (pipe_1 != -1
2985 && prev_priority >
2986 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2987 return 0;
2988
2989 /* When at an odd address, schedule the highest priority insn
2990 without considering pipeline. */
2991 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2992 && (spu_dual_nops == 0
2993 || (prev_priority >
2994 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2995 return 1;
2996 }
2997
2998
2999 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3000 pipe0 insn in the ready list, schedule it. */
3001 if (pipe0_clock < clock && pipe_0 >= 0)
3002 schedule_i = pipe_0;
3003
3004 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3005 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3006 else
3007 schedule_i = pipe_1;
3008
3009 if (schedule_i > -1)
3010 {
3011 insn = ready[schedule_i];
3012 ready[schedule_i] = ready[nready - 1];
3013 ready[nready - 1] = insn;
3014 return 1;
3015 }
3016 return 0;
644459d0 3017}
3018
3019/* INSN is dependent on DEP_INSN. */
3020static int
18282db0 3021spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
644459d0 3022{
5a976006 3023 rtx set;
3024
3025 /* The blockage pattern is used to prevent instructions from being
3026 moved across it and has no cost. */
3027 if (INSN_CODE (insn) == CODE_FOR_blockage
3028 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3029 return 0;
3030
9d98604b 3031 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3032 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3033 return 0;
3034
3035 /* Make sure hbrps are spread out. */
3036 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3037 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3038 return 8;
3039
3040 /* Make sure hints and hbrps are 2 cycles apart. */
3041 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3042 || INSN_CODE (insn) == CODE_FOR_hbr)
3043 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3044 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3045 return 2;
3046
3047 /* An hbrp has no real dependency on other insns. */
3048 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3049 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3050 return 0;
3051
3052 /* Assuming that it is unlikely an argument register will be used in
3053 the first cycle of the called function, we reduce the cost for
3054 slightly better scheduling of dep_insn. When not hinted, the
3055 mispredicted branch would hide the cost as well. */
3056 if (CALL_P (insn))
3057 {
3058 rtx target = get_branch_target (insn);
3059 if (GET_CODE (target) != REG || !set_of (target, insn))
3060 return cost - 2;
3061 return cost;
3062 }
3063
3064 /* And when returning from a function, let's assume the return values
3065 are completed sooner too. */
3066 if (CALL_P (dep_insn))
644459d0 3067 return cost - 2;
5a976006 3068
3069 /* Make sure an instruction that loads from the back chain is scheduled
3070 away from the return instruction so a hint is more likely to get
3071 issued. */
3072 if (INSN_CODE (insn) == CODE_FOR__return
3073 && (set = single_set (dep_insn))
3074 && GET_CODE (SET_DEST (set)) == REG
3075 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3076 return 20;
3077
644459d0 3078 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3079 scheduler makes every insn in a block anti-dependent on the final
3080 jump_insn. We adjust here so higher cost insns will get scheduled
3081 earlier. */
5a976006 3082 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3083 return insn_cost (dep_insn) - 3;
5a976006 3084
644459d0 3085 return cost;
3086}
3087\f
3088/* Create a CONST_DOUBLE from a string. */
842ae815 3089rtx
3754d046 3090spu_float_const (const char *string, machine_mode mode)
644459d0 3091{
3092 REAL_VALUE_TYPE value;
3093 value = REAL_VALUE_ATOF (string, mode);
3094 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3095}
3096
644459d0 3097int
3098spu_constant_address_p (rtx x)
3099{
3100 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3101 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3102 || GET_CODE (x) == HIGH);
3103}
3104
3105static enum spu_immediate
3106which_immediate_load (HOST_WIDE_INT val)
3107{
3108 gcc_assert (val == trunc_int_for_mode (val, SImode));
3109
3110 if (val >= -0x8000 && val <= 0x7fff)
3111 return SPU_IL;
3112 if (val >= 0 && val <= 0x3ffff)
3113 return SPU_ILA;
3114 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3115 return SPU_ILH;
3116 if ((val & 0xffff) == 0)
3117 return SPU_ILHU;
3118
3119 return SPU_NONE;
3120}
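/* A few illustrative values and the loads they map to: 0x1234 gives
   SPU_IL (fits the signed 16-bit il field), 0x23456 gives SPU_ILA (fits
   the unsigned 18-bit ila field), 0x12341234 gives SPU_ILH (identical
   halfwords) and 0x12340000 gives SPU_ILHU (low halfword zero); a value
   such as 0x12345678 needs two instructions and yields SPU_NONE.  */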
3121
dea01258 3122/* Return true when OP can be loaded by one of the il instructions, or
3123 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3124int
3754d046 3125immediate_load_p (rtx op, machine_mode mode)
dea01258 3126{
3127 if (CONSTANT_P (op))
3128 {
3129 enum immediate_class c = classify_immediate (op, mode);
5df189be 3130 return c == IC_IL1 || c == IC_IL1s
3072d30e 3131 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3132 }
3133 return 0;
3134}
3135
3136 /* Return true if the first SIZE bytes of ARR form a constant that can be
3137 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3138 represent the size and offset of the instruction to use. */
3139static int
3140cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3141{
3142 int cpat, run, i, start;
3143 cpat = 1;
3144 run = 0;
3145 start = -1;
3146 for (i = 0; i < size && cpat; i++)
3147 if (arr[i] != i+16)
3148 {
3149 if (!run)
3150 {
3151 start = i;
3152 if (arr[i] == 3)
3153 run = 1;
3154 else if (arr[i] == 2 && arr[i+1] == 3)
3155 run = 2;
3156 else if (arr[i] == 0)
3157 {
3158 while (arr[i+run] == run && i+run < 16)
3159 run++;
3160 if (run != 4 && run != 8)
3161 cpat = 0;
3162 }
3163 else
3164 cpat = 0;
3165 if ((i & (run-1)) != 0)
3166 cpat = 0;
3167 i += run;
3168 }
3169 else
3170 cpat = 0;
3171 }
b01a6dc3 3172 if (cpat && (run || size < 16))
dea01258 3173 {
3174 if (run == 0)
3175 run = 1;
3176 if (prun)
3177 *prun = run;
3178 if (pstart)
3179 *pstart = start == -1 ? 16-run : start;
3180 return 1;
3181 }
3182 return 0;
3183}
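/* A worked example for SIZE == 16 (bytes in hex): the pattern
     10 11 12 13 14 15 16 17 00 01 02 03 1c 1d 1e 1f
   keeps every byte except offsets 8..11 at its pass-through value i+16,
   and the run 00 01 02 03 at offset 8 has length 4, so this is a
   cwd-style pattern and the function returns 1 with *PRUN = 4 and
   *PSTART = 8.  */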
3184
3185/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3186 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3187static enum immediate_class
3754d046 3188classify_immediate (rtx op, machine_mode mode)
644459d0 3189{
3190 HOST_WIDE_INT val;
3191 unsigned char arr[16];
5df189be 3192 int i, j, repeated, fsmbi, repeat;
dea01258 3193
3194 gcc_assert (CONSTANT_P (op));
3195
644459d0 3196 if (GET_MODE (op) != VOIDmode)
3197 mode = GET_MODE (op);
3198
dea01258 3199 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3200 if (!flag_pic
3201 && mode == V4SImode
dea01258 3202 && GET_CODE (op) == CONST_VECTOR
3203 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3204 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3205 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3206 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3207 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3208 op = CONST_VECTOR_ELT (op, 0);
644459d0 3209
dea01258 3210 switch (GET_CODE (op))
3211 {
3212 case SYMBOL_REF:
3213 case LABEL_REF:
3214 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3215
dea01258 3216 case CONST:
0cfc65d4 3217 /* We can never know if the resulting address fits in 18 bits and can be
3218 loaded with ila. For now, assume the address will not overflow if
3219 the displacement is "small" (fits 'K' constraint). */
3220 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3221 {
3222 rtx sym = XEXP (XEXP (op, 0), 0);
3223 rtx cst = XEXP (XEXP (op, 0), 1);
3224
3225 if (GET_CODE (sym) == SYMBOL_REF
3226 && GET_CODE (cst) == CONST_INT
3227 && satisfies_constraint_K (cst))
3228 return IC_IL1s;
3229 }
3230 return IC_IL2s;
644459d0 3231
dea01258 3232 case HIGH:
3233 return IC_IL1s;
3234
3235 case CONST_VECTOR:
3236 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3237 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3238 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3239 return IC_POOL;
3240 /* Fall through. */
3241
3242 case CONST_INT:
3243 case CONST_DOUBLE:
3244 constant_to_array (mode, op, arr);
644459d0 3245
dea01258 3246 /* Check that each 4-byte slot is identical. */
3247 repeated = 1;
3248 for (i = 4; i < 16; i += 4)
3249 for (j = 0; j < 4; j++)
3250 if (arr[j] != arr[i + j])
3251 repeated = 0;
3252
3253 if (repeated)
3254 {
3255 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3256 val = trunc_int_for_mode (val, SImode);
3257
3258 if (which_immediate_load (val) != SPU_NONE)
3259 return IC_IL1;
3260 }
3261
3262 /* Any mode of 2 bytes or smaller can be loaded with an il
3263 instruction. */
3264 gcc_assert (GET_MODE_SIZE (mode) > 2);
3265
3266 fsmbi = 1;
5df189be 3267 repeat = 0;
dea01258 3268 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3269 if (arr[i] != 0 && repeat == 0)
3270 repeat = arr[i];
3271 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3272 fsmbi = 0;
3273 if (fsmbi)
5df189be 3274 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3275
3276 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3277 return IC_CPAT;
3278
3279 if (repeated)
3280 return IC_IL2;
3281
3282 return IC_POOL;
3283 default:
3284 break;
3285 }
3286 gcc_unreachable ();
644459d0 3287}
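/* A sketch of how a few constants classify (values for illustration):
   a V4SI constant with every element 5 is IC_IL1 (a single il); every
   element 0xff0000ff is IC_FSMBI (each byte is 0x00 or 0xff, so one
   fsmbi mask suffices); every element 0x7f00007f is IC_FSMBI2 (an
   fsmbi-based two-instruction form); and a repeated element like
   0x12345678 falls back to IC_IL2, or to IC_POOL when the slots
   differ.  */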
3288
3289static enum spu_immediate
3290which_logical_immediate (HOST_WIDE_INT val)
3291{
3292 gcc_assert (val == trunc_int_for_mode (val, SImode));
3293
3294 if (val >= -0x200 && val <= 0x1ff)
3295 return SPU_ORI;
3296 if (val >= 0 && val <= 0xffff)
3297 return SPU_IOHL;
3298 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3299 {
3300 val = trunc_int_for_mode (val, HImode);
3301 if (val >= -0x200 && val <= 0x1ff)
3302 return SPU_ORHI;
3303 if ((val & 0xff) == ((val >> 8) & 0xff))
3304 {
3305 val = trunc_int_for_mode (val, QImode);
3306 if (val >= -0x200 && val <= 0x1ff)
3307 return SPU_ORBI;
3308 }
3309 }
3310 return SPU_NONE;
3311}
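/* Illustrative values: 0x1ff maps to SPU_ORI, 0x1234 to SPU_IOHL,
   0x00050005 to SPU_ORHI (identical halfwords, each within the ori
   range) and 0x23232323 to SPU_ORBI (identical bytes); something like
   0x12345678 has no single-instruction form and yields SPU_NONE.  */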
3312
5df189be 3313/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3314 CONST_DOUBLEs. */
3315static int
3316const_vector_immediate_p (rtx x)
3317{
3318 int i;
3319 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3320 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3321 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3322 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3323 return 0;
3324 return 1;
3325}
3326
644459d0 3327int
3754d046 3328logical_immediate_p (rtx op, machine_mode mode)
644459d0 3329{
3330 HOST_WIDE_INT val;
3331 unsigned char arr[16];
3332 int i, j;
3333
3334 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3335 || GET_CODE (op) == CONST_VECTOR);
3336
5df189be 3337 if (GET_CODE (op) == CONST_VECTOR
3338 && !const_vector_immediate_p (op))
3339 return 0;
3340
644459d0 3341 if (GET_MODE (op) != VOIDmode)
3342 mode = GET_MODE (op);
3343
3344 constant_to_array (mode, op, arr);
3345
3346 /* Check that bytes are repeated. */
3347 for (i = 4; i < 16; i += 4)
3348 for (j = 0; j < 4; j++)
3349 if (arr[j] != arr[i + j])
3350 return 0;
3351
3352 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3353 val = trunc_int_for_mode (val, SImode);
3354
3355 i = which_logical_immediate (val);
3356 return i != SPU_NONE && i != SPU_IOHL;
3357}
3358
3359int
3754d046 3360iohl_immediate_p (rtx op, machine_mode mode)
644459d0 3361{
3362 HOST_WIDE_INT val;
3363 unsigned char arr[16];
3364 int i, j;
3365
3366 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3367 || GET_CODE (op) == CONST_VECTOR);
3368
5df189be 3369 if (GET_CODE (op) == CONST_VECTOR
3370 && !const_vector_immediate_p (op))
3371 return 0;
3372
644459d0 3373 if (GET_MODE (op) != VOIDmode)
3374 mode = GET_MODE (op);
3375
3376 constant_to_array (mode, op, arr);
3377
3378 /* Check that bytes are repeated. */
3379 for (i = 4; i < 16; i += 4)
3380 for (j = 0; j < 4; j++)
3381 if (arr[j] != arr[i + j])
3382 return 0;
3383
3384 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3385 val = trunc_int_for_mode (val, SImode);
3386
3387 return val >= 0 && val <= 0xffff;
3388}
3389
3390int
3754d046 3391arith_immediate_p (rtx op, machine_mode mode,
644459d0 3392 HOST_WIDE_INT low, HOST_WIDE_INT high)
3393{
3394 HOST_WIDE_INT val;
3395 unsigned char arr[16];
3396 int bytes, i, j;
3397
3398 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3399 || GET_CODE (op) == CONST_VECTOR);
3400
5df189be 3401 if (GET_CODE (op) == CONST_VECTOR
3402 && !const_vector_immediate_p (op))
3403 return 0;
3404
644459d0 3405 if (GET_MODE (op) != VOIDmode)
3406 mode = GET_MODE (op);
3407
3408 constant_to_array (mode, op, arr);
3409
3410 if (VECTOR_MODE_P (mode))
3411 mode = GET_MODE_INNER (mode);
3412
3413 bytes = GET_MODE_SIZE (mode);
3414 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3415
3416 /* Check that bytes are repeated. */
3417 for (i = bytes; i < 16; i += bytes)
3418 for (j = 0; j < bytes; j++)
3419 if (arr[j] != arr[i + j])
3420 return 0;
3421
3422 val = arr[0];
3423 for (j = 1; j < bytes; j++)
3424 val = (val << 8) | arr[j];
3425
3426 val = trunc_int_for_mode (val, mode);
3427
3428 return val >= low && val <= high;
3429}
3430
56c7bfc2 3431/* TRUE when op is an immediate and an exact power of 2, and given that
3432 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3433 all entries must be the same. */
3434bool
3754d046 3435exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
56c7bfc2 3436{
3754d046 3437 machine_mode int_mode;
56c7bfc2 3438 HOST_WIDE_INT val;
3439 unsigned char arr[16];
3440 int bytes, i, j;
3441
3442 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3443 || GET_CODE (op) == CONST_VECTOR);
3444
3445 if (GET_CODE (op) == CONST_VECTOR
3446 && !const_vector_immediate_p (op))
3447 return 0;
3448
3449 if (GET_MODE (op) != VOIDmode)
3450 mode = GET_MODE (op);
3451
3452 constant_to_array (mode, op, arr);
3453
3454 if (VECTOR_MODE_P (mode))
3455 mode = GET_MODE_INNER (mode);
3456
3457 bytes = GET_MODE_SIZE (mode);
3458 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3459
3460 /* Check that bytes are repeated. */
3461 for (i = bytes; i < 16; i += bytes)
3462 for (j = 0; j < bytes; j++)
3463 if (arr[j] != arr[i + j])
3464 return 0;
3465
3466 val = arr[0];
3467 for (j = 1; j < bytes; j++)
3468 val = (val << 8) | arr[j];
3469
3470 val = trunc_int_for_mode (val, int_mode);
3471
3472 /* Currently, we only handle SFmode */
3473 gcc_assert (mode == SFmode);
3474 if (mode == SFmode)
3475 {
3476 int exp = (val >> 23) - 127;
3477 return val > 0 && (val & 0x007fffff) == 0
3478 && exp >= low && exp <= high;
3479 }
3480 return FALSE;
3481}
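/* For instance, an SFmode (or V4SF) constant of 2.0 has the bit
   pattern 0x40000000: the mantissa bits are zero and the biased
   exponent is 128, so exp is 1 and the predicate holds whenever
   LOW <= 1 <= HIGH.  A constant such as 0.75 (0x3f400000) has non-zero
   mantissa bits and is rejected.  */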
3482
6cf5579e 3483/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3484
6f4e40cd 3485static bool
3486ea_symbol_ref_p (const_rtx x)
6cf5579e 3487{
6cf5579e 3488 tree decl;
3489
3490 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3491 {
3492 rtx plus = XEXP (x, 0);
3493 rtx op0 = XEXP (plus, 0);
3494 rtx op1 = XEXP (plus, 1);
3495 if (GET_CODE (op1) == CONST_INT)
3496 x = op0;
3497 }
3498
3499 return (GET_CODE (x) == SYMBOL_REF
3500 && (decl = SYMBOL_REF_DECL (x)) != 0
3501 && TREE_CODE (decl) == VAR_DECL
3502 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3503}
3504
644459d0 3505/* We accept:
5b865faf 3506 - any 32-bit constant (SImode, SFmode)
644459d0 3507 - any constant that can be generated with fsmbi (any mode)
5b865faf 3508 - a 64-bit constant where the high and low bits are identical
644459d0 3509 (DImode, DFmode)
5b865faf 3510 - a 128-bit constant where the four 32-bit words match. */
ca316360 3511bool
3754d046 3512spu_legitimate_constant_p (machine_mode mode, rtx x)
644459d0 3513{
6f4e40cd 3514 subrtx_iterator::array_type array;
5df189be 3515 if (GET_CODE (x) == HIGH)
3516 x = XEXP (x, 0);
6cf5579e 3517
3518 /* Reject any __ea qualified reference. These can't appear in
3519 instructions but must be forced to the constant pool. */
6f4e40cd 3520 FOR_EACH_SUBRTX (iter, array, x, ALL)
3521 if (ea_symbol_ref_p (*iter))
3522 return 0;
6cf5579e 3523
644459d0 3524 /* V4SI with all identical symbols is valid. */
5df189be 3525 if (!flag_pic
ca316360 3526 && mode == V4SImode
644459d0 3527 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3528 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3529 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3530 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3531 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3532 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3533
5df189be 3534 if (GET_CODE (x) == CONST_VECTOR
3535 && !const_vector_immediate_p (x))
3536 return 0;
644459d0 3537 return 1;
3538}
3539
3540/* Valid addresses are:
3541 - symbol_ref, label_ref, const
3542 - reg
9d98604b 3543 - reg + const_int, where const_int is 16 byte aligned
644459d0 3544 - reg + reg, alignment doesn't matter
3545 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3546 ignore the 4 least significant bits of the const. We only care about
3547 16 byte modes because the expand phase will change all smaller MEM
3548 references to TImode. */
3549static bool
3754d046 3550spu_legitimate_address_p (machine_mode mode,
fd50b071 3551 rtx x, bool reg_ok_strict)
644459d0 3552{
9d98604b 3553 int aligned = GET_MODE_SIZE (mode) >= 16;
3554 if (aligned
3555 && GET_CODE (x) == AND
644459d0 3556 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3557 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3558 x = XEXP (x, 0);
3559 switch (GET_CODE (x))
3560 {
644459d0 3561 case LABEL_REF:
6cf5579e 3562 return !TARGET_LARGE_MEM;
3563
9d98604b 3564 case SYMBOL_REF:
644459d0 3565 case CONST:
6cf5579e 3566 /* Keep __ea references until reload so that spu_expand_mov can see them
3567 in MEMs. */
6f4e40cd 3568 if (ea_symbol_ref_p (x))
6cf5579e 3569 return !reload_in_progress && !reload_completed;
9d98604b 3570 return !TARGET_LARGE_MEM;
644459d0 3571
3572 case CONST_INT:
3573 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3574
3575 case SUBREG:
3576 x = XEXP (x, 0);
9d98604b 3577 if (REG_P (x))
3578 return 0;
644459d0 3579
3580 case REG:
3581 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3582
3583 case PLUS:
3584 case LO_SUM:
3585 {
3586 rtx op0 = XEXP (x, 0);
3587 rtx op1 = XEXP (x, 1);
3588 if (GET_CODE (op0) == SUBREG)
3589 op0 = XEXP (op0, 0);
3590 if (GET_CODE (op1) == SUBREG)
3591 op1 = XEXP (op1, 0);
644459d0 3592 if (GET_CODE (op0) == REG
3593 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3594 && GET_CODE (op1) == CONST_INT
fa695424 3595 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3596 /* If virtual registers are involved, the displacement will
3597 change later on anyway, so checking would be premature.
3598 Reload will make sure the final displacement after
3599 register elimination is OK. */
3600 || op0 == arg_pointer_rtx
3601 || op0 == frame_pointer_rtx
3602 || op0 == virtual_stack_vars_rtx)
9d98604b 3603 && (!aligned || (INTVAL (op1) & 15) == 0))
3604 return TRUE;
644459d0 3605 if (GET_CODE (op0) == REG
3606 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3607 && GET_CODE (op1) == REG
3608 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3609 return TRUE;
644459d0 3610 }
3611 break;
3612
3613 default:
3614 break;
3615 }
9d98604b 3616 return FALSE;
644459d0 3617}
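/* Illustrative cases for the rules above: for a TImode (16-byte)
   access, (plus (reg) (const_int 48)) is accepted but
   (plus (reg) (const_int 20)) is not, because lqd/stqd would ignore the
   low 4 bits of the 20; (plus (reg) (reg)) is fine for any mode, and
   displacements outside the signed range -0x2000 .. 0x1fff are always
   rejected.  */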
3618
6cf5579e 3619/* Like spu_legitimate_address_p, except with named addresses. */
3620static bool
3754d046 3621spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
6cf5579e 3622 bool reg_ok_strict, addr_space_t as)
3623{
3624 if (as == ADDR_SPACE_EA)
3625 return (REG_P (x) && (GET_MODE (x) == EAmode));
3626
3627 else if (as != ADDR_SPACE_GENERIC)
3628 gcc_unreachable ();
3629
3630 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3631}
3632
644459d0 3633/* When the address is reg + const_int, force the const_int into a
fa7637bd 3634 register. */
3defb88e 3635static rtx
644459d0 3636spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3754d046 3637 machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3638{
3639 rtx op0, op1;
3640 /* Make sure both operands are registers. */
3641 if (GET_CODE (x) == PLUS)
3642 {
3643 op0 = XEXP (x, 0);
3644 op1 = XEXP (x, 1);
3645 if (ALIGNED_SYMBOL_REF_P (op0))
3646 {
3647 op0 = force_reg (Pmode, op0);
3648 mark_reg_pointer (op0, 128);
3649 }
3650 else if (GET_CODE (op0) != REG)
3651 op0 = force_reg (Pmode, op0);
3652 if (ALIGNED_SYMBOL_REF_P (op1))
3653 {
3654 op1 = force_reg (Pmode, op1);
3655 mark_reg_pointer (op1, 128);
3656 }
3657 else if (GET_CODE (op1) != REG)
3658 op1 = force_reg (Pmode, op1);
3659 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3660 }
41e3a0c7 3661 return x;
644459d0 3662}
3663
6cf5579e 3664/* Like spu_legitimate_address, except with named address support. */
3665static rtx
3754d046 3666spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
6cf5579e 3667 addr_space_t as)
3668{
3669 if (as != ADDR_SPACE_GENERIC)
3670 return x;
3671
3672 return spu_legitimize_address (x, oldx, mode);
3673}
3674
fa695424 3675/* Reload reg + const_int for out-of-range displacements. */
3676rtx
3754d046 3677spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
fa695424 3678 int opnum, int type)
3679{
3680 bool removed_and = false;
3681
3682 if (GET_CODE (ad) == AND
3683 && CONST_INT_P (XEXP (ad, 1))
3684 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3685 {
3686 ad = XEXP (ad, 0);
3687 removed_and = true;
3688 }
3689
3690 if (GET_CODE (ad) == PLUS
3691 && REG_P (XEXP (ad, 0))
3692 && CONST_INT_P (XEXP (ad, 1))
3693 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3694 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3695 {
3696 /* Unshare the sum. */
3697 ad = copy_rtx (ad);
3698
3699 /* Reload the displacement. */
3700 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3701 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3702 opnum, (enum reload_type) type);
3703
3704 /* Add back AND for alignment if we stripped it. */
3705 if (removed_and)
3706 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3707
3708 return ad;
3709 }
3710
3711 return NULL_RTX;
3712}
3713
644459d0 3714/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3715 struct attribute_spec.handler. */
3716static tree
3717spu_handle_fndecl_attribute (tree * node,
3718 tree name,
3719 tree args ATTRIBUTE_UNUSED,
3720 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3721{
3722 if (TREE_CODE (*node) != FUNCTION_DECL)
3723 {
67a779df 3724 warning (0, "%qE attribute only applies to functions",
3725 name);
644459d0 3726 *no_add_attrs = true;
3727 }
3728
3729 return NULL_TREE;
3730}
3731
3732/* Handle the "vector" attribute. */
3733static tree
3734spu_handle_vector_attribute (tree * node, tree name,
3735 tree args ATTRIBUTE_UNUSED,
3736 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3737{
3738 tree type = *node, result = NULL_TREE;
3754d046 3739 machine_mode mode;
644459d0 3740 int unsigned_p;
3741
3742 while (POINTER_TYPE_P (type)
3743 || TREE_CODE (type) == FUNCTION_TYPE
3744 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3745 type = TREE_TYPE (type);
3746
3747 mode = TYPE_MODE (type);
3748
3749 unsigned_p = TYPE_UNSIGNED (type);
3750 switch (mode)
3751 {
3752 case DImode:
3753 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3754 break;
3755 case SImode:
3756 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3757 break;
3758 case HImode:
3759 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3760 break;
3761 case QImode:
3762 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3763 break;
3764 case SFmode:
3765 result = V4SF_type_node;
3766 break;
3767 case DFmode:
3768 result = V2DF_type_node;
3769 break;
3770 default:
3771 break;
3772 }
3773
3774 /* Propagate qualifiers attached to the element type
3775 onto the vector type. */
3776 if (result && result != type && TYPE_QUALS (type))
3777 result = build_qualified_type (result, TYPE_QUALS (type));
3778
3779 *no_add_attrs = true; /* No need to hang on to the attribute. */
3780
3781 if (!result)
67a779df 3782 warning (0, "%qE attribute ignored", name);
644459d0 3783 else
d991e6e8 3784 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3785
3786 return NULL_TREE;
3787}
3788
f2b32076 3789/* Return nonzero if FUNC is a naked function. */
644459d0 3790static int
3791spu_naked_function_p (tree func)
3792{
3793 tree a;
3794
3795 if (TREE_CODE (func) != FUNCTION_DECL)
3796 abort ();
3797
3798 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3799 return a != NULL_TREE;
3800}
3801
3802int
3803spu_initial_elimination_offset (int from, int to)
3804{
3805 int saved_regs_size = spu_saved_regs_size ();
3806 int sp_offset = 0;
d5bf7b64 3807 if (!crtl->is_leaf || crtl->outgoing_args_size
644459d0 3808 || get_frame_size () || saved_regs_size)
3809 sp_offset = STACK_POINTER_OFFSET;
3810 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3811 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3812 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3813 return get_frame_size ();
644459d0 3814 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3815 return sp_offset + crtl->outgoing_args_size
644459d0 3816 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3817 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3818 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3819 else
3820 gcc_unreachable ();
644459d0 3821}
3822
3823rtx
fb80456a 3824spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3825{
3754d046 3826 machine_mode mode = TYPE_MODE (type);
644459d0 3827 int byte_size = ((mode == BLKmode)
3828 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3829
3830 /* Make sure small structs are left justified in a register. */
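  /* For example, assuming the 16-byte UNITS_PER_WORD this code relies on,
     a 20-byte struct comes back as a PARALLEL of one TImode register for
     bytes 0-15 plus one SImode register for the remaining 4 bytes, keeping
     the data left justified.  */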
3831 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3832 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3833 {
3754d046 3834 machine_mode smode;
644459d0 3835 rtvec v;
3836 int i;
3837 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3838 int n = byte_size / UNITS_PER_WORD;
3839 v = rtvec_alloc (nregs);
3840 for (i = 0; i < n; i++)
3841 {
3842 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3843 gen_rtx_REG (TImode,
3844 FIRST_RETURN_REGNUM
3845 + i),
3846 GEN_INT (UNITS_PER_WORD * i));
3847 byte_size -= UNITS_PER_WORD;
3848 }
3849
3850 if (n < nregs)
3851 {
3852 if (byte_size < 4)
3853 byte_size = 4;
3854 smode =
3855 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3856 RTVEC_ELT (v, n) =
3857 gen_rtx_EXPR_LIST (VOIDmode,
3858 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3859 GEN_INT (UNITS_PER_WORD * n));
3860 }
3861 return gen_rtx_PARALLEL (mode, v);
3862 }
3863 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3864}
3865
ee9034d4 3866static rtx
39cba157 3867spu_function_arg (cumulative_args_t cum_v,
3754d046 3868 machine_mode mode,
ee9034d4 3869 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3870{
39cba157 3871 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 3872 int byte_size;
3873
a08c5dd0 3874 if (*cum >= MAX_REGISTER_ARGS)
644459d0 3875 return 0;
3876
3877 byte_size = ((mode == BLKmode)
3878 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3879
3880 /* The ABI does not allow parameters to be passed partially in a
3881 register and partially on the stack. */
a08c5dd0 3882 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 3883 return 0;
3884
3885 /* Make sure small structs are left justified in a register. */
3886 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3887 && byte_size < UNITS_PER_WORD && byte_size > 0)
3888 {
3754d046 3889 machine_mode smode;
644459d0 3890 rtx gr_reg;
3891 if (byte_size < 4)
3892 byte_size = 4;
3893 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3894 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 3895 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 3896 const0_rtx);
3897 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3898 }
3899 else
a08c5dd0 3900 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 3901}
3902
ee9034d4 3903static void
3754d046 3904spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ee9034d4 3905 const_tree type, bool named ATTRIBUTE_UNUSED)
3906{
39cba157 3907 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3908
ee9034d4 3909 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3910 ? 1
3911 : mode == BLKmode
3912 ? ((int_size_in_bytes (type) + 15) / 16)
3913 : mode == VOIDmode
3914 ? 1
3915 : HARD_REGNO_NREGS (cum, mode));
3916}
3917
644459d0 3918/* Variable sized types are passed by reference. */
3919static bool
39cba157 3920spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3754d046 3921 machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3922 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3923{
3924 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3925}
3926\f
3927
3928/* Var args. */
3929
3930/* Create and return the va_list datatype.
3931
3932 On SPU, va_list is an array type equivalent to
3933
3934 typedef struct __va_list_tag
3935 {
3936 void *__args __attribute__((__aligned(16)));
3937 void *__skip __attribute__((__aligned(16)));
3938
3939 } va_list[1];
3940
fa7637bd 3941 where __args points to the arg that will be returned by the next
644459d0 3942 va_arg(), and __skip points to the previous stack frame such that
3943 when __args == __skip we should advance __args by 32 bytes. */
3944static tree
3945spu_build_builtin_va_list (void)
3946{
3947 tree f_args, f_skip, record, type_decl;
3948 bool owp;
3949
3950 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3951
3952 type_decl =
54e46243 3953 build_decl (BUILTINS_LOCATION,
3954 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3955
54e46243 3956 f_args = build_decl (BUILTINS_LOCATION,
3957 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3958 f_skip = build_decl (BUILTINS_LOCATION,
3959 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3960
3961 DECL_FIELD_CONTEXT (f_args) = record;
3962 DECL_ALIGN (f_args) = 128;
3963 DECL_USER_ALIGN (f_args) = 1;
3964
3965 DECL_FIELD_CONTEXT (f_skip) = record;
3966 DECL_ALIGN (f_skip) = 128;
3967 DECL_USER_ALIGN (f_skip) = 1;
3968
bc907808 3969 TYPE_STUB_DECL (record) = type_decl;
644459d0 3970 TYPE_NAME (record) = type_decl;
3971 TYPE_FIELDS (record) = f_args;
1767a056 3972 DECL_CHAIN (f_args) = f_skip;
644459d0 3973
3974 /* We know this is being padded and we want it that way. It is an
3975 internal type, so hide the warnings from the user. */
3976 owp = warn_padded;
3977 warn_padded = false;
3978
3979 layout_type (record);
3980
3981 warn_padded = owp;
3982
3983 /* The correct type is an array type of one element. */
3984 return build_array_type (record, build_index_type (size_zero_node));
3985}
3986
3987/* Implement va_start by filling the va_list structure VALIST.
3988 NEXTARG points to the first anonymous stack argument.
3989
3990 The following global variables are used to initialize
3991 the va_list structure:
3992
abe32cce 3993 crtl->args.info;
644459d0 3994 the CUMULATIVE_ARGS for this function
3995
abe32cce 3996 crtl->args.arg_offset_rtx:
644459d0 3997 holds the offset of the first anonymous stack argument
3998 (relative to the virtual arg pointer). */
3999
8a58ed0a 4000static void
644459d0 4001spu_va_start (tree valist, rtx nextarg)
4002{
4003 tree f_args, f_skip;
4004 tree args, skip, t;
4005
4006 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4007 f_skip = DECL_CHAIN (f_args);
644459d0 4008
170efcd4 4009 valist = build_simple_mem_ref (valist);
644459d0 4010 args =
4011 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4012 skip =
4013 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4014
4015 /* Find the __args area. */
4016 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4017 if (crtl->args.pretend_args_size > 0)
2cc66f2a 4018 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 4019 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4020 TREE_SIDE_EFFECTS (t) = 1;
4021 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4022
4023 /* Find the __skip area. */
4024 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 4025 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4026 - STACK_POINTER_OFFSET));
75a70cf9 4027 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4028 TREE_SIDE_EFFECTS (t) = 1;
4029 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4030}
4031
4032/* Gimplify va_arg by updating the va_list structure
4033 VALIST as required to retrieve an argument of type
4034 TYPE, and returning that argument.
4035
4036 ret = va_arg(VALIST, TYPE);
4037
4038 generates code equivalent to:
4039
4040 paddedsize = (sizeof(TYPE) + 15) & -16;
4041 if (VALIST.__args + paddedsize > VALIST.__skip
4042 && VALIST.__args <= VALIST.__skip)
4043 addr = VALIST.__skip + 32;
4044 else
4045 addr = VALIST.__args;
4046 VALIST.__args = addr + paddedsize;
4047 ret = *(TYPE *)addr;
4048 */
4049static tree
75a70cf9 4050spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4051 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4052{
4053 tree f_args, f_skip;
4054 tree args, skip;
4055 HOST_WIDE_INT size, rsize;
2cc66f2a 4056 tree addr, tmp;
644459d0 4057 bool pass_by_reference_p;
4058
4059 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4060 f_skip = DECL_CHAIN (f_args);
644459d0 4061
182cf5a9 4062 valist = build_simple_mem_ref (valist);
644459d0 4063 args =
4064 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4065 skip =
4066 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4067
4068 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4069
4070 /* if an object is dynamically sized, a pointer to it is passed
4071 instead of the object itself. */
27a82950 4072 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4073 false);
644459d0 4074 if (pass_by_reference_p)
4075 type = build_pointer_type (type);
4076 size = int_size_in_bytes (type);
4077 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4078
4079 /* build conditional expression to calculate addr. The expression
4080 will be gimplified later. */
2cc66f2a 4081 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4082 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4083 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4084 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4085 unshare_expr (skip)));
644459d0 4086
4087 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4088 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4089 unshare_expr (args));
644459d0 4090
75a70cf9 4091 gimplify_assign (addr, tmp, pre_p);
644459d0 4092
4093 /* update VALIST.__args */
2cc66f2a 4094 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4095 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4096
8115f0af 4097 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4098 addr);
644459d0 4099
4100 if (pass_by_reference_p)
4101 addr = build_va_arg_indirect_ref (addr);
4102
4103 return build_va_arg_indirect_ref (addr);
4104}
4105
4106/* Save parameter registers starting with the register that corresponds
4107 to the first unnamed parameter. If the first unnamed parameter is
4108 on the stack then save no registers. Set pretend_args_size to the
4109 amount of space needed to save the registers. */
39cba157 4110static void
3754d046 4111spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
644459d0 4112 tree type, int *pretend_size, int no_rtl)
4113{
4114 if (!no_rtl)
4115 {
4116 rtx tmp;
4117 int regno;
4118 int offset;
39cba157 4119 int ncum = *get_cumulative_args (cum);
644459d0 4120
4121 /* cum currently points to the last named argument; we want to
4122 start at the next argument. */
39cba157 4123 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4124
4125 offset = -STACK_POINTER_OFFSET;
4126 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4127 {
4128 tmp = gen_frame_mem (V4SImode,
29c05e22 4129 plus_constant (Pmode, virtual_incoming_args_rtx,
644459d0 4130 offset));
4131 emit_move_insn (tmp,
4132 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4133 offset += 16;
4134 }
4135 *pretend_size = offset + STACK_POINTER_OFFSET;
4136 }
4137}
4138\f
b2d7ede1 4139static void
644459d0 4140spu_conditional_register_usage (void)
4141{
4142 if (flag_pic)
4143 {
4144 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4145 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4146 }
644459d0 4147}
4148
9d98604b 4149/* This is called any time we inspect the alignment of a register for
4150 addresses. */
644459d0 4151static int
9d98604b 4152reg_aligned_for_addr (rtx x)
644459d0 4153{
9d98604b 4154 int regno =
4155 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4156 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4157}
4158
69ced2d6 4159/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4160 into its SYMBOL_REF_FLAGS. */
4161static void
4162spu_encode_section_info (tree decl, rtx rtl, int first)
4163{
4164 default_encode_section_info (decl, rtl, first);
4165
4166 /* If a variable has a forced alignment to < 16 bytes, mark it with
4167 SYMBOL_FLAG_ALIGN1. */
4168 if (TREE_CODE (decl) == VAR_DECL
4169 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4170 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4171}
4172
644459d0 4173/* Return TRUE if we are certain the mem refers to a complete object
4174 which is both 16-byte aligned and padded to a 16-byte boundary. This
4175 would make it safe to store with a single instruction.
4176 We guarantee the alignment and padding for static objects by aligning
4177 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4178 FIXME: We currently cannot guarantee this for objects on the stack
4179 because assign_parm_setup_stack calls assign_stack_local with the
4180 alignment of the parameter mode and in that case the alignment never
4181 gets adjusted by LOCAL_ALIGNMENT. */
4182static int
4183store_with_one_insn_p (rtx mem)
4184{
3754d046 4185 machine_mode mode = GET_MODE (mem);
644459d0 4186 rtx addr = XEXP (mem, 0);
9d98604b 4187 if (mode == BLKmode)
644459d0 4188 return 0;
9d98604b 4189 if (GET_MODE_SIZE (mode) >= 16)
4190 return 1;
644459d0 4191 /* Only static objects. */
4192 if (GET_CODE (addr) == SYMBOL_REF)
4193 {
4194 /* We use the associated declaration to make sure the access is
fa7637bd 4195 referring to the whole object.
851d9296 4196 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4197 if it is necessary. Will there be cases where one exists, and
4198 the other does not? Will there be cases where both exist, but
4199 have different types? */
4200 tree decl = MEM_EXPR (mem);
4201 if (decl
4202 && TREE_CODE (decl) == VAR_DECL
4203 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4204 return 1;
4205 decl = SYMBOL_REF_DECL (addr);
4206 if (decl
4207 && TREE_CODE (decl) == VAR_DECL
4208 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4209 return 1;
4210 }
4211 return 0;
4212}
4213
9d98604b 4214/* Return 1 when the address is not valid for a simple load and store as
4215 required by the '_mov*' patterns. We could make this less strict
4216 for loads, but we prefer MEMs to look the same so they are more
4217 likely to be merged. */
4218static int
4219address_needs_split (rtx mem)
4220{
4221 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4222 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4223 || !(store_with_one_insn_p (mem)
4224 || mem_is_padded_component_ref (mem))))
4225 return 1;
4226
4227 return 0;
4228}
4229
6cf5579e 4230static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4231static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4232static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4233
4234/* MEM is known to be an __ea qualified memory access. Emit a call to
4235 fetch the PPU memory into local store, and set DATA_ADDR to its
4236 address in local store. */
4237
4238static void
4239ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4240{
4241 if (is_store)
4242 {
4243 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4244 if (!cache_fetch_dirty)
4245 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4246 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4247 2, ea_addr, EAmode, ndirty, SImode);
4248 }
4249 else
4250 {
4251 if (!cache_fetch)
4252 cache_fetch = init_one_libfunc ("__cache_fetch");
4253 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4254 1, ea_addr, EAmode);
4255 }
4256}
4257
4258/* Like ea_load_store, but do the cache tag comparison and, for stores,
4259 dirty bit marking, inline.
4260
4261 The cache control data structure is an array of
4262
4263 struct __cache_tag_array
4264 {
4265 unsigned int tag_lo[4];
4266 unsigned int tag_hi[4];
4267 void *data_pointer[4];
4268 int reserved[4];
4269 vector unsigned short dirty_bits[4];
4270 } */
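/* Roughly, this implements a 4-way set-associative software cache with
   128-byte lines: the __ea address masked with (__cache_tag_array_size - 128)
   selects a set, the address is compared against all four tags at once, and
   the matching slot's data_pointer plus the low 7 address bits gives the
   local-store address.  */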
4271
4272static void
4273ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4274{
4275 rtx ea_addr_si;
4276 HOST_WIDE_INT v;
4277 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4278 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4279 rtx index_mask = gen_reg_rtx (SImode);
4280 rtx tag_arr = gen_reg_rtx (Pmode);
4281 rtx splat_mask = gen_reg_rtx (TImode);
4282 rtx splat = gen_reg_rtx (V4SImode);
4283 rtx splat_hi = NULL_RTX;
4284 rtx tag_index = gen_reg_rtx (Pmode);
4285 rtx block_off = gen_reg_rtx (SImode);
4286 rtx tag_addr = gen_reg_rtx (Pmode);
4287 rtx tag = gen_reg_rtx (V4SImode);
4288 rtx cache_tag = gen_reg_rtx (V4SImode);
4289 rtx cache_tag_hi = NULL_RTX;
4290 rtx cache_ptrs = gen_reg_rtx (TImode);
4291 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4292 rtx tag_equal = gen_reg_rtx (V4SImode);
4293 rtx tag_equal_hi = NULL_RTX;
4294 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4295 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4296 rtx eq_index = gen_reg_rtx (SImode);
0af56f80 4297 rtx bcomp, hit_label, hit_ref, cont_label;
4298 rtx_insn *insn;
6cf5579e 4299
4300 if (spu_ea_model != 32)
4301 {
4302 splat_hi = gen_reg_rtx (V4SImode);
4303 cache_tag_hi = gen_reg_rtx (V4SImode);
4304 tag_equal_hi = gen_reg_rtx (V4SImode);
4305 }
4306
29c05e22 4307 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
6cf5579e 4308 emit_move_insn (tag_arr, tag_arr_sym);
4309 v = 0x0001020300010203LL;
4310 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4311 ea_addr_si = ea_addr;
4312 if (spu_ea_model != 32)
4313 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4314
4315 /* tag_index = ea_addr & (tag_array_size - 128) */
4316 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4317
4318 /* splat ea_addr to all 4 slots. */
4319 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4320 /* Similarly for high 32 bits of ea_addr. */
4321 if (spu_ea_model != 32)
4322 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4323
4324 /* block_off = ea_addr & 127 */
4325 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4326
4327 /* tag_addr = tag_arr + tag_index */
4328 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4329
4330 /* Read cache tags. */
4331 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4332 if (spu_ea_model != 32)
4333 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
29c05e22 4334 plus_constant (Pmode,
4335 tag_addr, 16)));
6cf5579e 4336
4337 /* tag = ea_addr & -128 */
4338 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4339
4340 /* Read all four cache data pointers. */
4341 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
29c05e22 4342 plus_constant (Pmode,
4343 tag_addr, 32)));
6cf5579e 4344
4345 /* Compare tags. */
4346 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4347 if (spu_ea_model != 32)
4348 {
4349 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4350 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4351 }
4352
4353 /* At most one of the tags compare equal, so tag_equal has one
4354 32-bit slot set to all 1's, with the other slots all zero.
4355 gbb picks off low bit from each byte in the 128-bit registers,
4356 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4357 we have a hit. */
4358 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4359 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4360
4361 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4362 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4363
4364 /* This allows us to rotate the corresponding cache data pointer into
4365 slot 0 (rotating by eq_index mod 16 bytes). */
4366 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4367 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4368
4369 /* Add block offset to form final data address. */
4370 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4371
4372 /* Check that we did hit. */
4373 hit_label = gen_label_rtx ();
4374 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4375 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4376 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4377 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4378 hit_ref, pc_rtx)));
4379 /* Say that this branch is very likely to happen. */
4380 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
9eb946de 4381 add_int_reg_note (insn, REG_BR_PROB, v);
6cf5579e 4382
4383 ea_load_store (mem, is_store, ea_addr, data_addr);
4384 cont_label = gen_label_rtx ();
4385 emit_jump_insn (gen_jump (cont_label));
4386 emit_barrier ();
4387
4388 emit_label (hit_label);
4389
4390 if (is_store)
4391 {
4392 HOST_WIDE_INT v_hi;
4393 rtx dirty_bits = gen_reg_rtx (TImode);
4394 rtx dirty_off = gen_reg_rtx (SImode);
4395 rtx dirty_128 = gen_reg_rtx (TImode);
4396 rtx neg_block_off = gen_reg_rtx (SImode);
4397
4398 /* Set up mask with one dirty bit per byte of the mem we are
4399 writing, starting from top bit. */
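      /* For example, for a 4-byte store the shift below is (128 - 4) & 63
         == 60, and because 128 - 4 >= 64 the mask moves entirely into v_hi,
         so dirty_bits becomes a TImode constant with only its top four bits
         set: one dirty bit per byte written.  */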
4400 v_hi = v = -1;
4401 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4402 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4403 {
4404 v_hi = v;
4405 v = 0;
4406 }
4407 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4408
4409 /* Form index into cache dirty_bits. eq_index is one of
4410 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4411 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4412 offset to each of the four dirty_bits elements. */
4413 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4414
4415 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4416
4417 /* Rotate bit mask to proper bit. */
4418 emit_insn (gen_negsi2 (neg_block_off, block_off));
4419 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4420 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4421
4422 /* Or in the new dirty bits. */
4423 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4424
4425 /* Store. */
4426 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4427 }
4428
4429 emit_label (cont_label);
4430}
4431
4432static rtx
4433expand_ea_mem (rtx mem, bool is_store)
4434{
4435 rtx ea_addr;
4436 rtx data_addr = gen_reg_rtx (Pmode);
4437 rtx new_mem;
4438
4439 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4440 if (optimize_size || optimize == 0)
4441 ea_load_store (mem, is_store, ea_addr, data_addr);
4442 else
4443 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4444
4445 if (ea_alias_set == -1)
4446 ea_alias_set = new_alias_set ();
4447
4448 /* We generate a new MEM RTX to refer to the copy of the data
4449 in the cache. We do not copy memory attributes (except the
4450 alignment) from the original MEM, as they may no longer apply
4451 to the cache copy. */
4452 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4453 set_mem_alias_set (new_mem, ea_alias_set);
4454 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4455
4456 return new_mem;
4457}
4458
644459d0 4459int
3754d046 4460spu_expand_mov (rtx * ops, machine_mode mode)
644459d0 4461{
4462 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4463 {
4464 /* Perform the move in the destination SUBREG's inner mode. */
4465 ops[0] = SUBREG_REG (ops[0]);
4466 mode = GET_MODE (ops[0]);
4467 ops[1] = gen_lowpart_common (mode, ops[1]);
4468 gcc_assert (ops[1]);
4469 }
644459d0 4470
4471 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4472 {
4473 rtx from = SUBREG_REG (ops[1]);
3754d046 4474 machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4475
4476 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4477 && GET_MODE_CLASS (imode) == MODE_INT
4478 && subreg_lowpart_p (ops[1]));
4479
4480 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4481 imode = SImode;
4482 if (imode != GET_MODE (from))
4483 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4484
4485 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4486 {
d6bf3b14 4487 enum insn_code icode = convert_optab_handler (trunc_optab,
4488 mode, imode);
644459d0 4489 emit_insn (GEN_FCN (icode) (ops[0], from));
4490 }
4491 else
4492 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4493 return 1;
4494 }
4495
4496 /* At least one of the operands needs to be a register. */
4497 if ((reload_in_progress | reload_completed) == 0
4498 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4499 {
4500 rtx temp = force_reg (mode, ops[1]);
4501 emit_move_insn (ops[0], temp);
4502 return 1;
4503 }
4504 if (reload_in_progress || reload_completed)
4505 {
dea01258 4506 if (CONSTANT_P (ops[1]))
4507 return spu_split_immediate (ops);
644459d0 4508 return 0;
4509 }
9d98604b 4510
4511 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4512 extend them. */
4513 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4514 {
9d98604b 4515 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4516 if (val != INTVAL (ops[1]))
644459d0 4517 {
9d98604b 4518 emit_move_insn (ops[0], GEN_INT (val));
4519 return 1;
644459d0 4520 }
4521 }
9d98604b 4522 if (MEM_P (ops[0]))
6cf5579e 4523 {
4524 if (MEM_ADDR_SPACE (ops[0]))
4525 ops[0] = expand_ea_mem (ops[0], true);
4526 return spu_split_store (ops);
4527 }
9d98604b 4528 if (MEM_P (ops[1]))
6cf5579e 4529 {
4530 if (MEM_ADDR_SPACE (ops[1]))
4531 ops[1] = expand_ea_mem (ops[1], false);
4532 return spu_split_load (ops);
4533 }
9d98604b 4534
644459d0 4535 return 0;
4536}
4537
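/* Copy a narrow value out of the TImode register SRC into DST: shift SRC
   right by 64 bits when DST is 8 bytes wide and by 96 bits otherwise,
   truncate to an integer mode of DST's size, and move the result into DST
   (through a subreg when DST's mode is not that integer mode).  */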
9d98604b 4538static void
4539spu_convert_move (rtx dst, rtx src)
644459d0 4540{
3754d046 4541 machine_mode mode = GET_MODE (dst);
4542 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
9d98604b 4543 rtx reg;
4544 gcc_assert (GET_MODE (src) == TImode);
4545 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4546 emit_insn (gen_rtx_SET (VOIDmode, reg,
4547 gen_rtx_TRUNCATE (int_mode,
4548 gen_rtx_LSHIFTRT (TImode, src,
4549 GEN_INT (int_mode == DImode ? 64 : 96)))));
4550 if (int_mode != mode)
4551 {
4552 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4553 emit_move_insn (dst, reg);
4554 }
4555}
644459d0 4556
9d98604b 4557/* Load TImode values into DST0 and DST1 (when DST1 is non-NULL) using
4558 the address of SRC and that address plus 16. Return a REG or CONST_INT
4559 that specifies how many bytes to rotate the loaded registers, plus any
4560 extra from EXTRA_ROTBY. The address and rotate amounts are
4561 normalized to improve merging of loads and rotate computations. */
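/* For example, a load from (plus (reg R) (const_int 7)) with R known to be
   16-byte aligned keeps R as the lqd base (the offset is dropped from the
   address and folded into the rotate) and returns a rotate amount of
   7 plus EXTRA_ROTBY.  */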
4562static rtx
4563spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4564{
4565 rtx addr = XEXP (src, 0);
4566 rtx p0, p1, rot, addr0, addr1;
4567 int rot_amt;
644459d0 4568
4569 rot = 0;
4570 rot_amt = 0;
9d98604b 4571
4572 if (MEM_ALIGN (src) >= 128)
4573 /* Address is already aligned; simply perform a TImode load. */ ;
4574 else if (GET_CODE (addr) == PLUS)
644459d0 4575 {
4576 /* 8 cases:
4577 aligned reg + aligned reg => lqx
4578 aligned reg + unaligned reg => lqx, rotqby
4579 aligned reg + aligned const => lqd
4580 aligned reg + unaligned const => lqd, rotqbyi
4581 unaligned reg + aligned reg => lqx, rotqby
4582 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4583 unaligned reg + aligned const => lqd, rotqby
4584 unaligned reg + unaligned const -> not allowed by legitimate address
4585 */
4586 p0 = XEXP (addr, 0);
4587 p1 = XEXP (addr, 1);
9d98604b 4588 if (!reg_aligned_for_addr (p0))
644459d0 4589 {
9d98604b 4590 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4591 {
9d98604b 4592 rot = gen_reg_rtx (SImode);
4593 emit_insn (gen_addsi3 (rot, p0, p1));
4594 }
4595 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4596 {
4597 if (INTVAL (p1) > 0
4598 && REG_POINTER (p0)
4599 && INTVAL (p1) * BITS_PER_UNIT
4600 < REGNO_POINTER_ALIGN (REGNO (p0)))
4601 {
4602 rot = gen_reg_rtx (SImode);
4603 emit_insn (gen_addsi3 (rot, p0, p1));
4604 addr = p0;
4605 }
4606 else
4607 {
4608 rtx x = gen_reg_rtx (SImode);
4609 emit_move_insn (x, p1);
4610 if (!spu_arith_operand (p1, SImode))
4611 p1 = x;
4612 rot = gen_reg_rtx (SImode);
4613 emit_insn (gen_addsi3 (rot, p0, p1));
4614 addr = gen_rtx_PLUS (Pmode, p0, x);
4615 }
644459d0 4616 }
4617 else
4618 rot = p0;
4619 }
4620 else
4621 {
4622 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4623 {
4624 rot_amt = INTVAL (p1) & 15;
9d98604b 4625 if (INTVAL (p1) & -16)
4626 {
4627 p1 = GEN_INT (INTVAL (p1) & -16);
4628 addr = gen_rtx_PLUS (SImode, p0, p1);
4629 }
4630 else
4631 addr = p0;
644459d0 4632 }
9d98604b 4633 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4634 rot = p1;
4635 }
4636 }
9d98604b 4637 else if (REG_P (addr))
644459d0 4638 {
9d98604b 4639 if (!reg_aligned_for_addr (addr))
644459d0 4640 rot = addr;
4641 }
4642 else if (GET_CODE (addr) == CONST)
4643 {
4644 if (GET_CODE (XEXP (addr, 0)) == PLUS
4645 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4646 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4647 {
4648 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4649 if (rot_amt & -16)
4650 addr = gen_rtx_CONST (Pmode,
4651 gen_rtx_PLUS (Pmode,
4652 XEXP (XEXP (addr, 0), 0),
4653 GEN_INT (rot_amt & -16)));
4654 else
4655 addr = XEXP (XEXP (addr, 0), 0);
4656 }
4657 else
9d98604b 4658 {
4659 rot = gen_reg_rtx (Pmode);
4660 emit_move_insn (rot, addr);
4661 }
644459d0 4662 }
4663 else if (GET_CODE (addr) == CONST_INT)
4664 {
4665 rot_amt = INTVAL (addr);
4666 addr = GEN_INT (rot_amt & -16);
4667 }
4668 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4669 {
4670 rot = gen_reg_rtx (Pmode);
4671 emit_move_insn (rot, addr);
4672 }
644459d0 4673
9d98604b 4674 rot_amt += extra_rotby;
644459d0 4675
4676 rot_amt &= 15;
4677
4678 if (rot && rot_amt)
4679 {
9d98604b 4680 rtx x = gen_reg_rtx (SImode);
4681 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4682 rot = x;
644459d0 4683 rot_amt = 0;
4684 }
9d98604b 4685 if (!rot && rot_amt)
4686 rot = GEN_INT (rot_amt);
4687
4688 addr0 = copy_rtx (addr);
4689 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4690 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4691
4692 if (dst1)
4693 {
29c05e22 4694 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
9d98604b 4695 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4696 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4697 }
644459d0 4698
9d98604b 4699 return rot;
4700}
4701
4702int
4703spu_split_load (rtx * ops)
4704{
3754d046 4705 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4706 rtx addr, load, rot;
4707 int rot_amt;
644459d0 4708
9d98604b 4709 if (GET_MODE_SIZE (mode) >= 16)
4710 return 0;
644459d0 4711
9d98604b 4712 addr = XEXP (ops[1], 0);
4713 gcc_assert (GET_CODE (addr) != AND);
4714
4715 if (!address_needs_split (ops[1]))
4716 {
4717 ops[1] = change_address (ops[1], TImode, addr);
4718 load = gen_reg_rtx (TImode);
4719 emit_insn (gen__movti (load, ops[1]));
4720 spu_convert_move (ops[0], load);
4721 return 1;
4722 }
4723
4724 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4725
4726 load = gen_reg_rtx (TImode);
4727 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4728
4729 if (rot)
4730 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4731
9d98604b 4732 spu_convert_move (ops[0], load);
4733 return 1;
644459d0 4734}
4735
9d98604b 4736int
644459d0 4737spu_split_store (rtx * ops)
4738{
3754d046 4739 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4740 rtx reg;
644459d0 4741 rtx addr, p0, p1, p1_lo, smem;
4742 int aform;
4743 int scalar;
4744
9d98604b 4745 if (GET_MODE_SIZE (mode) >= 16)
4746 return 0;
4747
644459d0 4748 addr = XEXP (ops[0], 0);
9d98604b 4749 gcc_assert (GET_CODE (addr) != AND);
4750
4751 if (!address_needs_split (ops[0]))
4752 {
4753 reg = gen_reg_rtx (TImode);
4754 emit_insn (gen_spu_convert (reg, ops[1]));
4755 ops[0] = change_address (ops[0], TImode, addr);
4756 emit_move_insn (ops[0], reg);
4757 return 1;
4758 }
644459d0 4759
4760 if (GET_CODE (addr) == PLUS)
4761 {
4762 /* 8 cases:
4763 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4764 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4765 aligned reg + aligned const => lqd, c?d, shuf, stqx
4766 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4767 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4768 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4769 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4770 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4771 */
4772 aform = 0;
4773 p0 = XEXP (addr, 0);
4774 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4775 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4776 {
4777 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4778 if (reg_aligned_for_addr (p0))
4779 {
4780 p1 = GEN_INT (INTVAL (p1) & -16);
4781 if (p1 == const0_rtx)
4782 addr = p0;
4783 else
4784 addr = gen_rtx_PLUS (SImode, p0, p1);
4785 }
4786 else
4787 {
4788 rtx x = gen_reg_rtx (SImode);
4789 emit_move_insn (x, p1);
4790 addr = gen_rtx_PLUS (SImode, p0, x);
4791 }
644459d0 4792 }
4793 }
9d98604b 4794 else if (REG_P (addr))
644459d0 4795 {
4796 aform = 0;
4797 p0 = addr;
4798 p1 = p1_lo = const0_rtx;
4799 }
4800 else
4801 {
4802 aform = 1;
4803 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4804 p1 = 0; /* aform doesn't use p1 */
4805 p1_lo = addr;
4806 if (ALIGNED_SYMBOL_REF_P (addr))
4807 p1_lo = const0_rtx;
9d98604b 4808 else if (GET_CODE (addr) == CONST
4809 && GET_CODE (XEXP (addr, 0)) == PLUS
4810 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4811 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4812 {
9d98604b 4813 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4814 if ((v & -16) != 0)
4815 addr = gen_rtx_CONST (Pmode,
4816 gen_rtx_PLUS (Pmode,
4817 XEXP (XEXP (addr, 0), 0),
4818 GEN_INT (v & -16)));
4819 else
4820 addr = XEXP (XEXP (addr, 0), 0);
4821 p1_lo = GEN_INT (v & 15);
644459d0 4822 }
4823 else if (GET_CODE (addr) == CONST_INT)
4824 {
4825 p1_lo = GEN_INT (INTVAL (addr) & 15);
4826 addr = GEN_INT (INTVAL (addr) & -16);
4827 }
9d98604b 4828 else
4829 {
4830 p1_lo = gen_reg_rtx (SImode);
4831 emit_move_insn (p1_lo, addr);
4832 }
644459d0 4833 }
4834
4cbad5bb 4835 gcc_assert (aform == 0 || aform == 1);
9d98604b 4836 reg = gen_reg_rtx (TImode);
e04cf423 4837
644459d0 4838 scalar = store_with_one_insn_p (ops[0]);
4839 if (!scalar)
4840 {
4841 /* We could copy the flags from the ops[0] MEM to lmem here;
4842 we don't because we want this load to be optimized away if
4843 possible, and copying the flags would prevent that in certain
4844 cases, e.g. consider the volatile flag. */
4845
9d98604b 4846 rtx pat = gen_reg_rtx (TImode);
e04cf423 4847 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4848 set_mem_alias_set (lmem, 0);
4849 emit_insn (gen_movti (reg, lmem));
644459d0 4850
9d98604b 4851 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4852 p0 = stack_pointer_rtx;
4853 if (!p1_lo)
4854 p1_lo = const0_rtx;
4855
4856 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4857 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4858 }
644459d0 4859 else
4860 {
4861 if (GET_CODE (ops[1]) == REG)
4862 emit_insn (gen_spu_convert (reg, ops[1]));
4863 else if (GET_CODE (ops[1]) == SUBREG)
4864 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4865 else
4866 abort ();
4867 }
4868
4869 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4870 emit_insn (gen_ashlti3
4871 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4872
9d98604b 4873 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4874 /* We can't use the previous alias set because the memory has changed
4875 size and can potentially overlap objects of other types. */
4876 set_mem_alias_set (smem, 0);
4877
e04cf423 4878 emit_insn (gen_movti (smem, reg));
9d98604b 4879 return 1;
644459d0 4880}
4881
4882/* Return TRUE if X is a MEM which is a struct member reference
4883 and the member can safely be loaded and stored with a single
4884 instruction because it is padded. */
4885static int
4886mem_is_padded_component_ref (rtx x)
4887{
4888 tree t = MEM_EXPR (x);
4889 tree r;
4890 if (!t || TREE_CODE (t) != COMPONENT_REF)
4891 return 0;
4892 t = TREE_OPERAND (t, 1);
4893 if (!t || TREE_CODE (t) != FIELD_DECL
4894 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4895 return 0;
4896 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4897 r = DECL_FIELD_CONTEXT (t);
4898 if (!r || TREE_CODE (r) != RECORD_TYPE)
4899 return 0;
4900 /* Make sure they are the same mode */
4901 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4902 return 0;
4903 /* If there are no following fields, then the field alignment ensures
fa7637bd 4904 the structure is padded to that alignment, which means this field is
4905 padded too. */
644459d0 4906 if (TREE_CHAIN (t) == 0)
4907 return 1;
4908 /* If the following field is also aligned then this field will be
4909 padded. */
4910 t = TREE_CHAIN (t);
4911 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4912 return 1;
4913 return 0;
4914}
4915
c7b91b14 4916/* Parse the -mfixed-range= option string. */
4917static void
4918fix_range (const char *const_str)
4919{
4920 int i, first, last;
4921 char *str, *dash, *comma;
4922
4923 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4924 REG2 are either register names or register numbers. The effect
4925 of this option is to mark the registers in the range from REG1 to
4926 REG2 as ``fixed'' so they won't be used by the compiler. */
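  /* For example (hypothetical register numbers), "-mfixed-range=80-85,90-92"
     marks registers 80 through 85 and 90 through 92 as fixed and
     call-used.  */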
4927
4928 i = strlen (const_str);
4929 str = (char *) alloca (i + 1);
4930 memcpy (str, const_str, i + 1);
4931
4932 while (1)
4933 {
4934 dash = strchr (str, '-');
4935 if (!dash)
4936 {
4937 warning (0, "value of -mfixed-range must have form REG1-REG2");
4938 return;
4939 }
4940 *dash = '\0';
4941 comma = strchr (dash + 1, ',');
4942 if (comma)
4943 *comma = '\0';
4944
4945 first = decode_reg_name (str);
4946 if (first < 0)
4947 {
4948 warning (0, "unknown register name: %s", str);
4949 return;
4950 }
4951
4952 last = decode_reg_name (dash + 1);
4953 if (last < 0)
4954 {
4955 warning (0, "unknown register name: %s", dash + 1);
4956 return;
4957 }
4958
4959 *dash = '-';
4960
4961 if (first > last)
4962 {
4963 warning (0, "%s-%s is an empty range", str, dash + 1);
4964 return;
4965 }
4966
4967 for (i = first; i <= last; ++i)
4968 fixed_regs[i] = call_used_regs[i] = 1;
4969
4970 if (!comma)
4971 break;
4972
4973 *comma = ',';
4974 str = comma + 1;
4975 }
4976}
4977
644459d0 4978/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4979 can be generated using the fsmbi instruction. */
4980int
4981fsmbi_const_p (rtx x)
4982{
dea01258 4983 if (CONSTANT_P (x))
4984 {
5df189be 4985 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4986 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4987 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4988 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4989 }
4990 return 0;
4991}
4992
4993/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4994 can be generated using the cbd, chd, cwd or cdd instruction. */
4995int
3754d046 4996cpat_const_p (rtx x, machine_mode mode)
dea01258 4997{
4998 if (CONSTANT_P (x))
4999 {
5000 enum immediate_class c = classify_immediate (x, mode);
5001 return c == IC_CPAT;
5002 }
5003 return 0;
5004}
644459d0 5005
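/* Build, as a constant, the shuffle pattern that a cbd/chd/cwd/cdd
   instruction would generate: every byte of the result selects the
   corresponding byte of the second shufb operand (0x10 .. 0x1f) except the
   ISIZE bytes at the insertion offset, which select the scalar from its
   preferred slot in the first operand.  Returns 0 when the operands are not
   suitable constants.  */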
dea01258 5006rtx
5007gen_cpat_const (rtx * ops)
5008{
5009 unsigned char dst[16];
5010 int i, offset, shift, isize;
5011 if (GET_CODE (ops[3]) != CONST_INT
5012 || GET_CODE (ops[2]) != CONST_INT
5013 || (GET_CODE (ops[1]) != CONST_INT
5014 && GET_CODE (ops[1]) != REG))
5015 return 0;
5016 if (GET_CODE (ops[1]) == REG
5017 && (!REG_POINTER (ops[1])
5018 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5019 return 0;
644459d0 5020
5021 for (i = 0; i < 16; i++)
dea01258 5022 dst[i] = i + 16;
5023 isize = INTVAL (ops[3]);
5024 if (isize == 1)
5025 shift = 3;
5026 else if (isize == 2)
5027 shift = 2;
5028 else
5029 shift = 0;
5030 offset = (INTVAL (ops[2]) +
5031 (GET_CODE (ops[1]) ==
5032 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5033 for (i = 0; i < isize; i++)
5034 dst[offset + i] = i + shift;
5035 return array_to_constant (TImode, dst);
644459d0 5036}
5037
5038/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5039 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5040 than 16 bytes, the value is repeated across the rest of the array. */
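/* For example (illustrative only):

     unsigned char arr[16];
     constant_to_array (SImode, GEN_INT (0x12345678), arr);

   leaves arr holding 12 34 56 78 repeated four times.  */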
5041void
3754d046 5042constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
644459d0 5043{
5044 HOST_WIDE_INT val;
5045 int i, j, first;
5046
5047 memset (arr, 0, 16);
5048 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5049 if (GET_CODE (x) == CONST_INT
5050 || (GET_CODE (x) == CONST_DOUBLE
5051 && (mode == SFmode || mode == DFmode)))
5052 {
5053 gcc_assert (mode != VOIDmode && mode != BLKmode);
5054
5055 if (GET_CODE (x) == CONST_DOUBLE)
5056 val = const_double_to_hwint (x);
5057 else
5058 val = INTVAL (x);
5059 first = GET_MODE_SIZE (mode) - 1;
5060 for (i = first; i >= 0; i--)
5061 {
5062 arr[i] = val & 0xff;
5063 val >>= 8;
5064 }
5065 /* Splat the constant across the whole array. */
5066 for (j = 0, i = first + 1; i < 16; i++)
5067 {
5068 arr[i] = arr[j];
5069 j = (j == first) ? 0 : j + 1;
5070 }
5071 }
5072 else if (GET_CODE (x) == CONST_DOUBLE)
5073 {
5074 val = CONST_DOUBLE_LOW (x);
5075 for (i = 15; i >= 8; i--)
5076 {
5077 arr[i] = val & 0xff;
5078 val >>= 8;
5079 }
5080 val = CONST_DOUBLE_HIGH (x);
5081 for (i = 7; i >= 0; i--)
5082 {
5083 arr[i] = val & 0xff;
5084 val >>= 8;
5085 }
5086 }
5087 else if (GET_CODE (x) == CONST_VECTOR)
5088 {
5089 int units;
5090 rtx elt;
5091 mode = GET_MODE_INNER (mode);
5092 units = CONST_VECTOR_NUNITS (x);
5093 for (i = 0; i < units; i++)
5094 {
5095 elt = CONST_VECTOR_ELT (x, i);
5096 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5097 {
5098 if (GET_CODE (elt) == CONST_DOUBLE)
5099 val = const_double_to_hwint (elt);
5100 else
5101 val = INTVAL (elt);
5102 first = GET_MODE_SIZE (mode) - 1;
5103 if (first + i * GET_MODE_SIZE (mode) > 16)
5104 abort ();
5105 for (j = first; j >= 0; j--)
5106 {
5107 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5108 val >>= 8;
5109 }
5110 }
5111 }
5112 }
5113 else
5114 gcc_unreachable();
5115}
5116
5117/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5118 smaller than 16 bytes, use the bytes that would represent that value
5119 in a register, e.g., for QImode return the value of arr[3]. */
5120rtx
3754d046 5121array_to_constant (machine_mode mode, const unsigned char arr[16])
644459d0 5122{
3754d046 5123 machine_mode inner_mode;
644459d0 5124 rtvec v;
5125 int units, size, i, j, k;
5126 HOST_WIDE_INT val;
5127
5128 if (GET_MODE_CLASS (mode) == MODE_INT
5129 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5130 {
5131 j = GET_MODE_SIZE (mode);
5132 i = j < 4 ? 4 - j : 0;
5133 for (val = 0; i < j; i++)
5134 val = (val << 8) | arr[i];
5135 val = trunc_int_for_mode (val, mode);
5136 return GEN_INT (val);
5137 }
5138
5139 if (mode == TImode)
5140 {
5141 HOST_WIDE_INT high;
5142 for (i = high = 0; i < 8; i++)
5143 high = (high << 8) | arr[i];
5144 for (i = 8, val = 0; i < 16; i++)
5145 val = (val << 8) | arr[i];
5146 return immed_double_const (val, high, TImode);
5147 }
5148 if (mode == SFmode)
5149 {
5150 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5151 val = trunc_int_for_mode (val, SImode);
171b6d22 5152 return hwint_to_const_double (SFmode, val);
644459d0 5153 }
5154 if (mode == DFmode)
5155 {
1f915911 5156 for (i = 0, val = 0; i < 8; i++)
5157 val = (val << 8) | arr[i];
171b6d22 5158 return hwint_to_const_double (DFmode, val);
644459d0 5159 }
5160
5161 if (!VECTOR_MODE_P (mode))
5162 abort ();
5163
5164 units = GET_MODE_NUNITS (mode);
5165 size = GET_MODE_UNIT_SIZE (mode);
5166 inner_mode = GET_MODE_INNER (mode);
5167 v = rtvec_alloc (units);
5168
5169 for (k = i = 0; i < units; ++i)
5170 {
5171 val = 0;
5172 for (j = 0; j < size; j++, k++)
5173 val = (val << 8) | arr[k];
5174
5175 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5176 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5177 else
5178 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5179 }
5180 if (k > 16)
5181 abort ();
5182
5183 return gen_rtx_CONST_VECTOR (mode, v);
5184}
5185
5186static void
5187reloc_diagnostic (rtx x)
5188{
712d2297 5189 tree decl = 0;
644459d0 5190 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5191 return;
5192
5193 if (GET_CODE (x) == SYMBOL_REF)
5194 decl = SYMBOL_REF_DECL (x);
5195 else if (GET_CODE (x) == CONST
5196 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5197 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5198
5199 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5200 if (decl && !DECL_P (decl))
5201 decl = 0;
5202
644459d0 5203 /* The decl could be a string constant. */
5204 if (decl && DECL_P (decl))
712d2297 5205 {
5206 location_t loc;
5207 /* We use last_assemble_variable_decl to get line information. It's
5208 not always going to be right and might not even be close, but will
5209 be right for the more common cases. */
5210 if (!last_assemble_variable_decl || in_section == ctors_section)
5211 loc = DECL_SOURCE_LOCATION (decl);
5212 else
5213 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5214
712d2297 5215 if (TARGET_WARN_RELOC)
5216 warning_at (loc, 0,
5217 "creating run-time relocation for %qD", decl);
5218 else
5219 error_at (loc,
5220 "creating run-time relocation for %qD", decl);
5221 }
5222 else
5223 {
5224 if (TARGET_WARN_RELOC)
5225 warning_at (input_location, 0, "creating run-time relocation");
5226 else
5227 error_at (input_location, "creating run-time relocation");
5228 }
644459d0 5229}
5230
5231/* Hook into assemble_integer so we can generate an error for run-time
5232 relocations. The SPU ABI disallows them. */
5233static bool
5234spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5235{
5236 /* By default run-time relocations aren't supported, but we allow them
5237 in case users support them in their own run-time loader, and we provide
5238 a warning for those users who don't. */
5239 if ((GET_CODE (x) == SYMBOL_REF)
5240 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5241 reloc_diagnostic (x);
5242
5243 return default_assemble_integer (x, size, aligned_p);
5244}
5245
5246static void
5247spu_asm_globalize_label (FILE * file, const char *name)
5248{
5249 fputs ("\t.global\t", file);
5250 assemble_name (file, name);
5251 fputs ("\n", file);
5252}
5253
5254static bool
20d892d1 5255spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5256 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5257 bool speed ATTRIBUTE_UNUSED)
644459d0 5258{
3754d046 5259 machine_mode mode = GET_MODE (x);
644459d0 5260 int cost = COSTS_N_INSNS (2);
5261
5262 /* Folding to a CONST_VECTOR will use extra space but there might
5263 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5264 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5265 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5266 because this cost will only be compared against a single insn.
5267 if (code == CONST_VECTOR)
ca316360 5268 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5269 */
5270
5271 /* Use defaults for float operations. Not accurate but good enough. */
5272 if (mode == DFmode)
5273 {
5274 *total = COSTS_N_INSNS (13);
5275 return true;
5276 }
5277 if (mode == SFmode)
5278 {
5279 *total = COSTS_N_INSNS (6);
5280 return true;
5281 }
5282 switch (code)
5283 {
5284 case CONST_INT:
5285 if (satisfies_constraint_K (x))
5286 *total = 0;
5287 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5288 *total = COSTS_N_INSNS (1);
5289 else
5290 *total = COSTS_N_INSNS (3);
5291 return true;
5292
5293 case CONST:
5294 *total = COSTS_N_INSNS (3);
5295 return true;
5296
5297 case LABEL_REF:
5298 case SYMBOL_REF:
5299 *total = COSTS_N_INSNS (0);
5300 return true;
5301
5302 case CONST_DOUBLE:
5303 *total = COSTS_N_INSNS (5);
5304 return true;
5305
5306 case FLOAT_EXTEND:
5307 case FLOAT_TRUNCATE:
5308 case FLOAT:
5309 case UNSIGNED_FLOAT:
5310 case FIX:
5311 case UNSIGNED_FIX:
5312 *total = COSTS_N_INSNS (7);
5313 return true;
5314
5315 case PLUS:
5316 if (mode == TImode)
5317 {
5318 *total = COSTS_N_INSNS (9);
5319 return true;
5320 }
5321 break;
5322
5323 case MULT:
5324 cost =
5325 GET_CODE (XEXP (x, 0)) ==
5326 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5327 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5328 {
5329 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5330 {
5331 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5332 cost = COSTS_N_INSNS (14);
5333 if ((val & 0xffff) == 0)
5334 cost = COSTS_N_INSNS (9);
5335 else if (val > 0 && val < 0x10000)
5336 cost = COSTS_N_INSNS (11);
5337 }
5338 }
5339 *total = cost;
5340 return true;
5341 case DIV:
5342 case UDIV:
5343 case MOD:
5344 case UMOD:
5345 *total = COSTS_N_INSNS (20);
5346 return true;
5347 case ROTATE:
5348 case ROTATERT:
5349 case ASHIFT:
5350 case ASHIFTRT:
5351 case LSHIFTRT:
5352 *total = COSTS_N_INSNS (4);
5353 return true;
5354 case UNSPEC:
5355 if (XINT (x, 1) == UNSPEC_CONVERT)
5356 *total = COSTS_N_INSNS (0);
5357 else
5358 *total = COSTS_N_INSNS (4);
5359 return true;
5360 }
5361 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5362 if (GET_MODE_CLASS (mode) == MODE_INT
5363 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5364 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5365 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5366 *total = cost;
5367 return true;
5368}
5369
3754d046 5370static machine_mode
1bd43494 5371spu_unwind_word_mode (void)
644459d0 5372{
1bd43494 5373 return SImode;
644459d0 5374}
5375
5376/* Decide whether we can make a sibling call to a function. DECL is the
5377 declaration of the function being targeted by the call and EXP is the
5378 CALL_EXPR representing the call. */
5379static bool
5380spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5381{
5382 return decl && !TARGET_LARGE_MEM;
5383}
5384
5385/* We need to correctly update the back chain pointer and the Available
5386 Stack Size (which is in the second slot of the sp register). */
5387void
5388spu_allocate_stack (rtx op0, rtx op1)
5389{
5390 HOST_WIDE_INT v;
5391 rtx chain = gen_reg_rtx (V4SImode);
5392 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5393 rtx sp = gen_reg_rtx (V4SImode);
5394 rtx splatted = gen_reg_rtx (V4SImode);
5395 rtx pat = gen_reg_rtx (TImode);
5396
5397 /* copy the back chain so we can save it back again. */
5398 emit_move_insn (chain, stack_bot);
5399
5400 op1 = force_reg (SImode, op1);
5401
5402 v = 0x1020300010203ll;
5403 emit_move_insn (pat, immed_double_const (v, v, TImode));
5404 emit_insn (gen_shufb (splatted, op1, op1, pat));
5405
5406 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5407 emit_insn (gen_subv4si3 (sp, sp, splatted));
5408
5409 if (flag_stack_check)
5410 {
5411 rtx avail = gen_reg_rtx(SImode);
5412 rtx result = gen_reg_rtx(SImode);
5413 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5414 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5415 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5416 }
5417
5418 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5419
5420 emit_move_insn (stack_bot, chain);
5421
5422 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5423}
5424
5425void
5426spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5427{
5428 static unsigned char arr[16] =
5429 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5430 rtx temp = gen_reg_rtx (SImode);
5431 rtx temp2 = gen_reg_rtx (SImode);
5432 rtx temp3 = gen_reg_rtx (V4SImode);
5433 rtx temp4 = gen_reg_rtx (V4SImode);
5434 rtx pat = gen_reg_rtx (TImode);
5435 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5436
5437 /* Restore the backchain from the first word, sp from the second. */
5438 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5439 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5440
5441 emit_move_insn (pat, array_to_constant (TImode, arr));
5442
5443 /* Compute Available Stack Size for sp */
5444 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5445 emit_insn (gen_shufb (temp3, temp, temp, pat));
5446
5447 /* Compute Available Stack Size for back chain */
5448 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5449 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5450 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5451
5452 emit_insn (gen_addv4si3 (sp, sp, temp3));
5453 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5454}
5455
5456static void
5457spu_init_libfuncs (void)
5458{
5459 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5460 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5461 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5462 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5463 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5464 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5465 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5466 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5467 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5468 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5469 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5470 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5471
5472 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5473 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5474
5825ec3f 5475 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5476 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5477 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5478 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5479 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5480 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5481 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5482 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5483 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5484 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5485 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5486 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5487
19a53068 5488 set_optab_libfunc (smul_optab, TImode, "__multi3");
5489 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5490 set_optab_libfunc (smod_optab, TImode, "__modti3");
5491 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5492 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5493 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5494}
5495
5496/* Make a subreg, stripping any existing subreg. We could possibly just
5497 call simplify_subreg, but in this case we know what we want. */
5498rtx
3754d046 5499spu_gen_subreg (machine_mode mode, rtx x)
644459d0 5500{
5501 if (GET_CODE (x) == SUBREG)
5502 x = SUBREG_REG (x);
5503 if (GET_MODE (x) == mode)
5504 return x;
5505 return gen_rtx_SUBREG (mode, x, 0);
5506}
5507
5508static bool
fb80456a 5509spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5510{
5511 return (TYPE_MODE (type) == BLKmode
5512 && ((type) == 0
5513 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5514 || int_size_in_bytes (type) >
5515 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5516}
5517\f
5518/* Create the built-in types and functions */
5519
c2233b46 5520enum spu_function_code
5521{
5522#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5523#include "spu-builtins.def"
5524#undef DEF_BUILTIN
5525 NUM_SPU_BUILTINS
5526};
5527
5528extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5529
644459d0 5530struct spu_builtin_description spu_builtins[] = {
5531#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5532 {fcode, icode, name, type, params},
644459d0 5533#include "spu-builtins.def"
5534#undef DEF_BUILTIN
5535};
5536
0c5c4d59 5537static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5538
5539/* Returns the spu builtin decl for CODE. */
e6925042 5540
5541static tree
5542spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5543{
5544 if (code >= NUM_SPU_BUILTINS)
5545 return error_mark_node;
5546
0c5c4d59 5547 return spu_builtin_decls[code];
e6925042 5548}
5549
5550
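/* Implement TARGET_INIT_BUILTINS.  Create the SPU vector types, fill in
   spu_builtin_types[], and register one builtin decl for every entry in
   spu-builtins.def.  */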
644459d0 5551static void
5552spu_init_builtins (void)
5553{
5554 struct spu_builtin_description *d;
5555 unsigned int i;
5556
5557 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5558 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5559 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5560 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5561 V4SF_type_node = build_vector_type (float_type_node, 4);
5562 V2DF_type_node = build_vector_type (double_type_node, 2);
5563
5564 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5565 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5566 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5567 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5568
c4ecce0c 5569 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5570
5571 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5573 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5575 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5576 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5577 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5578 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5579 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5580 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5581 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5582 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5583
5584 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5585 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5586 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5587 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5588 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5589 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5590 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5591 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5592
5593 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5594 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5595
5596 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5597
5598 spu_builtin_types[SPU_BTI_PTR] =
5599 build_pointer_type (build_qualified_type
5600 (void_type_node,
5601 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5602
5603 /* For each builtin we build a new prototype. The tree code will make
5604 sure nodes are shared. */
5605 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5606 {
5607 tree p;
5608 char name[64]; /* build_function will make a copy. */
5609 int parm;
5610
5611 if (d->name == 0)
5612 continue;
5613
5dfbd18f 5614 /* Find last parm. */
644459d0 5615 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5616 ;
644459d0 5617
5618 p = void_list_node;
5619 while (parm > 1)
5620 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5621
5622 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5623
5624 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5625 spu_builtin_decls[i] =
3726fe5e 5626 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5627 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5628 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5629
5630 /* These builtins don't throw. */
0c5c4d59 5631 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5632 }
5633}
5634
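/* Restore the stack pointer from OP1.  The delta is splatted and applied
   as a vector add so that the available-stack-size slots stay in sync,
   and the current back-chain quadword is stored at the new stack
   bottom.  */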
cf31d486 5635void
5636spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5637{
5638 static unsigned char arr[16] =
5639 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5640
5641 rtx temp = gen_reg_rtx (Pmode);
5642 rtx temp2 = gen_reg_rtx (V4SImode);
5643 rtx temp3 = gen_reg_rtx (V4SImode);
5644 rtx pat = gen_reg_rtx (TImode);
5645 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5646
5647 emit_move_insn (pat, array_to_constant (TImode, arr));
5648
5649 /* Restore the sp. */
5650 emit_move_insn (temp, op1);
5651 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5652
5653 /* Compute available stack size for sp. */
5654 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5655 emit_insn (gen_shufb (temp3, temp, temp, pat));
5656
5657 emit_insn (gen_addv4si3 (sp, sp, temp3));
5658 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5659}
5660
644459d0 5661int
5662spu_safe_dma (HOST_WIDE_INT channel)
5663{
006e4b96 5664 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5665}
5666
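/* Expand a splat: replicate scalar OPS[1] into every element of vector
   OPS[0].  Constants are materialized directly; otherwise a shufb
   pattern broadcasts the element from its preferred slot.  */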
5667void
5668spu_builtin_splats (rtx ops[])
5669{
3754d046 5670 machine_mode mode = GET_MODE (ops[0]);
644459d0 5671 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5672 {
5673 unsigned char arr[16];
5674 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5675 emit_move_insn (ops[0], array_to_constant (mode, arr));
5676 }
644459d0 5677 else
5678 {
5679 rtx reg = gen_reg_rtx (TImode);
5680 rtx shuf;
5681 if (GET_CODE (ops[1]) != REG
5682 && GET_CODE (ops[1]) != SUBREG)
5683 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5684 switch (mode)
5685 {
5686 case V2DImode:
5687 case V2DFmode:
5688 shuf =
5689 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5690 TImode);
5691 break;
5692 case V4SImode:
5693 case V4SFmode:
5694 shuf =
5695 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5696 TImode);
5697 break;
5698 case V8HImode:
5699 shuf =
5700 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5701 TImode);
5702 break;
5703 case V16QImode:
5704 shuf =
5705 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5706 TImode);
5707 break;
5708 default:
5709 abort ();
5710 }
5711 emit_move_insn (reg, shuf);
5712 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5713 }
5714}
5715
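/* Expand extraction of element OPS[2] of vector OPS[1] into OPS[0].  A
   constant index maps onto the vec_extract patterns; a variable index is
   converted to a byte rotate that brings the requested element into the
   preferred slot.  */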
5716void
5717spu_builtin_extract (rtx ops[])
5718{
3754d046 5719 machine_mode mode;
644459d0 5720 rtx rot, from, tmp;
5721
5722 mode = GET_MODE (ops[1]);
5723
5724 if (GET_CODE (ops[2]) == CONST_INT)
5725 {
5726 switch (mode)
5727 {
5728 case V16QImode:
5729 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5730 break;
5731 case V8HImode:
5732 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5733 break;
5734 case V4SFmode:
5735 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5736 break;
5737 case V4SImode:
5738 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5739 break;
5740 case V2DImode:
5741 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5742 break;
5743 case V2DFmode:
5744 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5745 break;
5746 default:
5747 abort ();
5748 }
5749 return;
5750 }
5751
5752 from = spu_gen_subreg (TImode, ops[1]);
5753 rot = gen_reg_rtx (TImode);
5754 tmp = gen_reg_rtx (SImode);
5755
5756 switch (mode)
5757 {
5758 case V16QImode:
5759 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5760 break;
5761 case V8HImode:
5762 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5763 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5764 break;
5765 case V4SFmode:
5766 case V4SImode:
5767 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5768 break;
5769 case V2DImode:
5770 case V2DFmode:
5771 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5772 break;
5773 default:
5774 abort ();
5775 }
5776 emit_insn (gen_rotqby_ti (rot, from, tmp));
5777
5778 emit_insn (gen_spu_convert (ops[0], rot));
5779}
5780
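/* Expand insertion of scalar OPS[1] into element OPS[3] of vector
   OPS[2], leaving the result in OPS[0].  A cpat mask selects the bytes
   of the target element and shufb merges the new value into place.  */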
5781void
5782spu_builtin_insert (rtx ops[])
5783{
3754d046 5784 machine_mode mode = GET_MODE (ops[0]);
5785 machine_mode imode = GET_MODE_INNER (mode);
644459d0 5786 rtx mask = gen_reg_rtx (TImode);
5787 rtx offset;
5788
5789 if (GET_CODE (ops[3]) == CONST_INT)
5790 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5791 else
5792 {
5793 offset = gen_reg_rtx (SImode);
5794 emit_insn (gen_mulsi3
5795 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5796 }
5797 emit_insn (gen_cpat
5798 (mask, stack_pointer_rtx, offset,
5799 GEN_INT (GET_MODE_SIZE (imode))));
5800 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5801}
5802
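/* Expand promotion of scalar OPS[1] into vector OPS[0]: the value is
   rotated so that it lands in element OPS[2] of the result; the
   remaining elements are left unspecified.  */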
5803void
5804spu_builtin_promote (rtx ops[])
5805{
3754d046 5806 machine_mode mode, imode;
644459d0 5807 rtx rot, from, offset;
5808 HOST_WIDE_INT pos;
5809
5810 mode = GET_MODE (ops[0]);
5811 imode = GET_MODE_INNER (mode);
5812
5813 from = gen_reg_rtx (TImode);
5814 rot = spu_gen_subreg (TImode, ops[0]);
5815
5816 emit_insn (gen_spu_convert (from, ops[1]));
5817
5818 if (GET_CODE (ops[2]) == CONST_INT)
5819 {
5820 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5821 if (GET_MODE_SIZE (imode) < 4)
5822 pos += 4 - GET_MODE_SIZE (imode);
5823 offset = GEN_INT (pos & 15);
5824 }
5825 else
5826 {
5827 offset = gen_reg_rtx (SImode);
5828 switch (mode)
5829 {
5830 case V16QImode:
5831 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5832 break;
5833 case V8HImode:
5834 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5835 emit_insn (gen_addsi3 (offset, offset, offset));
5836 break;
5837 case V4SFmode:
5838 case V4SImode:
5839 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5840 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5841 break;
5842 case V2DImode:
5843 case V2DFmode:
5844 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5845 break;
5846 default:
5847 abort ();
5848 }
5849 }
5850 emit_insn (gen_rotqby_ti (rot, from, offset));
5851}
5852
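/* Implement TARGET_TRAMPOLINE_INIT.  Emit the trampoline code into
   M_TRAMP: the generated sequence loads the static chain register with
   CXT and jumps to FNDECL's address, using different instruction
   sequences for -mlarge-mem and the default small-memory model.  */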
e96f2783 5853static void
5854spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5855{
e96f2783 5856 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5857 rtx shuf = gen_reg_rtx (V4SImode);
5858 rtx insn = gen_reg_rtx (V4SImode);
5859 rtx shufc;
5860 rtx insnc;
5861 rtx mem;
5862
5863 fnaddr = force_reg (SImode, fnaddr);
5864 cxt = force_reg (SImode, cxt);
5865
5866 if (TARGET_LARGE_MEM)
5867 {
5868 rtx rotl = gen_reg_rtx (V4SImode);
5869 rtx mask = gen_reg_rtx (V4SImode);
5870 rtx bi = gen_reg_rtx (SImode);
e96f2783 5871 static unsigned char const shufa[16] = {
644459d0 5872 2, 3, 0, 1, 18, 19, 16, 17,
5873 0, 1, 2, 3, 16, 17, 18, 19
5874 };
e96f2783 5875 static unsigned char const insna[16] = {
644459d0 5876 0x41, 0, 0, 79,
5877 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5878 0x60, 0x80, 0, 79,
5879 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5880 };
5881
5882 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5883 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5884
5885 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5886 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5887 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5888 emit_insn (gen_selb (insn, insnc, rotl, mask));
5889
e96f2783 5890 mem = adjust_address (m_tramp, V4SImode, 0);
5891 emit_move_insn (mem, insn);
644459d0 5892
5893 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5894 mem = adjust_address (m_tramp, Pmode, 16);
5895 emit_move_insn (mem, bi);
644459d0 5896 }
5897 else
5898 {
5899 rtx scxt = gen_reg_rtx (SImode);
5900 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5901 static unsigned char const insna[16] = {
644459d0 5902 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5903 0x30, 0, 0, 0,
5904 0, 0, 0, 0,
5905 0, 0, 0, 0
5906 };
5907
5908 shufc = gen_reg_rtx (TImode);
5909 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5910
 5911	      /* By OR'ing all of cxt with the ila opcode we assume that cxt
 5912		 fits in 18 bits and that its last 4 bits are zero.  This will
 5913		 be true if the stack pointer is initialized to 0x3fff0 at program
 5914		 start; otherwise the ila instruction will be garbage. */
5915
5916 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5917 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5918 emit_insn (gen_cpat
5919 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5920 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5921 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5922
e96f2783 5923 mem = adjust_address (m_tramp, V4SImode, 0);
5924 emit_move_insn (mem, insn);
644459d0 5925 }
5926 emit_insn (gen_sync ());
5927}
5928
08c6cbd2 5929static bool
5930spu_warn_func_return (tree decl)
5931{
5932 /* Naked functions are implemented entirely in assembly, including the
5933 return sequence, so suppress warnings about this. */
5934 return !spu_naked_function_p (decl);
5935}
5936
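/* Expand a sign extension of OPS[1] into the wider mode of OPS[0].  A
   sign word is computed (via a narrower extension or an arithmetic
   shift) and a shufb pattern splices the original value together with
   copies of the sign bytes; selector values 0x10-0x1f in ARR pick bytes
   from the sign operand.  */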
644459d0 5937void
5938spu_expand_sign_extend (rtx ops[])
5939{
5940 unsigned char arr[16];
5941 rtx pat = gen_reg_rtx (TImode);
5942 rtx sign, c;
5943 int i, last;
5944 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5945 if (GET_MODE (ops[1]) == QImode)
5946 {
5947 sign = gen_reg_rtx (HImode);
5948 emit_insn (gen_extendqihi2 (sign, ops[1]));
5949 for (i = 0; i < 16; i++)
5950 arr[i] = 0x12;
5951 arr[last] = 0x13;
5952 }
5953 else
5954 {
5955 for (i = 0; i < 16; i++)
5956 arr[i] = 0x10;
5957 switch (GET_MODE (ops[1]))
5958 {
5959 case HImode:
5960 sign = gen_reg_rtx (SImode);
5961 emit_insn (gen_extendhisi2 (sign, ops[1]));
5962 arr[last] = 0x03;
5963 arr[last - 1] = 0x02;
5964 break;
5965 case SImode:
5966 sign = gen_reg_rtx (SImode);
5967 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5968 for (i = 0; i < 4; i++)
5969 arr[last - i] = 3 - i;
5970 break;
5971 case DImode:
5972 sign = gen_reg_rtx (SImode);
5973 c = gen_reg_rtx (SImode);
5974 emit_insn (gen_spu_convert (c, ops[1]));
5975 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5976 for (i = 0; i < 8; i++)
5977 arr[last - i] = 7 - i;
5978 break;
5979 default:
5980 abort ();
5981 }
5982 }
5983 emit_move_insn (pat, array_to_constant (TImode, arr));
5984 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5985}
5986
 5987	/* Expand vector initialization.  If there are any constant parts,
 5988	   load the constant parts first, then load any non-constant parts.  */
5989void
5990spu_expand_vector_init (rtx target, rtx vals)
5991{
3754d046 5992 machine_mode mode = GET_MODE (target);
644459d0 5993 int n_elts = GET_MODE_NUNITS (mode);
5994 int n_var = 0;
5995 bool all_same = true;
790c536c 5996 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5997 int i;
5998
5999 first = XVECEXP (vals, 0, 0);
6000 for (i = 0; i < n_elts; ++i)
6001 {
6002 x = XVECEXP (vals, 0, i);
e442af0b 6003 if (!(CONST_INT_P (x)
6004 || GET_CODE (x) == CONST_DOUBLE
6005 || GET_CODE (x) == CONST_FIXED))
644459d0 6006 ++n_var;
6007 else
6008 {
6009 if (first_constant == NULL_RTX)
6010 first_constant = x;
6011 }
6012 if (i > 0 && !rtx_equal_p (x, first))
6013 all_same = false;
6014 }
6015
 6016	  /* If all elements are the same, use splats to repeat elements.  */
6017 if (all_same)
6018 {
6019 if (!CONSTANT_P (first)
6020 && !register_operand (first, GET_MODE (x)))
6021 first = force_reg (GET_MODE (first), first);
6022 emit_insn (gen_spu_splats (target, first));
6023 return;
6024 }
6025
 6026	  /* Load constant parts.  */
6027 if (n_var != n_elts)
6028 {
6029 if (n_var == 0)
6030 {
6031 emit_move_insn (target,
6032 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6033 }
6034 else
6035 {
6036 rtx constant_parts_rtx = copy_rtx (vals);
6037
6038 gcc_assert (first_constant != NULL_RTX);
 6039	      /* Fill empty slots with the first constant; this increases
 6040	         our chance of using splats in the recursive call below.  */
6041 for (i = 0; i < n_elts; ++i)
e442af0b 6042 {
6043 x = XVECEXP (constant_parts_rtx, 0, i);
6044 if (!(CONST_INT_P (x)
6045 || GET_CODE (x) == CONST_DOUBLE
6046 || GET_CODE (x) == CONST_FIXED))
6047 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6048 }
644459d0 6049
6050 spu_expand_vector_init (target, constant_parts_rtx);
6051 }
6052 }
6053
 6054	  /* Load variable parts.  */
6055 if (n_var != 0)
6056 {
6057 rtx insert_operands[4];
6058
6059 insert_operands[0] = target;
6060 insert_operands[2] = target;
6061 for (i = 0; i < n_elts; ++i)
6062 {
6063 x = XVECEXP (vals, 0, i);
e442af0b 6064 if (!(CONST_INT_P (x)
6065 || GET_CODE (x) == CONST_DOUBLE
6066 || GET_CODE (x) == CONST_FIXED))
644459d0 6067 {
6068 if (!register_operand (x, GET_MODE (x)))
6069 x = force_reg (GET_MODE (x), x);
6070 insert_operands[1] = x;
6071 insert_operands[3] = GEN_INT (i);
6072 spu_builtin_insert (insert_operands);
6073 }
6074 }
6075 }
6076}
6352eedf 6077
5474166e 6078/* Return insn index for the vector compare instruction for given CODE,
6079 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6080
6081static int
6082get_vec_cmp_insn (enum rtx_code code,
3754d046 6083 machine_mode dest_mode,
6084 machine_mode op_mode)
5474166e 6085
6086{
6087 switch (code)
6088 {
6089 case EQ:
6090 if (dest_mode == V16QImode && op_mode == V16QImode)
6091 return CODE_FOR_ceq_v16qi;
6092 if (dest_mode == V8HImode && op_mode == V8HImode)
6093 return CODE_FOR_ceq_v8hi;
6094 if (dest_mode == V4SImode && op_mode == V4SImode)
6095 return CODE_FOR_ceq_v4si;
6096 if (dest_mode == V4SImode && op_mode == V4SFmode)
6097 return CODE_FOR_ceq_v4sf;
6098 if (dest_mode == V2DImode && op_mode == V2DFmode)
6099 return CODE_FOR_ceq_v2df;
6100 break;
6101 case GT:
6102 if (dest_mode == V16QImode && op_mode == V16QImode)
6103 return CODE_FOR_cgt_v16qi;
6104 if (dest_mode == V8HImode && op_mode == V8HImode)
6105 return CODE_FOR_cgt_v8hi;
6106 if (dest_mode == V4SImode && op_mode == V4SImode)
6107 return CODE_FOR_cgt_v4si;
6108 if (dest_mode == V4SImode && op_mode == V4SFmode)
6109 return CODE_FOR_cgt_v4sf;
6110 if (dest_mode == V2DImode && op_mode == V2DFmode)
6111 return CODE_FOR_cgt_v2df;
6112 break;
6113 case GTU:
6114 if (dest_mode == V16QImode && op_mode == V16QImode)
6115 return CODE_FOR_clgt_v16qi;
6116 if (dest_mode == V8HImode && op_mode == V8HImode)
6117 return CODE_FOR_clgt_v8hi;
6118 if (dest_mode == V4SImode && op_mode == V4SImode)
6119 return CODE_FOR_clgt_v4si;
6120 break;
6121 default:
6122 break;
6123 }
6124 return -1;
6125}
6126
6127/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6128 DMODE is expected destination mode. This is a recursive function. */
6129
6130static rtx
6131spu_emit_vector_compare (enum rtx_code rcode,
6132 rtx op0, rtx op1,
3754d046 6133 machine_mode dmode)
5474166e 6134{
6135 int vec_cmp_insn;
6136 rtx mask;
3754d046 6137 machine_mode dest_mode;
6138 machine_mode op_mode = GET_MODE (op1);
5474166e 6139
6140 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6141
 6142	  /* Floating-point vector compare instructions use destination mode V4SImode.
 6143	     Double-precision floating-point vector compare instructions use V2DImode.
 6144	     Move the destination to the appropriate mode later.  */
6145 if (dmode == V4SFmode)
6146 dest_mode = V4SImode;
6147 else if (dmode == V2DFmode)
6148 dest_mode = V2DImode;
6149 else
6150 dest_mode = dmode;
6151
6152 mask = gen_reg_rtx (dest_mode);
6153 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6154
6155 if (vec_cmp_insn == -1)
6156 {
6157 bool swap_operands = false;
6158 bool try_again = false;
6159 switch (rcode)
6160 {
6161 case LT:
6162 rcode = GT;
6163 swap_operands = true;
6164 try_again = true;
6165 break;
6166 case LTU:
6167 rcode = GTU;
6168 swap_operands = true;
6169 try_again = true;
6170 break;
6171 case NE:
e20943d4 6172 case UNEQ:
6173 case UNLE:
6174 case UNLT:
6175 case UNGE:
6176 case UNGT:
6177 case UNORDERED:
5474166e 6178	      /* Handle these as the complement of the reversed comparison,
	         e.g. treat A != B as ~(A==B).  */
6179 {
e20943d4 6180 enum rtx_code rev_code;
5474166e 6181 enum insn_code nor_code;
e20943d4 6182 rtx rev_mask;
6183
6184 rev_code = reverse_condition_maybe_unordered (rcode);
6185 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6186
d6bf3b14 6187 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6188 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6189 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6190 if (dmode != dest_mode)
6191 {
6192 rtx temp = gen_reg_rtx (dest_mode);
6193 convert_move (temp, mask, 0);
6194 return temp;
6195 }
6196 return mask;
6197 }
6198 break;
6199 case GE:
6200 case GEU:
6201 case LE:
6202 case LEU:
6203 /* Try GT/GTU/LT/LTU OR EQ */
6204 {
6205 rtx c_rtx, eq_rtx;
6206 enum insn_code ior_code;
6207 enum rtx_code new_code;
6208
6209 switch (rcode)
6210 {
6211 case GE: new_code = GT; break;
6212 case GEU: new_code = GTU; break;
6213 case LE: new_code = LT; break;
6214 case LEU: new_code = LTU; break;
6215 default:
6216 gcc_unreachable ();
6217 }
6218
6219 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6220 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6221
d6bf3b14 6222 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6223 gcc_assert (ior_code != CODE_FOR_nothing);
6224 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6225 if (dmode != dest_mode)
6226 {
6227 rtx temp = gen_reg_rtx (dest_mode);
6228 convert_move (temp, mask, 0);
6229 return temp;
6230 }
6231 return mask;
6232 }
6233 break;
e20943d4 6234 case LTGT:
6235 /* Try LT OR GT */
6236 {
6237 rtx lt_rtx, gt_rtx;
6238 enum insn_code ior_code;
6239
6240 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6241 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6242
6243 ior_code = optab_handler (ior_optab, dest_mode);
6244 gcc_assert (ior_code != CODE_FOR_nothing);
6245 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6246 if (dmode != dest_mode)
6247 {
6248 rtx temp = gen_reg_rtx (dest_mode);
6249 convert_move (temp, mask, 0);
6250 return temp;
6251 }
6252 return mask;
6253 }
6254 break;
6255 case ORDERED:
6256 /* Implement as (A==A) & (B==B) */
6257 {
6258 rtx a_rtx, b_rtx;
6259 enum insn_code and_code;
6260
6261 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6262 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6263
6264 and_code = optab_handler (and_optab, dest_mode);
6265 gcc_assert (and_code != CODE_FOR_nothing);
6266 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6267 if (dmode != dest_mode)
6268 {
6269 rtx temp = gen_reg_rtx (dest_mode);
6270 convert_move (temp, mask, 0);
6271 return temp;
6272 }
6273 return mask;
6274 }
6275 break;
5474166e 6276 default:
6277 gcc_unreachable ();
6278 }
6279
6280 /* You only get two chances. */
6281 if (try_again)
6282 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6283
6284 gcc_assert (vec_cmp_insn != -1);
6285
6286 if (swap_operands)
6287 {
6288 rtx tmp;
6289 tmp = op0;
6290 op0 = op1;
6291 op1 = tmp;
6292 }
6293 }
6294
6295 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6296 if (dmode != dest_mode)
6297 {
6298 rtx temp = gen_reg_rtx (dest_mode);
6299 convert_move (temp, mask, 0);
6300 return temp;
6301 }
6302 return mask;
6303}
6304
6305
6306/* Emit vector conditional expression.
6307 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6308 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6309
6310int
6311spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6312 rtx cond, rtx cc_op0, rtx cc_op1)
6313{
3754d046 6314 machine_mode dest_mode = GET_MODE (dest);
5474166e 6315 enum rtx_code rcode = GET_CODE (cond);
6316 rtx mask;
6317
6318 /* Get the vector mask for the given relational operations. */
6319 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6320
6321 emit_insn(gen_selb (dest, op2, op1, mask));
6322
6323 return 1;
6324}
6325
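/* Return OP as a register of mode MODE: constants are handled with
   convert_to_mode, same-size values through a subreg when possible, and
   anything else with a spu_convert.  */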
6352eedf 6326static rtx
3754d046 6327spu_force_reg (machine_mode mode, rtx op)
6352eedf 6328{
6329 rtx x, r;
6330 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6331 {
6332 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6333 || GET_MODE (op) == BLKmode)
6334 return force_reg (mode, convert_to_mode (mode, op, 0));
6335 abort ();
6336 }
6337
6338 r = force_reg (GET_MODE (op), op);
6339 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6340 {
6341 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6342 if (x)
6343 return x;
6344 }
6345
6346 x = gen_reg_rtx (mode);
6347 emit_insn (gen_spu_convert (x, r));
6348 return x;
6349}
6350
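/* Check that operand OP of builtin D is valid for parameter type code P,
   diagnosing out-of-range immediates and warning when low-order address
   bits would be ignored.  */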
6351static void
6352spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6353{
6354 HOST_WIDE_INT v = 0;
6355 int lsbits;
6356 /* Check the range of immediate operands. */
6357 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6358 {
6359 int range = p - SPU_BTI_7;
5df189be 6360
6361 if (!CONSTANT_P (op))
bf776685 6362 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6363 d->name,
6364 spu_builtin_range[range].low, spu_builtin_range[range].high);
6365
6366 if (GET_CODE (op) == CONST
6367 && (GET_CODE (XEXP (op, 0)) == PLUS
6368 || GET_CODE (XEXP (op, 0)) == MINUS))
6369 {
6370 v = INTVAL (XEXP (XEXP (op, 0), 1));
6371 op = XEXP (XEXP (op, 0), 0);
6372 }
6373 else if (GET_CODE (op) == CONST_INT)
6374 v = INTVAL (op);
5df189be 6375 else if (GET_CODE (op) == CONST_VECTOR
6376 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6377 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6378
6379 /* The default for v is 0 which is valid in every range. */
6380 if (v < spu_builtin_range[range].low
6381 || v > spu_builtin_range[range].high)
bf776685 6382 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6383 d->name,
6384 spu_builtin_range[range].low, spu_builtin_range[range].high,
6385 v);
6352eedf 6386
6387 switch (p)
6388 {
6389 case SPU_BTI_S10_4:
6390 lsbits = 4;
6391 break;
6392 case SPU_BTI_U16_2:
 6393	      /* This is only used in lqa and stqa.  Even though the insns
 6394	         encode 16 bits of the address (all but the 2 least
 6395	         significant), only 14 bits are used because it is masked to
 6396	         be 16-byte aligned.  */
6397 lsbits = 4;
6398 break;
6399 case SPU_BTI_S16_2:
6400 /* This is used for lqr and stqr. */
6401 lsbits = 2;
6402 break;
6403 default:
6404 lsbits = 0;
6405 }
6406
6407 if (GET_CODE (op) == LABEL_REF
6408 || (GET_CODE (op) == SYMBOL_REF
6409 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6410 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6411 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6412 d->name);
6413 }
6414}
6415
6416
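/* Expand the arguments of call EXP to builtin D into rtl, storing them
   in OPS (starting with TARGET when the builtin returns a value), and
   return the number of operands filled in.  */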
70ca06f8 6417static int
5df189be 6418expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6419 rtx target, rtx ops[])
6420{
bc620c5c 6421 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6422 int i = 0, a;
6352eedf 6423
6424 /* Expand the arguments into rtl. */
6425
6426 if (d->parm[0] != SPU_BTI_VOID)
6427 ops[i++] = target;
6428
70ca06f8 6429 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6430 {
5df189be 6431 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6432 if (arg == 0)
6433 abort ();
b9c74b4d 6434 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6435 }
70ca06f8 6436
32f79657 6437 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6438 return i;
6352eedf 6439}
6440
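/* Expand a call EXP to the builtin described by D.  The operands are
   range-checked and coerced into the modes the insn pattern expects
   (splatting scalars into vector operands where necessary) before the
   pattern is emitted.  */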
6441static rtx
6442spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6443 tree exp, rtx target)
6352eedf 6444{
6445 rtx pat;
6446 rtx ops[8];
bc620c5c 6447 enum insn_code icode = (enum insn_code) d->icode;
3754d046 6448 machine_mode mode, tmode;
6352eedf 6449 int i, p;
70ca06f8 6450 int n_operands;
6352eedf 6451 tree return_type;
6452
6453 /* Set up ops[] with values from arglist. */
70ca06f8 6454 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6455
6456 /* Handle the target operand which must be operand 0. */
6457 i = 0;
6458 if (d->parm[0] != SPU_BTI_VOID)
6459 {
6460
6461 /* We prefer the mode specified for the match_operand otherwise
6462 use the mode from the builtin function prototype. */
6463 tmode = insn_data[d->icode].operand[0].mode;
6464 if (tmode == VOIDmode)
6465 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6466
 6467	      /* Try to use TARGET, because not using it can lead to extra copies,
 6468	         and when we are using all of the registers extra copies lead
 6469	         to extra spills.  */
6470 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6471 ops[0] = target;
6472 else
6473 target = ops[0] = gen_reg_rtx (tmode);
6474
6475 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6476 abort ();
6477
6478 i++;
6479 }
6480
a76866d3 6481 if (d->fcode == SPU_MASK_FOR_LOAD)
6482 {
3754d046 6483 machine_mode mode = insn_data[icode].operand[1].mode;
a76866d3 6484 tree arg;
6485 rtx addr, op, pat;
6486
6487 /* get addr */
5df189be 6488 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6489 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6490 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6491 addr = memory_address (mode, op);
6492
6493 /* negate addr */
6494 op = gen_reg_rtx (GET_MODE (addr));
6495 emit_insn (gen_rtx_SET (VOIDmode, op,
6496 gen_rtx_NEG (GET_MODE (addr), addr)));
6497 op = gen_rtx_MEM (mode, op);
6498
6499 pat = GEN_FCN (icode) (target, op);
6500 if (!pat)
6501 return 0;
6502 emit_insn (pat);
6503 return target;
6504 }
6505
6352eedf 6506	  /* Ignore align_hint, but still expand its args in case they have
 6507	     side effects.  */
6508 if (icode == CODE_FOR_spu_align_hint)
6509 return 0;
6510
6511 /* Handle the rest of the operands. */
70ca06f8 6512 for (p = 1; i < n_operands; i++, p++)
6352eedf 6513 {
6514 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6515 mode = insn_data[d->icode].operand[i].mode;
6516 else
6517 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6518
6519 /* mode can be VOIDmode here for labels */
6520
6521 /* For specific intrinsics with an immediate operand, e.g.,
6522 si_ai(), we sometimes need to convert the scalar argument to a
6523 vector argument by splatting the scalar. */
6524 if (VECTOR_MODE_P (mode)
6525 && (GET_CODE (ops[i]) == CONST_INT
6526 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6527 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6528 {
6529 if (GET_CODE (ops[i]) == CONST_INT)
6530 ops[i] = spu_const (mode, INTVAL (ops[i]));
6531 else
6532 {
6533 rtx reg = gen_reg_rtx (mode);
3754d046 6534 machine_mode imode = GET_MODE_INNER (mode);
6352eedf 6535 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6536 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6537 if (imode != GET_MODE (ops[i]))
6538 ops[i] = convert_to_mode (imode, ops[i],
6539 TYPE_UNSIGNED (spu_builtin_types
6540 [d->parm[i]]));
6541 emit_insn (gen_spu_splats (reg, ops[i]));
6542 ops[i] = reg;
6543 }
6544 }
6545
5df189be 6546 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6547
6352eedf 6548 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6549 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6550 }
6551
70ca06f8 6552 switch (n_operands)
6352eedf 6553 {
6554 case 0:
6555 pat = GEN_FCN (icode) (0);
6556 break;
6557 case 1:
6558 pat = GEN_FCN (icode) (ops[0]);
6559 break;
6560 case 2:
6561 pat = GEN_FCN (icode) (ops[0], ops[1]);
6562 break;
6563 case 3:
6564 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6565 break;
6566 case 4:
6567 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6568 break;
6569 case 5:
6570 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6571 break;
6572 case 6:
6573 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6574 break;
6575 default:
6576 abort ();
6577 }
6578
6579 if (!pat)
6580 abort ();
6581
6582 if (d->type == B_CALL || d->type == B_BISLED)
6583 emit_call_insn (pat);
6584 else if (d->type == B_JUMP)
6585 {
6586 emit_jump_insn (pat);
6587 emit_barrier ();
6588 }
6589 else
6590 emit_insn (pat);
6591
6592 return_type = spu_builtin_types[d->parm[0]];
6593 if (d->parm[0] != SPU_BTI_VOID
6594 && GET_MODE (target) != TYPE_MODE (return_type))
6595 {
 6596	      /* TARGET is the return value.  It should always have the mode of
 6597	         the builtin function prototype.  */
6598 target = spu_force_reg (TYPE_MODE (return_type), target);
6599 }
6600
6601 return target;
6602}
6603
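/* Implement TARGET_EXPAND_BUILTIN.  Look up the descriptor for the
   builtin's function code and expand it via spu_expand_builtin_1.  */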
6604rtx
6605spu_expand_builtin (tree exp,
6606 rtx target,
6607 rtx subtarget ATTRIBUTE_UNUSED,
3754d046 6608 machine_mode mode ATTRIBUTE_UNUSED,
6352eedf 6609 int ignore ATTRIBUTE_UNUSED)
6610{
5df189be 6611 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6612 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6613 struct spu_builtin_description *d;
6614
6615 if (fcode < NUM_SPU_BUILTINS)
6616 {
6617 d = &spu_builtins[fcode];
6618
5df189be 6619 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6620 }
6621 abort ();
6622}
6623
a76866d3 6624/* Implement targetm.vectorize.builtin_mask_for_load. */
6625static tree
6626spu_builtin_mask_for_load (void)
6627{
0c5c4d59 6628 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6629}
5df189be 6630
a28df51d 6631/* Implement targetm.vectorize.builtin_vectorization_cost. */
6632static int
0822b158 6633spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
d13adc77 6634 tree vectype,
0822b158 6635 int misalign ATTRIBUTE_UNUSED)
559093aa 6636{
d13adc77 6637 unsigned elements;
6638
559093aa 6639 switch (type_of_cost)
6640 {
6641 case scalar_stmt:
6642 case vector_stmt:
6643 case vector_load:
6644 case vector_store:
6645 case vec_to_scalar:
6646 case scalar_to_vec:
6647 case cond_branch_not_taken:
6648 case vec_perm:
5df2530b 6649 case vec_promote_demote:
559093aa 6650 return 1;
6651
6652 case scalar_store:
6653 return 10;
6654
6655 case scalar_load:
6656 /* Load + rotate. */
6657 return 2;
6658
6659 case unaligned_load:
6660 return 2;
6661
6662 case cond_branch_taken:
6663 return 6;
6664
d13adc77 6665 case vec_construct:
6666 elements = TYPE_VECTOR_SUBPARTS (vectype);
6667 return elements / 2 + 1;
6668
559093aa 6669 default:
6670 gcc_unreachable ();
6671 }
a28df51d 6672}
6673
4db2b577 6674/* Implement targetm.vectorize.init_cost. */
6675
61b33788 6676static void *
4db2b577 6677spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6678{
f97dec81 6679 unsigned *cost = XNEWVEC (unsigned, 3);
6680 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
4db2b577 6681 return cost;
6682}
6683
6684/* Implement targetm.vectorize.add_stmt_cost. */
6685
61b33788 6686static unsigned
4db2b577 6687spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
f97dec81 6688 struct _stmt_vec_info *stmt_info, int misalign,
6689 enum vect_cost_model_location where)
4db2b577 6690{
6691 unsigned *cost = (unsigned *) data;
6692 unsigned retval = 0;
6693
6694 if (flag_vect_cost_model)
6695 {
f97dec81 6696 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4db2b577 6697 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6698
6699 /* Statements in an inner loop relative to the loop being
6700 vectorized are weighted more heavily. The value here is
6701 arbitrary and could potentially be improved with analysis. */
f97dec81 6702 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4db2b577 6703 count *= 50; /* FIXME. */
6704
6705 retval = (unsigned) (count * stmt_cost);
f97dec81 6706 cost[where] += retval;
4db2b577 6707 }
6708
6709 return retval;
6710}
6711
6712/* Implement targetm.vectorize.finish_cost. */
6713
f97dec81 6714static void
6715spu_finish_cost (void *data, unsigned *prologue_cost,
6716 unsigned *body_cost, unsigned *epilogue_cost)
4db2b577 6717{
f97dec81 6718 unsigned *cost = (unsigned *) data;
6719 *prologue_cost = cost[vect_prologue];
6720 *body_cost = cost[vect_body];
6721 *epilogue_cost = cost[vect_epilogue];
4db2b577 6722}
6723
6724/* Implement targetm.vectorize.destroy_cost_data. */
6725
61b33788 6726static void
4db2b577 6727spu_destroy_cost_data (void *data)
6728{
6729 free (data);
6730}
6731
0e87db76 6732/* Return true iff, data reference of TYPE can reach vector alignment (16)
6733 after applying N number of iterations. This routine does not determine
6734 how may iterations are required to reach desired alignment. */
6735
6736static bool
a9f1838b 6737spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6738{
6739 if (is_packed)
6740 return false;
6741
6742 /* All other types are naturally aligned. */
6743 return true;
6744}
6745
6cf5579e 6746/* Return the appropriate mode for a named address pointer. */
3754d046 6747static machine_mode
6cf5579e 6748spu_addr_space_pointer_mode (addr_space_t addrspace)
6749{
6750 switch (addrspace)
6751 {
6752 case ADDR_SPACE_GENERIC:
6753 return ptr_mode;
6754 case ADDR_SPACE_EA:
6755 return EAmode;
6756 default:
6757 gcc_unreachable ();
6758 }
6759}
6760
 6761	/* Return the appropriate address mode for a named address space.  */
3754d046 6762static machine_mode
6cf5579e 6763spu_addr_space_address_mode (addr_space_t addrspace)
6764{
6765 switch (addrspace)
6766 {
6767 case ADDR_SPACE_GENERIC:
6768 return Pmode;
6769 case ADDR_SPACE_EA:
6770 return EAmode;
6771 default:
6772 gcc_unreachable ();
6773 }
6774}
6775
6776/* Determine if one named address space is a subset of another. */
6777
6778static bool
6779spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6780{
6781 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6782 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6783
6784 if (subset == superset)
6785 return true;
6786
6787 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6788 being subsets but instead as disjoint address spaces. */
6789 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6790 return false;
6791
6792 else
6793 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6794}
6795
6796/* Convert from one address space to another. */
6797static rtx
6798spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6799{
6800 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6801 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6802
6803 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6804 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6805
6806 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6807 {
6808 rtx result, ls;
6809
6810 ls = gen_const_mem (DImode,
6811 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6812 set_mem_align (ls, 128);
6813
6814 result = gen_reg_rtx (Pmode);
6815 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6816 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6817 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6818 ls, const0_rtx, Pmode, 1);
6819
6820 emit_insn (gen_subsi3 (result, op, ls));
6821
6822 return result;
6823 }
6824
6825 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6826 {
6827 rtx result, ls;
6828
6829 ls = gen_const_mem (DImode,
6830 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6831 set_mem_align (ls, 128);
6832
6833 result = gen_reg_rtx (EAmode);
6834 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6835 op = force_reg (Pmode, op);
6836 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6837 ls, const0_rtx, EAmode, 1);
6838 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6839
6840 if (EAmode == SImode)
6841 emit_insn (gen_addsi3 (result, op, ls));
6842 else
6843 emit_insn (gen_adddi3 (result, op, ls));
6844
6845 return result;
6846 }
6847
6848 else
6849 gcc_unreachable ();
6850}
6851
6852
d52fd16a 6853/* Count the total number of instructions in each pipe and return the
6854 maximum, which is used as the Minimum Iteration Interval (MII)
6855 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6856 -2 are instructions that can go in pipe0 or pipe1. */
6857static int
6858spu_sms_res_mii (struct ddg *g)
6859{
6860 int i;
6861 unsigned t[4] = {0, 0, 0, 0};
6862
6863 for (i = 0; i < g->num_nodes; i++)
6864 {
0af56f80 6865 rtx_insn *insn = g->nodes[i].insn;
d52fd16a 6866 int p = get_pipe (insn) + 2;
6867
1e944a0b 6868 gcc_assert (p >= 0);
6869 gcc_assert (p < 4);
d52fd16a 6870
6871 t[p]++;
6872 if (dump_file && INSN_P (insn))
6873 fprintf (dump_file, "i%d %s %d %d\n",
6874 INSN_UID (insn),
6875 insn_data[INSN_CODE(insn)].name,
6876 p, t[p]);
6877 }
6878 if (dump_file)
6879 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6880
6881 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6882}
6883
6884
5df189be 6885void
6886spu_init_expanders (void)
9d98604b 6887{
5df189be 6888 if (cfun)
9d98604b 6889 {
6890 rtx r0, r1;
 6891	      /* The hard frame pointer register is only 128-bit aligned when
 6892	         frame_pointer_needed is true.  We don't know that until we're
 6893	         expanding the prologue.  */
6894 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6895
6896 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6897 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6898 to be treated as aligned, so generate them here. */
6899 r0 = gen_reg_rtx (SImode);
6900 r1 = gen_reg_rtx (SImode);
6901 mark_reg_pointer (r0, 128);
6902 mark_reg_pointer (r1, 128);
6903 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6904 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6905 }
ea32e033 6906}
6907
3754d046 6908static machine_mode
ea32e033 6909spu_libgcc_cmp_return_mode (void)
6910{
6911
 6912/* For SPU, word mode is TImode, so it is better to use SImode
 6913   for compare returns.  */
6914 return SImode;
6915}
6916
3754d046 6917static machine_mode
ea32e033 6918spu_libgcc_shift_count_mode (void)
6919{
 6920/* For SPU, word mode is TImode, so it is better to use SImode
 6921   for shift counts.  */
6922 return SImode;
6923}
5a976006 6924
a08dfd55 6925/* Implement targetm.section_type_flags. */
6926static unsigned int
6927spu_section_type_flags (tree decl, const char *name, int reloc)
6928{
6929 /* .toe needs to have type @nobits. */
6930 if (strcmp (name, ".toe") == 0)
6931 return SECTION_BSS;
6cf5579e 6932 /* Don't load _ea into the current address space. */
6933 if (strcmp (name, "._ea") == 0)
6934 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6935 return default_section_type_flags (decl, name, reloc);
6936}
c2233b46 6937
6cf5579e 6938/* Implement targetm.select_section. */
6939static section *
6940spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6941{
6942 /* Variables and constants defined in the __ea address space
6943 go into a special section named "._ea". */
6944 if (TREE_TYPE (decl) != error_mark_node
6945 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6946 {
6947 /* We might get called with string constants, but get_named_section
6948 doesn't like them as they are not DECLs. Also, we need to set
6949 flags in that case. */
6950 if (!DECL_P (decl))
6951 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6952
6953 return get_named_section (decl, "._ea", reloc);
6954 }
6955
6956 return default_elf_select_section (decl, reloc, align);
6957}
6958
6959/* Implement targetm.unique_section. */
6960static void
6961spu_unique_section (tree decl, int reloc)
6962{
6963 /* We don't support unique section names in the __ea address
6964 space for now. */
6965 if (TREE_TYPE (decl) != error_mark_node
6966 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6967 return;
6968
6969 default_unique_section (decl, reloc);
6970}
6971
56c7bfc2 6972/* Generate a constant or register which contains 2^SCALE. We assume
6973 the result is valid for MODE. Currently, MODE must be V4SFmode and
6974 SCALE must be SImode. */
6975rtx
3754d046 6976spu_gen_exp2 (machine_mode mode, rtx scale)
56c7bfc2 6977{
6978 gcc_assert (mode == V4SFmode);
6979 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6980 if (GET_CODE (scale) != CONST_INT)
6981 {
6982 /* unsigned int exp = (127 + scale) << 23;
6983 __vector float m = (__vector float) spu_splats (exp); */
6984 rtx reg = force_reg (SImode, scale);
6985 rtx exp = gen_reg_rtx (SImode);
6986 rtx mul = gen_reg_rtx (mode);
6987 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6988 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6989 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6990 return mul;
6991 }
6992 else
6993 {
6994 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6995 unsigned char arr[16];
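      /* Build the IEEE single-precision image of 2^SCALE by hand: sign 0,
	 biased exponent EXP, zero mantissa.  The exponent field sits just
	 below the sign bit, so byte 0 of each 4-byte element is EXP >> 1
	 and byte 1 carries the low exponent bit in its top position.  */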
6996 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6997 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6998 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6999 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7000 return array_to_constant (mode, arr);
7001 }
7002}
7003
9d98604b 7004/* After reload, just change the convert into a move instruction
7005 or a dead instruction. */
7006void
7007spu_split_convert (rtx ops[])
7008{
7009 if (REGNO (ops[0]) == REGNO (ops[1]))
7010 emit_note (NOTE_INSN_DELETED);
7011 else
7012 {
7013 /* Use TImode always as this might help hard reg copyprop. */
7014 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7015 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7016 emit_insn (gen_move_insn (op0, op1));
7017 }
7018}
7019
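/* Emit the profiling call sequence: a brsl to _mcount with the return
   address left in register 75.  */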
b3878a6c 7020void
4cbad5bb 7021spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7022{
7023 fprintf (file, "# profile\n");
7024 fprintf (file, "brsl $75, _mcount\n");
7025}
7026
329c1e4e 7027/* Implement targetm.ref_may_alias_errno. */
7028static bool
7029spu_ref_may_alias_errno (ao_ref *ref)
7030{
7031 tree base = ao_ref_base (ref);
7032
7033 /* With SPU newlib, errno is defined as something like
7034 _impure_data._errno
7035 The default implementation of this target macro does not
 7036	     recognize such expressions, so handle them specially here.  */
7037
7038 if (TREE_CODE (base) == VAR_DECL
7039 && !TREE_STATIC (base)
7040 && DECL_EXTERNAL (base)
7041 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7042 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7043 "_impure_data") == 0
7044 /* _errno is the first member of _impure_data. */
7045 && ref->offset == 0)
7046 return true;
7047
7048 return default_ref_may_alias_errno (ref);
7049}
7050
f17d2d13 7051/* Output thunk to FILE that implements a C++ virtual function call (with
7052 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7053 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7054 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7055 relative to the resulting this pointer. */
7056
7057static void
7058spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7059 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7060 tree function)
7061{
7062 rtx op[8];
7063
7064 /* Make sure unwind info is emitted for the thunk if needed. */
7065 final_start_function (emit_barrier (), file, 1);
7066
7067 /* Operand 0 is the target function. */
7068 op[0] = XEXP (DECL_RTL (function), 0);
7069
7070 /* Operand 1 is the 'this' pointer. */
7071 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7072 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7073 else
7074 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7075
7076 /* Operands 2/3 are the low/high halfwords of delta. */
7077 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7078 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7079
7080 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7081 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7082 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7083
7084 /* Operands 6/7 are temporary registers. */
7085 op[6] = gen_rtx_REG (Pmode, 79);
7086 op[7] = gen_rtx_REG (Pmode, 78);
7087
 7088	  /* Add DELTA to the 'this' pointer.  */
7089 if (delta)
7090 {
7091 if (delta >= -0x200 && delta < 0x200)
7092 output_asm_insn ("ai\t%1,%1,%2", op);
7093 else if (delta >= -0x8000 && delta < 0x8000)
7094 {
7095 output_asm_insn ("il\t%6,%2", op);
7096 output_asm_insn ("a\t%1,%1,%6", op);
7097 }
7098 else
7099 {
7100 output_asm_insn ("ilhu\t%6,%3", op);
7101 output_asm_insn ("iohl\t%6,%2", op);
7102 output_asm_insn ("a\t%1,%1,%6", op);
7103 }
7104 }
7105
7106 /* Perform vcall adjustment. */
7107 if (vcall_offset)
7108 {
7109 output_asm_insn ("lqd\t%7,0(%1)", op);
7110 output_asm_insn ("rotqby\t%7,%7,%1", op);
7111
7112 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7113 output_asm_insn ("ai\t%7,%7,%4", op);
7114 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7115 {
7116 output_asm_insn ("il\t%6,%4", op);
7117 output_asm_insn ("a\t%7,%7,%6", op);
7118 }
7119 else
7120 {
7121 output_asm_insn ("ilhu\t%6,%5", op);
7122 output_asm_insn ("iohl\t%6,%4", op);
7123 output_asm_insn ("a\t%7,%7,%6", op);
7124 }
7125
7126 output_asm_insn ("lqd\t%6,0(%7)", op);
7127 output_asm_insn ("rotqby\t%6,%6,%7", op);
7128 output_asm_insn ("a\t%1,%1,%6", op);
7129 }
7130
7131 /* Jump to target. */
7132 output_asm_insn ("br\t%0", op);
7133
7134 final_end_function ();
7135}
7136
d5065e6e 7137/* Canonicalize a comparison from one we don't have to one we do have. */
7138static void
7139spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7140 bool op0_preserve_value)
7141{
7142 if (!op0_preserve_value
7143 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7144 {
7145 rtx tem = *op0;
7146 *op0 = *op1;
7147 *op1 = tem;
7148 *code = (int)swap_condition ((enum rtx_code)*code);
7149 }
7150}
3defb88e 7151\f
7152/* Table of machine attributes. */
7153static const struct attribute_spec spu_attribute_table[] =
7154{
7155 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7156 affects_type_identity } */
7157 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7158 false },
7159 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7160 false },
7161 { NULL, 0, 0, false, false, false, NULL, false }
7162};
7163
7164/* TARGET overrides. */
7165
7166#undef TARGET_ADDR_SPACE_POINTER_MODE
7167#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7168
7169#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7170#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7171
7172#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7173#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7174 spu_addr_space_legitimate_address_p
7175
7176#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7177#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7178
7179#undef TARGET_ADDR_SPACE_SUBSET_P
7180#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7181
7182#undef TARGET_ADDR_SPACE_CONVERT
7183#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7184
7185#undef TARGET_INIT_BUILTINS
7186#define TARGET_INIT_BUILTINS spu_init_builtins
7187#undef TARGET_BUILTIN_DECL
7188#define TARGET_BUILTIN_DECL spu_builtin_decl
7189
7190#undef TARGET_EXPAND_BUILTIN
7191#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7192
7193#undef TARGET_UNWIND_WORD_MODE
7194#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7195
7196#undef TARGET_LEGITIMIZE_ADDRESS
7197#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7198
7199/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7200 and .quad for the debugger. When it is known that the assembler is fixed,
7201 these can be removed. */
7202#undef TARGET_ASM_UNALIGNED_SI_OP
7203#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7204
7205#undef TARGET_ASM_ALIGNED_DI_OP
7206#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7207
7208/* The .8byte directive doesn't seem to work well for a 32 bit
7209 architecture. */
7210#undef TARGET_ASM_UNALIGNED_DI_OP
7211#define TARGET_ASM_UNALIGNED_DI_OP NULL
7212
7213#undef TARGET_RTX_COSTS
7214#define TARGET_RTX_COSTS spu_rtx_costs
7215
7216#undef TARGET_ADDRESS_COST
d9c5e5f4 7217#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
3defb88e 7218
7219#undef TARGET_SCHED_ISSUE_RATE
7220#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7221
7222#undef TARGET_SCHED_INIT_GLOBAL
7223#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7224
7225#undef TARGET_SCHED_INIT
7226#define TARGET_SCHED_INIT spu_sched_init
7227
7228#undef TARGET_SCHED_VARIABLE_ISSUE
7229#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7230
7231#undef TARGET_SCHED_REORDER
7232#define TARGET_SCHED_REORDER spu_sched_reorder
7233
7234#undef TARGET_SCHED_REORDER2
7235#define TARGET_SCHED_REORDER2 spu_sched_reorder
7236
7237#undef TARGET_SCHED_ADJUST_COST
7238#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7239
7240#undef TARGET_ATTRIBUTE_TABLE
7241#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7242
7243#undef TARGET_ASM_INTEGER
7244#define TARGET_ASM_INTEGER spu_assemble_integer
7245
7246#undef TARGET_SCALAR_MODE_SUPPORTED_P
7247#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7248
7249#undef TARGET_VECTOR_MODE_SUPPORTED_P
7250#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7251
7252#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7253#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7254
7255#undef TARGET_ASM_GLOBALIZE_LABEL
7256#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7257
7258#undef TARGET_PASS_BY_REFERENCE
7259#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7260
7261#undef TARGET_FUNCTION_ARG
7262#define TARGET_FUNCTION_ARG spu_function_arg
7263
7264#undef TARGET_FUNCTION_ARG_ADVANCE
7265#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7266
7267#undef TARGET_MUST_PASS_IN_STACK
7268#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7269
7270#undef TARGET_BUILD_BUILTIN_VA_LIST
7271#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7272
7273#undef TARGET_EXPAND_BUILTIN_VA_START
7274#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7275
7276#undef TARGET_SETUP_INCOMING_VARARGS
7277#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7278
7279#undef TARGET_MACHINE_DEPENDENT_REORG
7280#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7281
7282#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7283#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7284
7285#undef TARGET_INIT_LIBFUNCS
7286#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7287
7288#undef TARGET_RETURN_IN_MEMORY
7289#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7290
7291#undef TARGET_ENCODE_SECTION_INFO
7292#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7293
7294#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7295#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7296
7297#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7298#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7299
7300#undef TARGET_VECTORIZE_INIT_COST
7301#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7302
7303#undef TARGET_VECTORIZE_ADD_STMT_COST
7304#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7305
7306#undef TARGET_VECTORIZE_FINISH_COST
7307#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7308
7309#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7310#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7311
7312#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7313#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7314
7315#undef TARGET_LIBGCC_CMP_RETURN_MODE
7316#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7317
7318#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7319#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7320
7321#undef TARGET_SCHED_SMS_RES_MII
7322#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7323
7324#undef TARGET_SECTION_TYPE_FLAGS
7325#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7326
7327#undef TARGET_ASM_SELECT_SECTION
7328#define TARGET_ASM_SELECT_SECTION spu_select_section
7329
7330#undef TARGET_ASM_UNIQUE_SECTION
7331#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7332
7333#undef TARGET_LEGITIMATE_ADDRESS_P
7334#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7335
7336#undef TARGET_LEGITIMATE_CONSTANT_P
7337#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7338
7339#undef TARGET_TRAMPOLINE_INIT
7340#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7341
08c6cbd2 7342#undef TARGET_WARN_FUNC_RETURN
7343#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7344
3defb88e 7345#undef TARGET_OPTION_OVERRIDE
7346#define TARGET_OPTION_OVERRIDE spu_option_override
7347
7348#undef TARGET_CONDITIONAL_REGISTER_USAGE
7349#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7350
7351#undef TARGET_REF_MAY_ALIAS_ERRNO
7352#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7353
7354#undef TARGET_ASM_OUTPUT_MI_THUNK
7355#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7356#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7357#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7358
7359/* Variable tracking should be run after all optimizations which
7360 change order of insns. It also needs a valid CFG. */
7361#undef TARGET_DELAY_VARTRACK
7362#define TARGET_DELAY_VARTRACK true
7363
d5065e6e 7364#undef TARGET_CANONICALIZE_COMPARISON
7365#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7366
5f35dd0e 7367#undef TARGET_CAN_USE_DOLOOP_P
7368#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7369
3defb88e 7370struct gcc_target targetm = TARGET_INITIALIZER;
7371
c2233b46 7372#include "gt-spu.h"