d353bf18 1/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
b20a8bb4 30#include "alias.h"
31#include "symtab.h"
644459d0 32#include "tree.h"
b20a8bb4 33#include "fold-const.h"
9ed99284 34#include "stringpool.h"
35#include "stor-layout.h"
36#include "calls.h"
37#include "varasm.h"
d53441c8 38#include "function.h"
d53441c8 39#include "expmed.h"
40#include "dojump.h"
41#include "explow.h"
42#include "emit-rtl.h"
43#include "stmt.h"
644459d0 44#include "expr.h"
34517c64 45#include "insn-codes.h"
644459d0 46#include "optabs.h"
47#include "except.h"
644459d0 48#include "output.h"
94ea8568 49#include "predict.h"
50#include "dominance.h"
51#include "cfg.h"
52#include "cfgrtl.h"
53#include "cfganal.h"
54#include "lcm.h"
55#include "cfgbuild.h"
56#include "cfgcleanup.h"
644459d0 57#include "basic-block.h"
0b205f4c 58#include "diagnostic-core.h"
644459d0 59#include "tm_p.h"
60#include "target.h"
61#include "target-def.h"
62#include "langhooks.h"
63#include "reload.h"
644459d0 64#include "sched-int.h"
65#include "params.h"
bc61cadb 66#include "tree-ssa-alias.h"
67#include "internal-fn.h"
68#include "gimple-fold.h"
69#include "tree-eh.h"
70#include "gimple-expr.h"
e795d6e1 71#include "gimple.h"
a8783bee 72#include "gimplify.h"
644459d0 73#include "tm-constrs.h"
5a976006 74#include "sbitmap.h"
5a976006 75#include "df.h"
94ea8568 76#include "ddg.h"
77#include "timevar.h"
b9ed1410 78#include "dumpfile.h"
a7a0184d 79#include "cfgloop.h"
f7715905 80#include "builtins.h"
6f4e40cd 81#include "rtl-iter.h"
6352eedf 82
83/* Builtin types, data and prototypes. */
c2233b46 84
85enum spu_builtin_type_index
86{
87 SPU_BTI_END_OF_PARAMS,
88
89 /* We create new type nodes for these. */
90 SPU_BTI_V16QI,
91 SPU_BTI_V8HI,
92 SPU_BTI_V4SI,
93 SPU_BTI_V2DI,
94 SPU_BTI_V4SF,
95 SPU_BTI_V2DF,
96 SPU_BTI_UV16QI,
97 SPU_BTI_UV8HI,
98 SPU_BTI_UV4SI,
99 SPU_BTI_UV2DI,
100
101 /* A 16-byte type. (Implemented with V16QI_type_node) */
102 SPU_BTI_QUADWORD,
103
104 /* These all correspond to intSI_type_node */
105 SPU_BTI_7,
106 SPU_BTI_S7,
107 SPU_BTI_U7,
108 SPU_BTI_S10,
109 SPU_BTI_S10_4,
110 SPU_BTI_U14,
111 SPU_BTI_16,
112 SPU_BTI_S16,
113 SPU_BTI_S16_2,
114 SPU_BTI_U16,
115 SPU_BTI_U16_2,
116 SPU_BTI_U18,
117
118 /* These correspond to the standard types */
119 SPU_BTI_INTQI,
120 SPU_BTI_INTHI,
121 SPU_BTI_INTSI,
122 SPU_BTI_INTDI,
123
124 SPU_BTI_UINTQI,
125 SPU_BTI_UINTHI,
126 SPU_BTI_UINTSI,
127 SPU_BTI_UINTDI,
128
129 SPU_BTI_FLOAT,
130 SPU_BTI_DOUBLE,
131
132 SPU_BTI_VOID,
133 SPU_BTI_PTR,
134
135 SPU_BTI_MAX
136};
137
138#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
139#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
140#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
141#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
142#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
143#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
144#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
145#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
146#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
147#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
148
149static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
150
6352eedf 151struct spu_builtin_range
152{
153 int low, high;
154};
155
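/* Valid value ranges for the immediate operand classes above
   (SPU_BTI_7 ... SPU_BTI_U18), used to check constant arguments
   passed to the builtins.  */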
156static struct spu_builtin_range spu_builtin_range[] = {
157 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
158 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
159 {0ll, 0x7fll}, /* SPU_BTI_U7 */
160 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
161 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
162 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
163 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
164 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
165 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
166 {0ll, 0xffffll}, /* SPU_BTI_U16 */
167 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
168 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
169};
170
644459d0 171\f
172/* Target specific attribute specifications. */
173char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
174
175/* Prototypes and external defs. */
0af56f80 176static int get_pipe (rtx_insn *insn);
644459d0 177static int spu_naked_function_p (tree func);
644459d0 178static int mem_is_padded_component_ref (rtx x);
c7b91b14 179static void fix_range (const char *);
9d98604b 180static rtx spu_expand_load (rtx, rtx, rtx, int);
644459d0 181
5474166e 182/* Which instruction set architecture to use. */
183int spu_arch;
184/* Which cpu are we tuning for. */
185int spu_tune;
186
5a976006 187/* The hardware requires 8 insns between a hint and the branch it
 188 affects. This variable describes how many rtl instructions the
 189 compiler needs to see before inserting a hint, and then the compiler
 190 will insert enough nops to make it at least 8 insns. The default is
 191 for the compiler to allow up to 2 nops to be emitted. The nops are
 192 inserted in pairs, so we round down. */
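/* With the default of 2 nops this works out to 8*4 - 2*4 = 24 bytes.  */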
193int spu_hint_dist = (8*4) - (2*4);
194
644459d0 195enum spu_immediate {
196 SPU_NONE,
197 SPU_IL,
198 SPU_ILA,
199 SPU_ILH,
200 SPU_ILHU,
201 SPU_ORI,
202 SPU_ORHI,
203 SPU_ORBI,
99369027 204 SPU_IOHL
644459d0 205};
dea01258 206enum immediate_class
207{
208 IC_POOL, /* constant pool */
209 IC_IL1, /* one il* instruction */
210 IC_IL2, /* both ilhu and iohl instructions */
211 IC_IL1s, /* one il* instruction */
212 IC_IL2s, /* both ilhu and iohl instructions */
213 IC_FSMBI, /* the fsmbi instruction */
214 IC_CPAT, /* one of the c*d instructions */
5df189be 215 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 216};
644459d0 217
218static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
219static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 220static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
221static enum immediate_class classify_immediate (rtx op,
3754d046 222 machine_mode mode);
644459d0 223
6cf5579e 224/* Pointer mode for __ea references. */
225#define EAmode (spu_ea_model != 32 ? DImode : SImode)
226
ef51d1e3 227\f
5eb28709 228/* Define the structure for the machine field in struct function. */
229struct GTY(()) machine_function
230{
231 /* Register to use for PIC accesses. */
232 rtx pic_reg;
233};
234
235/* How to allocate a 'struct machine_function'. */
236static struct machine_function *
237spu_init_machine_status (void)
238{
25a27413 239 return ggc_cleared_alloc<machine_function> ();
5eb28709 240}
241
4c834714 242/* Implement TARGET_OPTION_OVERRIDE. */
243static void
244spu_option_override (void)
644459d0 245{
5eb28709 246 /* Set up function hooks. */
247 init_machine_status = spu_init_machine_status;
248
14d408d9 249 /* Small loops will be unpeeled at -O3. For SPU it is more important
250 to keep code small by default. */
686e2769 251 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 252 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 253 global_options.x_param_values,
254 global_options_set.x_param_values);
14d408d9 255
644459d0 256 flag_omit_frame_pointer = 1;
257
5a976006 258 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 259 if (align_functions < 8)
260 align_functions = 8;
c7b91b14 261
5a976006 262 spu_hint_dist = 8*4 - spu_max_nops*4;
263 if (spu_hint_dist < 0)
264 spu_hint_dist = 0;
265
c7b91b14 266 if (spu_fixed_range_string)
267 fix_range (spu_fixed_range_string);
5474166e 268
269 /* Determine processor architectural level. */
270 if (spu_arch_string)
271 {
272 if (strcmp (&spu_arch_string[0], "cell") == 0)
273 spu_arch = PROCESSOR_CELL;
274 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
275 spu_arch = PROCESSOR_CELLEDP;
276 else
8e181c9d 277 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 278 }
279
280 /* Determine processor to tune for. */
281 if (spu_tune_string)
282 {
283 if (strcmp (&spu_tune_string[0], "cell") == 0)
284 spu_tune = PROCESSOR_CELL;
285 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
286 spu_tune = PROCESSOR_CELLEDP;
287 else
8e181c9d 288 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 289 }
98bbec1e 290
13684256 291 /* Change defaults according to the processor architecture. */
292 if (spu_arch == PROCESSOR_CELLEDP)
293 {
294 /* If no command line option has been otherwise specified, change
295 the default to -mno-safe-hints on celledp -- only the original
296 Cell/B.E. processors require this workaround. */
297 if (!(target_flags_explicit & MASK_SAFE_HINTS))
298 target_flags &= ~MASK_SAFE_HINTS;
299 }
300
98bbec1e 301 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 302}
303\f
304/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
305 struct attribute_spec.handler. */
306
644459d0 307/* True if MODE is valid for the target. By "valid", we mean able to
308 be manipulated in non-trivial ways. In particular, this means all
309 the arithmetic is supported. */
310static bool
3754d046 311spu_scalar_mode_supported_p (machine_mode mode)
644459d0 312{
313 switch (mode)
314 {
315 case QImode:
316 case HImode:
317 case SImode:
318 case SFmode:
319 case DImode:
320 case TImode:
321 case DFmode:
322 return true;
323
324 default:
325 return false;
326 }
327}
328
329/* Similarly for vector modes. "Supported" here is less strict. At
330 least some operations are supported; need to check optabs or builtins
331 for further details. */
332static bool
3754d046 333spu_vector_mode_supported_p (machine_mode mode)
644459d0 334{
335 switch (mode)
336 {
337 case V16QImode:
338 case V8HImode:
339 case V4SImode:
340 case V2DImode:
341 case V4SFmode:
342 case V2DFmode:
343 return true;
344
345 default:
346 return false;
347 }
348}
349
350/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
351 least significant bytes of the outer mode. This function returns
352 TRUE for the SUBREG's where this is correct. */
353int
354valid_subreg (rtx op)
355{
3754d046 356 machine_mode om = GET_MODE (op);
357 machine_mode im = GET_MODE (SUBREG_REG (op));
644459d0 358 return om != VOIDmode && im != VOIDmode
359 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 360 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
361 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 362}
363
364/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
fa7637bd 365 and adjust the start offset. */
644459d0 366static rtx
367adjust_operand (rtx op, HOST_WIDE_INT * start)
368{
3754d046 369 machine_mode mode;
644459d0 370 int op_size;
38aca5eb 371 /* Strip any paradoxical SUBREG. */
372 if (GET_CODE (op) == SUBREG
373 && (GET_MODE_BITSIZE (GET_MODE (op))
374 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 375 {
376 if (start)
377 *start -=
378 GET_MODE_BITSIZE (GET_MODE (op)) -
379 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
380 op = SUBREG_REG (op);
381 }
382 /* If it is smaller than SI, assure a SUBREG */
383 op_size = GET_MODE_BITSIZE (GET_MODE (op));
384 if (op_size < 32)
385 {
386 if (start)
387 *start += 32 - op_size;
388 op_size = 32;
389 }
390 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
391 mode = mode_for_size (op_size, MODE_INT, 0);
392 if (mode != GET_MODE (op))
393 op = gen_rtx_SUBREG (mode, op, 0);
394 return op;
395}
396
397void
398spu_expand_extv (rtx ops[], int unsignedp)
399{
9d98604b 400 rtx dst = ops[0], src = ops[1];
644459d0 401 HOST_WIDE_INT width = INTVAL (ops[2]);
402 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 403 HOST_WIDE_INT align_mask;
404 rtx s0, s1, mask, r0;
644459d0 405
9d98604b 406 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 407
9d98604b 408 if (MEM_P (src))
644459d0 409 {
9d98604b 410 /* First, determine if we need 1 TImode load or 2. We need only 1
411 if the bits being extracted do not cross the alignment boundary
412 as determined by the MEM and its address. */
413
414 align_mask = -MEM_ALIGN (src);
415 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 416 {
9d98604b 417 /* Alignment is sufficient for 1 load. */
418 s0 = gen_reg_rtx (TImode);
419 r0 = spu_expand_load (s0, 0, src, start / 8);
420 start &= 7;
421 if (r0)
422 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 423 }
9d98604b 424 else
425 {
426 /* Need 2 loads. */
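	  /* Rotate both quadwords left by the runtime byte offset, then
	     merge them with selb: the mask, an all-ones pattern shifted
	     left by that offset, keeps the leading bytes of the first
	     quadword and takes the trailing bytes from the second.  */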
427 s0 = gen_reg_rtx (TImode);
428 s1 = gen_reg_rtx (TImode);
429 r0 = spu_expand_load (s0, s1, src, start / 8);
430 start &= 7;
431
432 gcc_assert (start + width <= 128);
433 if (r0)
434 {
435 rtx r1 = gen_reg_rtx (SImode);
436 mask = gen_reg_rtx (TImode);
437 emit_move_insn (mask, GEN_INT (-1));
438 emit_insn (gen_rotqby_ti (s0, s0, r0));
439 emit_insn (gen_rotqby_ti (s1, s1, r0));
440 if (GET_CODE (r0) == CONST_INT)
441 r1 = GEN_INT (INTVAL (r0) & 15);
442 else
443 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
444 emit_insn (gen_shlqby_ti (mask, mask, r1));
445 emit_insn (gen_selb (s0, s1, s0, mask));
446 }
447 }
448
449 }
450 else if (GET_CODE (src) == SUBREG)
451 {
452 rtx r = SUBREG_REG (src);
453 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
454 s0 = gen_reg_rtx (TImode);
455 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
d1f9b275 456 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
9d98604b 457 else
458 emit_move_insn (s0, src);
459 }
460 else
461 {
462 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
463 s0 = gen_reg_rtx (TImode);
464 emit_move_insn (s0, src);
644459d0 465 }
466
9d98604b 467 /* Now s0 is TImode and contains the bits to extract at start. */
468
469 if (start)
470 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
471
472 if (128 - width)
f5ff0b21 473 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 474
9d98604b 475 emit_move_insn (dst, s0);
644459d0 476}
477
478void
479spu_expand_insv (rtx ops[])
480{
481 HOST_WIDE_INT width = INTVAL (ops[1]);
482 HOST_WIDE_INT start = INTVAL (ops[2]);
483 HOST_WIDE_INT maskbits;
3754d046 484 machine_mode dst_mode;
644459d0 485 rtx dst = ops[0], src = ops[3];
4cbad5bb 486 int dst_size;
644459d0 487 rtx mask;
488 rtx shift_reg;
489 int shift;
490
491
492 if (GET_CODE (ops[0]) == MEM)
493 dst = gen_reg_rtx (TImode);
494 else
495 dst = adjust_operand (dst, &start);
496 dst_mode = GET_MODE (dst);
497 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
498
499 if (CONSTANT_P (src))
500 {
3754d046 501 machine_mode m =
644459d0 502 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
503 src = force_reg (m, convert_to_mode (m, src, 0));
504 }
505 src = adjust_operand (src, 0);
644459d0 506
507 mask = gen_reg_rtx (dst_mode);
508 shift_reg = gen_reg_rtx (dst_mode);
509 shift = dst_size - start - width;
510
511 /* It's not safe to use subreg here because the compiler assumes
512 that the SUBREG_REG is right justified in the SUBREG. */
513 convert_move (shift_reg, src, 1);
514
515 if (shift > 0)
516 {
517 switch (dst_mode)
518 {
519 case SImode:
520 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
521 break;
522 case DImode:
523 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
524 break;
525 case TImode:
526 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
527 break;
528 default:
529 abort ();
530 }
531 }
532 else if (shift < 0)
533 abort ();
534
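  /* Build a mask that is all ones over the field being inserted.  For
     example, dst_size == 32, start == 8 and width == 4 gives
     maskbits == 0x00F00000, i.e. bits 8..11 counting from the MSB.  */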
535 switch (dst_size)
536 {
537 case 32:
538 maskbits = (-1ll << (32 - width - start));
539 if (start)
540 maskbits += (1ll << (32 - start));
541 emit_move_insn (mask, GEN_INT (maskbits));
542 break;
543 case 64:
544 maskbits = (-1ll << (64 - width - start));
545 if (start)
546 maskbits += (1ll << (64 - start));
547 emit_move_insn (mask, GEN_INT (maskbits));
548 break;
549 case 128:
550 {
551 unsigned char arr[16];
552 int i = start / 8;
553 memset (arr, 0, sizeof (arr));
554 arr[i] = 0xff >> (start & 7);
555 for (i++; i <= (start + width - 1) / 8; i++)
556 arr[i] = 0xff;
557 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
558 emit_move_insn (mask, array_to_constant (TImode, arr));
559 }
560 break;
561 default:
562 abort ();
563 }
564 if (GET_CODE (ops[0]) == MEM)
565 {
644459d0 566 rtx low = gen_reg_rtx (SImode);
644459d0 567 rtx rotl = gen_reg_rtx (SImode);
568 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 569 rtx addr;
570 rtx addr0;
571 rtx addr1;
644459d0 572 rtx mem;
573
9d98604b 574 addr = force_reg (Pmode, XEXP (ops[0], 0));
575 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 576 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
577 emit_insn (gen_negsi2 (rotl, low));
578 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
579 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 580 mem = change_address (ops[0], TImode, addr0);
644459d0 581 set_mem_alias_set (mem, 0);
582 emit_move_insn (dst, mem);
583 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 584 if (start + width > MEM_ALIGN (ops[0]))
585 {
586 rtx shl = gen_reg_rtx (SImode);
587 rtx mask1 = gen_reg_rtx (TImode);
588 rtx dst1 = gen_reg_rtx (TImode);
589 rtx mem1;
29c05e22 590 addr1 = plus_constant (Pmode, addr, 16);
9d98604b 591 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 592 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
593 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 594 mem1 = change_address (ops[0], TImode, addr1);
644459d0 595 set_mem_alias_set (mem1, 0);
596 emit_move_insn (dst1, mem1);
597 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
598 emit_move_insn (mem1, dst1);
599 }
9d98604b 600 emit_move_insn (mem, dst);
644459d0 601 }
602 else
71cd778d 603 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 604}
605
606
607int
608spu_expand_block_move (rtx ops[])
609{
610 HOST_WIDE_INT bytes, align, offset;
611 rtx src, dst, sreg, dreg, target;
612 int i;
613 if (GET_CODE (ops[2]) != CONST_INT
614 || GET_CODE (ops[3]) != CONST_INT
48eb4342 615 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 616 return 0;
617
618 bytes = INTVAL (ops[2]);
619 align = INTVAL (ops[3]);
620
621 if (bytes <= 0)
622 return 1;
623
624 dst = ops[0];
625 src = ops[1];
626
627 if (align == 16)
628 {
629 for (offset = 0; offset + 16 <= bytes; offset += 16)
630 {
631 dst = adjust_address (ops[0], V16QImode, offset);
632 src = adjust_address (ops[1], V16QImode, offset);
633 emit_move_insn (dst, src);
634 }
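      /* Copy any remaining 1..15 bytes: load both quadwords, use selb to
	 merge the leading source bytes into the destination while leaving
	 the bytes past the end of the copy unchanged, then store the full
	 quadword back.  */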
635 if (offset < bytes)
636 {
637 rtx mask;
638 unsigned char arr[16] = { 0 };
639 for (i = 0; i < bytes - offset; i++)
640 arr[i] = 0xff;
641 dst = adjust_address (ops[0], V16QImode, offset);
642 src = adjust_address (ops[1], V16QImode, offset);
643 mask = gen_reg_rtx (V16QImode);
644 sreg = gen_reg_rtx (V16QImode);
645 dreg = gen_reg_rtx (V16QImode);
646 target = gen_reg_rtx (V16QImode);
647 emit_move_insn (mask, array_to_constant (V16QImode, arr));
648 emit_move_insn (dreg, dst);
649 emit_move_insn (sreg, src);
650 emit_insn (gen_selb (target, dreg, sreg, mask));
651 emit_move_insn (dst, target);
652 }
653 return 1;
654 }
655 return 0;
656}
657
658enum spu_comp_code
659{ SPU_EQ, SPU_GT, SPU_GTU };
660
5474166e 661int spu_comp_icode[12][3] = {
662 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
663 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
664 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
665 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
666 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
667 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
668 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
669 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
670 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
671 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
672 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
673 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 674};
675
 676/* Generate a compare for CODE and use it to emit a branch, set a
 677 register with the result, or emit a select, depending on IS_SET.
 678 GCC can figure this out too if we don't provide all variations of
 679 compares, but since GCC always wants to use WORD_MODE, we can
 680 generate better code in most cases if we do it ourselves. */
681void
74f4459c 682spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 683{
684 int reverse_compare = 0;
685 int reverse_test = 0;
5d70b918 686 rtx compare_result, eq_result;
687 rtx comp_rtx, eq_rtx;
3754d046 688 machine_mode comp_mode;
689 machine_mode op_mode;
b9c74b4d 690 enum spu_comp_code scode, eq_code;
691 enum insn_code ior_code;
74f4459c 692 enum rtx_code code = GET_CODE (cmp);
693 rtx op0 = XEXP (cmp, 0);
694 rtx op1 = XEXP (cmp, 1);
644459d0 695 int index;
5d70b918 696 int eq_test = 0;
644459d0 697
74f4459c 698 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 699 and so on, to keep the constant in operand 1. */
74f4459c 700 if (GET_CODE (op1) == CONST_INT)
644459d0 701 {
74f4459c 702 HOST_WIDE_INT val = INTVAL (op1) - 1;
703 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 704 switch (code)
705 {
706 case GE:
74f4459c 707 op1 = GEN_INT (val);
644459d0 708 code = GT;
709 break;
710 case LT:
74f4459c 711 op1 = GEN_INT (val);
644459d0 712 code = LE;
713 break;
714 case GEU:
74f4459c 715 op1 = GEN_INT (val);
644459d0 716 code = GTU;
717 break;
718 case LTU:
74f4459c 719 op1 = GEN_INT (val);
644459d0 720 code = LEU;
721 break;
722 default:
723 break;
724 }
725 }
726
686195ea 727 /* However, if we generate an integer result, performing a reverse test
728 would require an extra negation, so avoid that where possible. */
729 if (GET_CODE (op1) == CONST_INT && is_set == 1)
730 {
731 HOST_WIDE_INT val = INTVAL (op1) + 1;
732 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
733 switch (code)
734 {
735 case LE:
736 op1 = GEN_INT (val);
737 code = LT;
738 break;
739 case LEU:
740 op1 = GEN_INT (val);
741 code = LTU;
742 break;
743 default:
744 break;
745 }
746 }
747
5d70b918 748 comp_mode = SImode;
74f4459c 749 op_mode = GET_MODE (op0);
5d70b918 750
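  /* The SPU only provides eq, gt and gtu compares, so the remaining
     relations are synthesized by swapping the operands (reverse_compare)
     and/or inverting the result (reverse_test).  For floating-point GE
     and LE with NaNs honored, an explicit equality compare is ORed in
     (eq_test) instead of inverting, so unordered operands compare
     false.  */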
644459d0 751 switch (code)
752 {
753 case GE:
644459d0 754 scode = SPU_GT;
07027691 755 if (HONOR_NANS (op_mode))
5d70b918 756 {
757 reverse_compare = 0;
758 reverse_test = 0;
759 eq_test = 1;
760 eq_code = SPU_EQ;
761 }
762 else
763 {
764 reverse_compare = 1;
765 reverse_test = 1;
766 }
644459d0 767 break;
768 case LE:
644459d0 769 scode = SPU_GT;
07027691 770 if (HONOR_NANS (op_mode))
5d70b918 771 {
772 reverse_compare = 1;
773 reverse_test = 0;
774 eq_test = 1;
775 eq_code = SPU_EQ;
776 }
777 else
778 {
779 reverse_compare = 0;
780 reverse_test = 1;
781 }
644459d0 782 break;
783 case LT:
784 reverse_compare = 1;
785 reverse_test = 0;
786 scode = SPU_GT;
787 break;
788 case GEU:
789 reverse_compare = 1;
790 reverse_test = 1;
791 scode = SPU_GTU;
792 break;
793 case LEU:
794 reverse_compare = 0;
795 reverse_test = 1;
796 scode = SPU_GTU;
797 break;
798 case LTU:
799 reverse_compare = 1;
800 reverse_test = 0;
801 scode = SPU_GTU;
802 break;
803 case NE:
804 reverse_compare = 0;
805 reverse_test = 1;
806 scode = SPU_EQ;
807 break;
808
809 case EQ:
810 scode = SPU_EQ;
811 break;
812 case GT:
813 scode = SPU_GT;
814 break;
815 case GTU:
816 scode = SPU_GTU;
817 break;
818 default:
819 scode = SPU_EQ;
820 break;
821 }
822
644459d0 823 switch (op_mode)
824 {
825 case QImode:
826 index = 0;
827 comp_mode = QImode;
828 break;
829 case HImode:
830 index = 1;
831 comp_mode = HImode;
832 break;
833 case SImode:
834 index = 2;
835 break;
836 case DImode:
837 index = 3;
838 break;
839 case TImode:
840 index = 4;
841 break;
842 case SFmode:
843 index = 5;
844 break;
845 case DFmode:
846 index = 6;
847 break;
848 case V16QImode:
5474166e 849 index = 7;
850 comp_mode = op_mode;
851 break;
644459d0 852 case V8HImode:
5474166e 853 index = 8;
854 comp_mode = op_mode;
855 break;
644459d0 856 case V4SImode:
5474166e 857 index = 9;
858 comp_mode = op_mode;
859 break;
644459d0 860 case V4SFmode:
5474166e 861 index = 10;
862 comp_mode = V4SImode;
863 break;
644459d0 864 case V2DFmode:
5474166e 865 index = 11;
866 comp_mode = V2DImode;
644459d0 867 break;
5474166e 868 case V2DImode:
644459d0 869 default:
870 abort ();
871 }
872
74f4459c 873 if (GET_MODE (op1) == DFmode
07027691 874 && (scode != SPU_GT && scode != SPU_EQ))
875 abort ();
644459d0 876
74f4459c 877 if (is_set == 0 && op1 == const0_rtx
878 && (GET_MODE (op0) == SImode
686195ea 879 || GET_MODE (op0) == HImode
880 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
644459d0 881 {
882 /* Don't need to set a register with the result when we are
883 comparing against zero and branching. */
884 reverse_test = !reverse_test;
74f4459c 885 compare_result = op0;
644459d0 886 }
887 else
888 {
889 compare_result = gen_reg_rtx (comp_mode);
890
891 if (reverse_compare)
892 {
74f4459c 893 rtx t = op1;
894 op1 = op0;
895 op0 = t;
644459d0 896 }
897
898 if (spu_comp_icode[index][scode] == 0)
899 abort ();
900
901 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 902 (op0, op_mode))
903 op0 = force_reg (op_mode, op0);
644459d0 904 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 905 (op1, op_mode))
906 op1 = force_reg (op_mode, op1);
644459d0 907 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 908 op0, op1);
644459d0 909 if (comp_rtx == 0)
910 abort ();
911 emit_insn (comp_rtx);
912
5d70b918 913 if (eq_test)
914 {
915 eq_result = gen_reg_rtx (comp_mode);
916 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 917 op0, op1);
5d70b918 918 if (eq_rtx == 0)
919 abort ();
920 emit_insn (eq_rtx);
d6bf3b14 921 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 922 gcc_assert (ior_code != CODE_FOR_nothing);
923 emit_insn (GEN_FCN (ior_code)
924 (compare_result, compare_result, eq_result));
925 }
644459d0 926 }
927
928 if (is_set == 0)
929 {
930 rtx bcomp;
931 rtx loc_ref;
932
933 /* We don't have branch on QI compare insns, so we convert the
934 QI compare result to a HI result. */
935 if (comp_mode == QImode)
936 {
937 rtx old_res = compare_result;
938 compare_result = gen_reg_rtx (HImode);
939 comp_mode = HImode;
940 emit_insn (gen_extendqihi2 (compare_result, old_res));
941 }
942
943 if (reverse_test)
944 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
945 else
946 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
947
74f4459c 948 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
d1f9b275 949 emit_jump_insn (gen_rtx_SET (pc_rtx,
644459d0 950 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
951 loc_ref, pc_rtx)));
952 }
953 else if (is_set == 2)
954 {
74f4459c 955 rtx target = operands[0];
644459d0 956 int compare_size = GET_MODE_BITSIZE (comp_mode);
957 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
3754d046 958 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
644459d0 959 rtx select_mask;
960 rtx op_t = operands[2];
961 rtx op_f = operands[3];
962
963 /* The result of the comparison can be SI, HI or QI mode. Create a
964 mask based on that result. */
965 if (target_size > compare_size)
966 {
967 select_mask = gen_reg_rtx (mode);
968 emit_insn (gen_extend_compare (select_mask, compare_result));
969 }
970 else if (target_size < compare_size)
971 select_mask =
972 gen_rtx_SUBREG (mode, compare_result,
973 (compare_size - target_size) / BITS_PER_UNIT);
974 else if (comp_mode != mode)
975 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
976 else
977 select_mask = compare_result;
978
979 if (GET_MODE (target) != GET_MODE (op_t)
980 || GET_MODE (target) != GET_MODE (op_f))
981 abort ();
982
983 if (reverse_test)
984 emit_insn (gen_selb (target, op_t, op_f, select_mask));
985 else
986 emit_insn (gen_selb (target, op_f, op_t, select_mask));
987 }
988 else
989 {
74f4459c 990 rtx target = operands[0];
644459d0 991 if (reverse_test)
d1f9b275 992 emit_insn (gen_rtx_SET (compare_result,
644459d0 993 gen_rtx_NOT (comp_mode, compare_result)));
994 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
995 emit_insn (gen_extendhisi2 (target, compare_result));
996 else if (GET_MODE (target) == SImode
997 && GET_MODE (compare_result) == QImode)
998 emit_insn (gen_extend_compare (target, compare_result));
999 else
1000 emit_move_insn (target, compare_result);
1001 }
1002}
1003
1004HOST_WIDE_INT
1005const_double_to_hwint (rtx x)
1006{
1007 HOST_WIDE_INT val;
1008 REAL_VALUE_TYPE rv;
1009 if (GET_MODE (x) == SFmode)
1010 {
1011 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1012 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1013 }
1014 else if (GET_MODE (x) == DFmode)
1015 {
1016 long l[2];
1017 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1018 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1019 val = l[0];
1020 val = (val << 32) | (l[1] & 0xffffffff);
1021 }
1022 else
1023 abort ();
1024 return val;
1025}
1026
1027rtx
3754d046 1028hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
644459d0 1029{
1030 long tv[2];
1031 REAL_VALUE_TYPE rv;
1032 gcc_assert (mode == SFmode || mode == DFmode);
1033
1034 if (mode == SFmode)
1035 tv[0] = (v << 32) >> 32;
1036 else if (mode == DFmode)
1037 {
1038 tv[1] = (v << 32) >> 32;
1039 tv[0] = v >> 32;
1040 }
1041 real_from_target (&rv, tv, mode);
1042 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1043}
1044
1045void
1046print_operand_address (FILE * file, register rtx addr)
1047{
1048 rtx reg;
1049 rtx offset;
1050
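  /* An address wrapped in (and X -16) is just X aligned down to a
     quadword boundary; print the underlying address.  */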
e04cf423 1051 if (GET_CODE (addr) == AND
1052 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1053 && INTVAL (XEXP (addr, 1)) == -16)
1054 addr = XEXP (addr, 0);
1055
644459d0 1056 switch (GET_CODE (addr))
1057 {
1058 case REG:
1059 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1060 break;
1061
1062 case PLUS:
1063 reg = XEXP (addr, 0);
1064 offset = XEXP (addr, 1);
1065 if (GET_CODE (offset) == REG)
1066 {
1067 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1068 reg_names[REGNO (offset)]);
1069 }
1070 else if (GET_CODE (offset) == CONST_INT)
1071 {
1072 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1073 INTVAL (offset), reg_names[REGNO (reg)]);
1074 }
1075 else
1076 abort ();
1077 break;
1078
1079 case CONST:
1080 case LABEL_REF:
1081 case SYMBOL_REF:
1082 case CONST_INT:
1083 output_addr_const (file, addr);
1084 break;
1085
1086 default:
1087 debug_rtx (addr);
1088 abort ();
1089 }
1090}
1091
1092void
1093print_operand (FILE * file, rtx x, int code)
1094{
3754d046 1095 machine_mode mode = GET_MODE (x);
644459d0 1096 HOST_WIDE_INT val;
1097 unsigned char arr[16];
1098 int xcode = GET_CODE (x);
dea01258 1099 int i, info;
644459d0 1100 if (GET_MODE (x) == VOIDmode)
1101 switch (code)
1102 {
644459d0 1103 case 'L': /* 128 bits, signed */
1104 case 'm': /* 128 bits, signed */
1105 case 'T': /* 128 bits, signed */
1106 case 't': /* 128 bits, signed */
1107 mode = TImode;
1108 break;
644459d0 1109 case 'K': /* 64 bits, signed */
1110 case 'k': /* 64 bits, signed */
1111 case 'D': /* 64 bits, signed */
1112 case 'd': /* 64 bits, signed */
1113 mode = DImode;
1114 break;
644459d0 1115 case 'J': /* 32 bits, signed */
1116 case 'j': /* 32 bits, signed */
1117 case 's': /* 32 bits, signed */
1118 case 'S': /* 32 bits, signed */
1119 mode = SImode;
1120 break;
1121 }
1122 switch (code)
1123 {
1124
1125 case 'j': /* 32 bits, signed */
1126 case 'k': /* 64 bits, signed */
1127 case 'm': /* 128 bits, signed */
1128 if (xcode == CONST_INT
1129 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1130 {
1131 gcc_assert (logical_immediate_p (x, mode));
1132 constant_to_array (mode, x, arr);
1133 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1134 val = trunc_int_for_mode (val, SImode);
1135 switch (which_logical_immediate (val))
1136 {
1137 case SPU_ORI:
1138 break;
1139 case SPU_ORHI:
1140 fprintf (file, "h");
1141 break;
1142 case SPU_ORBI:
1143 fprintf (file, "b");
1144 break;
1145 default:
1146 gcc_unreachable();
1147 }
1148 }
1149 else
1150 gcc_unreachable();
1151 return;
1152
1153 case 'J': /* 32 bits, signed */
1154 case 'K': /* 64 bits, signed */
1155 case 'L': /* 128 bits, signed */
1156 if (xcode == CONST_INT
1157 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1158 {
1159 gcc_assert (logical_immediate_p (x, mode)
1160 || iohl_immediate_p (x, mode));
1161 constant_to_array (mode, x, arr);
1162 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1163 val = trunc_int_for_mode (val, SImode);
1164 switch (which_logical_immediate (val))
1165 {
1166 case SPU_ORI:
1167 case SPU_IOHL:
1168 break;
1169 case SPU_ORHI:
1170 val = trunc_int_for_mode (val, HImode);
1171 break;
1172 case SPU_ORBI:
1173 val = trunc_int_for_mode (val, QImode);
1174 break;
1175 default:
1176 gcc_unreachable();
1177 }
1178 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1179 }
1180 else
1181 gcc_unreachable();
1182 return;
1183
1184 case 't': /* 128 bits, signed */
1185 case 'd': /* 64 bits, signed */
1186 case 's': /* 32 bits, signed */
dea01258 1187 if (CONSTANT_P (x))
644459d0 1188 {
dea01258 1189 enum immediate_class c = classify_immediate (x, mode);
1190 switch (c)
1191 {
1192 case IC_IL1:
1193 constant_to_array (mode, x, arr);
1194 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1195 val = trunc_int_for_mode (val, SImode);
1196 switch (which_immediate_load (val))
1197 {
1198 case SPU_IL:
1199 break;
1200 case SPU_ILA:
1201 fprintf (file, "a");
1202 break;
1203 case SPU_ILH:
1204 fprintf (file, "h");
1205 break;
1206 case SPU_ILHU:
1207 fprintf (file, "hu");
1208 break;
1209 default:
1210 gcc_unreachable ();
1211 }
1212 break;
1213 case IC_CPAT:
1214 constant_to_array (mode, x, arr);
1215 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1216 if (info == 1)
1217 fprintf (file, "b");
1218 else if (info == 2)
1219 fprintf (file, "h");
1220 else if (info == 4)
1221 fprintf (file, "w");
1222 else if (info == 8)
1223 fprintf (file, "d");
1224 break;
1225 case IC_IL1s:
1226 if (xcode == CONST_VECTOR)
1227 {
1228 x = CONST_VECTOR_ELT (x, 0);
1229 xcode = GET_CODE (x);
1230 }
1231 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1232 fprintf (file, "a");
1233 else if (xcode == HIGH)
1234 fprintf (file, "hu");
1235 break;
1236 case IC_FSMBI:
5df189be 1237 case IC_FSMBI2:
dea01258 1238 case IC_IL2:
1239 case IC_IL2s:
1240 case IC_POOL:
1241 abort ();
1242 }
644459d0 1243 }
644459d0 1244 else
1245 gcc_unreachable ();
1246 return;
1247
1248 case 'T': /* 128 bits, signed */
1249 case 'D': /* 64 bits, signed */
1250 case 'S': /* 32 bits, signed */
dea01258 1251 if (CONSTANT_P (x))
644459d0 1252 {
dea01258 1253 enum immediate_class c = classify_immediate (x, mode);
1254 switch (c)
644459d0 1255 {
dea01258 1256 case IC_IL1:
1257 constant_to_array (mode, x, arr);
1258 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1259 val = trunc_int_for_mode (val, SImode);
1260 switch (which_immediate_load (val))
1261 {
1262 case SPU_IL:
1263 case SPU_ILA:
1264 break;
1265 case SPU_ILH:
1266 case SPU_ILHU:
1267 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1268 break;
1269 default:
1270 gcc_unreachable ();
1271 }
1272 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1273 break;
1274 case IC_FSMBI:
1275 constant_to_array (mode, x, arr);
1276 val = 0;
1277 for (i = 0; i < 16; i++)
1278 {
1279 val <<= 1;
1280 val |= arr[i] & 1;
1281 }
1282 print_operand (file, GEN_INT (val), 0);
1283 break;
1284 case IC_CPAT:
1285 constant_to_array (mode, x, arr);
1286 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1288 break;
dea01258 1289 case IC_IL1s:
dea01258 1290 if (xcode == HIGH)
5df189be 1291 x = XEXP (x, 0);
1292 if (GET_CODE (x) == CONST_VECTOR)
1293 x = CONST_VECTOR_ELT (x, 0);
1294 output_addr_const (file, x);
1295 if (xcode == HIGH)
1296 fprintf (file, "@h");
644459d0 1297 break;
dea01258 1298 case IC_IL2:
1299 case IC_IL2s:
5df189be 1300 case IC_FSMBI2:
dea01258 1301 case IC_POOL:
1302 abort ();
644459d0 1303 }
c8befdb9 1304 }
644459d0 1305 else
1306 gcc_unreachable ();
1307 return;
1308
644459d0 1309 case 'C':
1310 if (xcode == CONST_INT)
1311 {
 1312 /* Only the 4 least significant bits are relevant for generating
 1313 control word instructions. */
1314 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1315 return;
1316 }
1317 break;
1318
1319 case 'M': /* print code for c*d */
1320 if (GET_CODE (x) == CONST_INT)
1321 switch (INTVAL (x))
1322 {
1323 case 1:
1324 fprintf (file, "b");
1325 break;
1326 case 2:
1327 fprintf (file, "h");
1328 break;
1329 case 4:
1330 fprintf (file, "w");
1331 break;
1332 case 8:
1333 fprintf (file, "d");
1334 break;
1335 default:
1336 gcc_unreachable();
1337 }
1338 else
1339 gcc_unreachable();
1340 return;
1341
1342 case 'N': /* Negate the operand */
1343 if (xcode == CONST_INT)
1344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1345 else if (xcode == CONST_VECTOR)
1346 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1347 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1348 return;
1349
1350 case 'I': /* enable/disable interrupts */
1351 if (xcode == CONST_INT)
1352 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1353 return;
1354
1355 case 'b': /* branch modifiers */
1356 if (xcode == REG)
1357 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1358 else if (COMPARISON_P (x))
1359 fprintf (file, "%s", xcode == NE ? "n" : "");
1360 return;
1361
1362 case 'i': /* indirect call */
1363 if (xcode == MEM)
1364 {
1365 if (GET_CODE (XEXP (x, 0)) == REG)
1366 /* Used in indirect function calls. */
1367 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1368 else
1369 output_address (XEXP (x, 0));
1370 }
1371 return;
1372
1373 case 'p': /* load/store */
1374 if (xcode == MEM)
1375 {
1376 x = XEXP (x, 0);
1377 xcode = GET_CODE (x);
1378 }
e04cf423 1379 if (xcode == AND)
1380 {
1381 x = XEXP (x, 0);
1382 xcode = GET_CODE (x);
1383 }
644459d0 1384 if (xcode == REG)
1385 fprintf (file, "d");
1386 else if (xcode == CONST_INT)
1387 fprintf (file, "a");
1388 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1389 fprintf (file, "r");
1390 else if (xcode == PLUS || xcode == LO_SUM)
1391 {
1392 if (GET_CODE (XEXP (x, 1)) == REG)
1393 fprintf (file, "x");
1394 else
1395 fprintf (file, "d");
1396 }
1397 return;
1398
5df189be 1399 case 'e':
1400 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1401 val &= 0x7;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1404
1405 case 'f':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val &= 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'g':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val &= 0x3f;
1414 output_addr_const (file, GEN_INT (val));
1415 return;
1416
1417 case 'h':
1418 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1419 val = (val >> 3) & 0x1f;
1420 output_addr_const (file, GEN_INT (val));
1421 return;
1422
1423 case 'E':
1424 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1425 val = -val;
1426 val &= 0x7;
1427 output_addr_const (file, GEN_INT (val));
1428 return;
1429
1430 case 'F':
1431 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1432 val = -val;
1433 val &= 0x1f;
1434 output_addr_const (file, GEN_INT (val));
1435 return;
1436
1437 case 'G':
1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439 val = -val;
1440 val &= 0x3f;
1441 output_addr_const (file, GEN_INT (val));
1442 return;
1443
1444 case 'H':
1445 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1446 val = -(val & -8ll);
1447 val = (val >> 3) & 0x1f;
1448 output_addr_const (file, GEN_INT (val));
1449 return;
1450
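    /* Extract the exponent field of a single-precision constant (first
       vector element) and print it unbiased ('v') or negated ('w').  */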
56c7bfc2 1451 case 'v':
1452 case 'w':
1453 constant_to_array (mode, x, arr);
1454 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1455 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1456 return;
1457
644459d0 1458 case 0:
1459 if (xcode == REG)
1460 fprintf (file, "%s", reg_names[REGNO (x)]);
1461 else if (xcode == MEM)
1462 output_address (XEXP (x, 0));
1463 else if (xcode == CONST_VECTOR)
dea01258 1464 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1465 else
1466 output_addr_const (file, x);
1467 return;
1468
f6a0d06f 1469 /* unused letters
56c7bfc2 1470 o qr u yz
5df189be 1471 AB OPQR UVWXYZ */
644459d0 1472 default:
1473 output_operand_lossage ("invalid %%xn code");
1474 }
1475 gcc_unreachable ();
1476}
1477
644459d0 1478/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1479 caller saved register. For leaf functions it is more efficient to
1480 use a volatile register because we won't need to save and restore the
1481 pic register. This routine is only valid after register allocation
1482 is completed, so we can pick an unused register. */
1483static rtx
1484get_pic_reg (void)
1485{
644459d0 1486 if (!reload_completed && !reload_in_progress)
1487 abort ();
5eb28709 1488
1489 /* If we've already made the decision, we need to keep with it. Once we've
1490 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1491 return true since the register is now live; this should not cause us to
1492 "switch back" to using pic_offset_table_rtx. */
1493 if (!cfun->machine->pic_reg)
1494 {
d5bf7b64 1495 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
5eb28709 1496 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1497 else
1498 cfun->machine->pic_reg = pic_offset_table_rtx;
1499 }
1500
1501 return cfun->machine->pic_reg;
644459d0 1502}
1503
5df189be 1504/* Split constant addresses to handle cases that are too large.
1505 Add in the pic register when in PIC mode.
1506 Split immediates that require more than 1 instruction. */
dea01258 1507int
1508spu_split_immediate (rtx * ops)
c8befdb9 1509{
3754d046 1510 machine_mode mode = GET_MODE (ops[0]);
dea01258 1511 enum immediate_class c = classify_immediate (ops[1], mode);
1512
1513 switch (c)
c8befdb9 1514 {
dea01258 1515 case IC_IL2:
1516 {
1517 unsigned char arrhi[16];
1518 unsigned char arrlo[16];
98bbec1e 1519 rtx to, temp, hi, lo;
dea01258 1520 int i;
3754d046 1521 machine_mode imode = mode;
98bbec1e 1522 /* We need to do reals as ints because the constant used in the
1523 IOR might not be a legitimate real constant. */
1524 imode = int_mode_for_mode (mode);
dea01258 1525 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1526 if (imode != mode)
1527 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1528 else
1529 to = ops[0];
1530 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
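	/* Split each 32-bit word of the constant: ARRHI keeps the high
	   halfword (loaded with ilhu), ARRLO keeps the low halfword
	   (ORed in with iohl).  */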
dea01258 1531 for (i = 0; i < 16; i += 4)
1532 {
1533 arrlo[i + 2] = arrhi[i + 2];
1534 arrlo[i + 3] = arrhi[i + 3];
1535 arrlo[i + 0] = arrlo[i + 1] = 0;
1536 arrhi[i + 2] = arrhi[i + 3] = 0;
1537 }
98bbec1e 1538 hi = array_to_constant (imode, arrhi);
1539 lo = array_to_constant (imode, arrlo);
1540 emit_move_insn (temp, hi);
d1f9b275 1541 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1542 return 1;
1543 }
5df189be 1544 case IC_FSMBI2:
1545 {
1546 unsigned char arr_fsmbi[16];
1547 unsigned char arr_andbi[16];
1548 rtx to, reg_fsmbi, reg_and;
1549 int i;
3754d046 1550 machine_mode imode = mode;
5df189be 1551 /* We need to do reals as ints because the constant used in the
1552 * AND might not be a legitimate real constant. */
1553 imode = int_mode_for_mode (mode);
1554 constant_to_array (mode, ops[1], arr_fsmbi);
1555 if (imode != mode)
1556 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1557 else
1558 to = ops[0];
1559 for (i = 0; i < 16; i++)
1560 if (arr_fsmbi[i] != 0)
1561 {
1562 arr_andbi[0] = arr_fsmbi[i];
1563 arr_fsmbi[i] = 0xff;
1564 }
1565 for (i = 1; i < 16; i++)
1566 arr_andbi[i] = arr_andbi[0];
1567 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1568 reg_and = array_to_constant (imode, arr_andbi);
1569 emit_move_insn (to, reg_fsmbi);
d1f9b275 1570 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
5df189be 1571 return 1;
1572 }
dea01258 1573 case IC_POOL:
1574 if (reload_in_progress || reload_completed)
1575 {
1576 rtx mem = force_const_mem (mode, ops[1]);
1577 if (TARGET_LARGE_MEM)
1578 {
1579 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1580 emit_move_insn (addr, XEXP (mem, 0));
1581 mem = replace_equiv_address (mem, addr);
1582 }
1583 emit_move_insn (ops[0], mem);
1584 return 1;
1585 }
1586 break;
1587 case IC_IL1s:
1588 case IC_IL2s:
1589 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1590 {
1591 if (c == IC_IL2s)
1592 {
5df189be 1593 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1594 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1595 }
1596 else if (flag_pic)
1597 emit_insn (gen_pic (ops[0], ops[1]));
1598 if (flag_pic)
1599 {
1600 rtx pic_reg = get_pic_reg ();
1601 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
dea01258 1602 }
1603 return flag_pic || c == IC_IL2s;
1604 }
1605 break;
1606 case IC_IL1:
1607 case IC_FSMBI:
1608 case IC_CPAT:
1609 break;
c8befdb9 1610 }
dea01258 1611 return 0;
c8befdb9 1612}
1613
644459d0 1614/* SAVING is TRUE when we are generating the actual load and store
1615 instructions for REGNO. When determining the size of the stack
 1616 needed for saving registers we must allocate enough space for the
1617 worst case, because we don't always have the information early enough
1618 to not allocate it. But we can at least eliminate the actual loads
1619 and stores during the prologue/epilogue. */
1620static int
1621need_to_save_reg (int regno, int saving)
1622{
3072d30e 1623 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1624 return 1;
1625 if (flag_pic
1626 && regno == PIC_OFFSET_TABLE_REGNUM
5eb28709 1627 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
644459d0 1628 return 1;
1629 return 0;
1630}
1631
1632/* This function is only correct starting with local register
1633 allocation */
1634int
1635spu_saved_regs_size (void)
1636{
1637 int reg_save_size = 0;
1638 int regno;
1639
1640 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1641 if (need_to_save_reg (regno, 0))
1642 reg_save_size += 0x10;
1643 return reg_save_size;
1644}
1645
0af56f80 1646static rtx_insn *
644459d0 1647frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1648{
1649 rtx reg = gen_rtx_REG (V4SImode, regno);
1650 rtx mem =
1651 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1652 return emit_insn (gen_movv4si (mem, reg));
1653}
1654
0af56f80 1655static rtx_insn *
644459d0 1656frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1657{
1658 rtx reg = gen_rtx_REG (V4SImode, regno);
1659 rtx mem =
1660 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1661 return emit_insn (gen_movv4si (reg, mem));
1662}
1663
1664/* This happens after reload, so we need to expand it. */
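/* Add IMM to SRC into DST.  If IMM does not satisfy constraint K it
   cannot be encoded as an immediate, so load it into SCRATCH first.  */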
0af56f80 1665static rtx_insn *
644459d0 1666frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1667{
0af56f80 1668 rtx_insn *insn;
644459d0 1669 if (satisfies_constraint_K (GEN_INT (imm)))
1670 {
1671 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1672 }
1673 else
1674 {
3072d30e 1675 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1676 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1677 if (REGNO (src) == REGNO (scratch))
1678 abort ();
1679 }
644459d0 1680 return insn;
1681}
1682
1683/* Return nonzero if this function is known to have a null epilogue. */
1684
1685int
1686direct_return (void)
1687{
1688 if (reload_completed)
1689 {
1690 if (cfun->static_chain_decl == 0
1691 && (spu_saved_regs_size ()
1692 + get_frame_size ()
abe32cce 1693 + crtl->outgoing_args_size
1694 + crtl->args.pretend_args_size == 0)
d5bf7b64 1695 && crtl->is_leaf)
644459d0 1696 return 1;
1697 }
1698 return 0;
1699}
1700
1701/*
1702 The stack frame looks like this:
1703 +-------------+
1704 | incoming |
a8e019fa 1705 | args |
1706 AP -> +-------------+
644459d0 1707 | $lr save |
1708 +-------------+
1709 prev SP | back chain |
1710 +-------------+
1711 | var args |
abe32cce 1712 | reg save | crtl->args.pretend_args_size bytes
644459d0 1713 +-------------+
1714 | ... |
1715 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1716 FP -> +-------------+
644459d0 1717 | ... |
a8e019fa 1718 | vars | get_frame_size() bytes
1719 HFP -> +-------------+
644459d0 1720 | ... |
1721 | outgoing |
abe32cce 1722 | args | crtl->outgoing_args_size bytes
644459d0 1723 +-------------+
1724 | $lr of next |
1725 | frame |
1726 +-------------+
a8e019fa 1727 | back chain |
1728 SP -> +-------------+
644459d0 1729
1730*/
1731void
1732spu_expand_prologue (void)
1733{
1734 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1735 HOST_WIDE_INT total_size;
1736 HOST_WIDE_INT saved_regs_size;
1737 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1738 rtx scratch_reg_0, scratch_reg_1;
0af56f80 1739 rtx_insn *insn;
1740 rtx real;
644459d0 1741
5eb28709 1742 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1743 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 1744
1745 if (spu_naked_function_p (current_function_decl))
1746 return;
1747
1748 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1749 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1750
1751 saved_regs_size = spu_saved_regs_size ();
1752 total_size = size + saved_regs_size
abe32cce 1753 + crtl->outgoing_args_size
1754 + crtl->args.pretend_args_size;
644459d0 1755
d5bf7b64 1756 if (!crtl->is_leaf
18d50ae6 1757 || cfun->calls_alloca || total_size > 0)
644459d0 1758 total_size += STACK_POINTER_OFFSET;
1759
1760 /* Save this first because code after this might use the link
1761 register as a scratch register. */
d5bf7b64 1762 if (!crtl->is_leaf)
644459d0 1763 {
1764 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1765 RTX_FRAME_RELATED_P (insn) = 1;
1766 }
1767
1768 if (total_size > 0)
1769 {
abe32cce 1770 offset = -crtl->args.pretend_args_size;
644459d0 1771 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1772 if (need_to_save_reg (regno, 1))
1773 {
1774 offset -= 16;
1775 insn = frame_emit_store (regno, sp_reg, offset);
1776 RTX_FRAME_RELATED_P (insn) = 1;
1777 }
1778 }
1779
5eb28709 1780 if (flag_pic && cfun->machine->pic_reg)
644459d0 1781 {
5eb28709 1782 rtx pic_reg = cfun->machine->pic_reg;
644459d0 1783 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1784 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1785 }
1786
1787 if (total_size > 0)
1788 {
1789 if (flag_stack_check)
1790 {
d819917f 1791 /* We compare against total_size-1 because
644459d0 1792 ($sp >= total_size) <=> ($sp > total_size-1) */
1793 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1794 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1795 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1796 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1797 {
1798 emit_move_insn (scratch_v4si, size_v4si);
1799 size_v4si = scratch_v4si;
1800 }
1801 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1802 emit_insn (gen_vec_extractv4si
1803 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1804 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1805 }
1806
1807 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1808 the value of the previous $sp because we save it as the back
1809 chain. */
1810 if (total_size <= 2000)
1811 {
1812 /* In this case we save the back chain first. */
1813 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1814 insn =
1815 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1816 }
644459d0 1817 else
1818 {
1819 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1820 insn =
1821 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1822 }
1823 RTX_FRAME_RELATED_P (insn) = 1;
1824 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1825 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1826
1827 if (total_size > 2000)
1828 {
1829 /* Save the back chain ptr */
1830 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1831 }
1832
1833 if (frame_pointer_needed)
1834 {
1835 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1836 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1837 + crtl->outgoing_args_size;
644459d0 1838 /* Set the new frame_pointer */
d8dfeb55 1839 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1840 RTX_FRAME_RELATED_P (insn) = 1;
1841 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1842 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1843 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1844 }
1845 }
1846
8c0dd614 1847 if (flag_stack_usage_info)
a512540d 1848 current_function_static_stack_size = total_size;
644459d0 1849}
1850
1851void
1852spu_expand_epilogue (bool sibcall_p)
1853{
1854 int size = get_frame_size (), offset, regno;
1855 HOST_WIDE_INT saved_regs_size, total_size;
1856 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 1857 rtx scratch_reg_0;
644459d0 1858
644459d0 1859 if (spu_naked_function_p (current_function_decl))
1860 return;
1861
1862 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1863
1864 saved_regs_size = spu_saved_regs_size ();
1865 total_size = size + saved_regs_size
abe32cce 1866 + crtl->outgoing_args_size
1867 + crtl->args.pretend_args_size;
644459d0 1868
d5bf7b64 1869 if (!crtl->is_leaf
18d50ae6 1870 || cfun->calls_alloca || total_size > 0)
644459d0 1871 total_size += STACK_POINTER_OFFSET;
1872
1873 if (total_size > 0)
1874 {
18d50ae6 1875 if (cfun->calls_alloca)
644459d0 1876 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1877 else
1878 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1879
1880
1881 if (saved_regs_size > 0)
1882 {
abe32cce 1883 offset = -crtl->args.pretend_args_size;
644459d0 1884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1885 if (need_to_save_reg (regno, 1))
1886 {
1887 offset -= 0x10;
1888 frame_emit_load (regno, sp_reg, offset);
1889 }
1890 }
1891 }
1892
d5bf7b64 1893 if (!crtl->is_leaf)
644459d0 1894 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1895
1896 if (!sibcall_p)
1897 {
18b42941 1898 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 1899 emit_jump_insn (gen__return ());
644459d0 1900 }
644459d0 1901}
1902
1903rtx
1904spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1905{
1906 if (count != 0)
1907 return 0;
1908 /* This is inefficient because it ends up copying to a save-register
1909 which then gets saved even though $lr has already been saved. But
1910 it does generate better code for leaf functions and we don't need
1911 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1912 used for __builtin_return_address anyway, so maybe we don't care if
1913 it's inefficient. */
1914 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1915}
1916\f
1917
1918/* Given VAL, generate a constant appropriate for MODE.
1919 If MODE is a vector mode, every element will be VAL.
1920 For TImode, VAL will be zero extended to 128 bits. */
1921rtx
3754d046 1922spu_const (machine_mode mode, HOST_WIDE_INT val)
644459d0 1923{
1924 rtx inner;
1925 rtvec v;
1926 int units, i;
1927
1928 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1929 || GET_MODE_CLASS (mode) == MODE_FLOAT
1930 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1931 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1932
1933 if (GET_MODE_CLASS (mode) == MODE_INT)
1934 return immed_double_const (val, 0, mode);
1935
1936 /* val is the bit representation of the float */
1937 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1938 return hwint_to_const_double (mode, val);
1939
1940 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1941 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1942 else
1943 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1944
1945 units = GET_MODE_NUNITS (mode);
1946
1947 v = rtvec_alloc (units);
1948
1949 for (i = 0; i < units; ++i)
1950 RTVEC_ELT (v, i) = inner;
1951
1952 return gen_rtx_CONST_VECTOR (mode, v);
1953}
644459d0 1954
5474166e 1955/* Create a MODE vector constant from 4 ints. */
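/* For example, spu_const_from_ints (V4SImode, 1, 2, 3, 4) packs each
   argument big-endian into a 16-byte array and returns the vector
   {1, 2, 3, 4}.  */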
1956rtx
3754d046 1957spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
5474166e 1958{
1959 unsigned char arr[16];
1960 arr[0] = (a >> 24) & 0xff;
1961 arr[1] = (a >> 16) & 0xff;
1962 arr[2] = (a >> 8) & 0xff;
1963 arr[3] = (a >> 0) & 0xff;
1964 arr[4] = (b >> 24) & 0xff;
1965 arr[5] = (b >> 16) & 0xff;
1966 arr[6] = (b >> 8) & 0xff;
1967 arr[7] = (b >> 0) & 0xff;
1968 arr[8] = (c >> 24) & 0xff;
1969 arr[9] = (c >> 16) & 0xff;
1970 arr[10] = (c >> 8) & 0xff;
1971 arr[11] = (c >> 0) & 0xff;
1972 arr[12] = (d >> 24) & 0xff;
1973 arr[13] = (d >> 16) & 0xff;
1974 arr[14] = (d >> 8) & 0xff;
1975 arr[15] = (d >> 0) & 0xff;
1976 return array_to_constant(mode, arr);
1977}
5a976006 1978\f
1979/* branch hint stuff */
5474166e 1980
644459d0 1981/* An array of these is used to propagate hints to predecessor blocks. */
1982struct spu_bb_info
1983{
0af56f80 1984 rtx_insn *prop_jump; /* propagated from another block */
5a976006 1985 int bb_index; /* the original block. */
644459d0 1986};
5a976006 1987static struct spu_bb_info *spu_bb_info;
644459d0 1988
5a976006 1989#define STOP_HINT_P(INSN) \
aa90bb35 1990 (CALL_P(INSN) \
5a976006 1991 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1992 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1993
1994/* 1 when RTX is a hinted branch or its target. We keep track of
1995 what has been hinted so the safe-hint code can test it easily. */
1996#define HINTED_P(RTX) \
1997 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1998
1999/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2000#define SCHED_ON_EVEN_P(RTX) \
2001 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2002
2003/* Emit a nop for INSN such that the two will dual issue. This assumes
2004 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2005 We check for TImode to handle a MULTI1 insn which has dual issued its
b1135d9a 2006 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
5a976006 2007static void
0af56f80 2008emit_nop_for_insn (rtx_insn *insn)
644459d0 2009{
5a976006 2010 int p;
0af56f80 2011 rtx_insn *new_insn;
b1135d9a 2012
2013 /* We need to handle JUMP_TABLE_DATA separately. */
2014 if (JUMP_TABLE_DATA_P (insn))
2015 {
2016 new_insn = emit_insn_after (gen_lnop(), insn);
2017 recog_memoized (new_insn);
2018 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2019 return;
2020 }
2021
5a976006 2022 p = get_pipe (insn);
2023 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2024 new_insn = emit_insn_after (gen_lnop (), insn);
2025 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2026 {
5a976006 2027 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2028 PUT_MODE (new_insn, TImode);
2029 PUT_MODE (insn, VOIDmode);
2030 }
2031 else
2032 new_insn = emit_insn_after (gen_lnop (), insn);
2033 recog_memoized (new_insn);
d53c050c 2034 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
5a976006 2035}
2036
2037/* Insert nops in basic blocks to meet dual issue alignment
2038 requirements. Also make sure hbrp and hint instructions are at least
2039 one cycle apart, possibly inserting a nop. */
2040static void
2041pad_bb(void)
2042{
0af56f80 2043 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
5a976006 2044 int length;
2045 int addr;
2046
2047 /* This sets up INSN_ADDRESSES. */
2048 shorten_branches (get_insns ());
2049
2050 /* Keep track of length added by nops. */
2051 length = 0;
2052
2053 prev_insn = 0;
2054 insn = get_insns ();
2055 if (!active_insn_p (insn))
2056 insn = next_active_insn (insn);
2057 for (; insn; insn = next_insn)
2058 {
2059 next_insn = next_active_insn (insn);
2060 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2061 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2062 {
5a976006 2063 if (hbr_insn)
2064 {
2065 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2066 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2067 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2068 || (a1 - a0 == 4))
2069 {
2070 prev_insn = emit_insn_before (gen_lnop (), insn);
2071 PUT_MODE (prev_insn, GET_MODE (insn));
2072 PUT_MODE (insn, TImode);
d53c050c 2073 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
5a976006 2074 length += 4;
2075 }
2076 }
2077 hbr_insn = insn;
2078 }
4f8e39e2 2079 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
5a976006 2080 {
2081 if (GET_MODE (insn) == TImode)
2082 PUT_MODE (next_insn, TImode);
2083 insn = next_insn;
2084 next_insn = next_active_insn (insn);
2085 }
2086 addr = INSN_ADDRESSES (INSN_UID (insn));
2087 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2088 {
2089 if (((addr + length) & 7) != 0)
2090 {
2091 emit_nop_for_insn (prev_insn);
2092 length += 4;
2093 }
644459d0 2094 }
5a976006 2095 else if (GET_MODE (insn) == TImode
2096 && ((next_insn && GET_MODE (next_insn) != TImode)
2097 || get_attr_type (insn) == TYPE_MULTI0)
2098 && ((addr + length) & 7) != 0)
2099 {
2100 /* prev_insn will always be set because the first insn is
2101 always 8-byte aligned. */
2102 emit_nop_for_insn (prev_insn);
2103 length += 4;
2104 }
2105 prev_insn = insn;
644459d0 2106 }
644459d0 2107}
2108
5a976006 2109\f
2110/* Routines for branch hints. */
2111
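/* Emit a branch hint (hbr) for BRANCH, whose target is TARGET, placing
   the hint before BEFORE.  DISTANCE is the number of bytes between the
   hint and the branch; BLOCKS records the blocks that receive new insns
   so the caller can recompute their boundaries.  */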
644459d0 2112static void
0af56f80 2113spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
5a976006 2114 int distance, sbitmap blocks)
644459d0 2115{
5a976006 2116 rtx branch_label = 0;
0af56f80 2117 rtx_insn *hint;
2118 rtx_insn *insn;
c86d86ff 2119 rtx_jump_table_data *table;
644459d0 2120
2121 if (before == 0 || branch == 0 || target == 0)
2122 return;
2123
5a976006 2124	  /* While scheduling we require hints to be no further than 600 bytes
 2125	     away, so we need to enforce that here too.  */
644459d0 2126 if (distance > 600)
2127 return;
2128
5a976006 2129	  /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2130 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2131 before = NEXT_INSN (before);
644459d0 2132
2133 branch_label = gen_label_rtx ();
2134 LABEL_NUSES (branch_label)++;
2135 LABEL_PRESERVE_P (branch_label) = 1;
2136 insn = emit_label_before (branch_label, branch);
2137 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
08b7917c 2138 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
5a976006 2139
2140 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2141 recog_memoized (hint);
d53c050c 2142 INSN_LOCATION (hint) = INSN_LOCATION (branch);
5a976006 2143 HINTED_P (branch) = 1;
644459d0 2144
5a976006 2145 if (GET_CODE (target) == LABEL_REF)
2146 HINTED_P (XEXP (target, 0)) = 1;
2147 else if (tablejump_p (branch, 0, &table))
644459d0 2148 {
5a976006 2149 rtvec vec;
2150 int j;
2151 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2152 vec = XVEC (PATTERN (table), 0);
2153 else
2154 vec = XVEC (PATTERN (table), 1);
2155 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2156 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2157 }
5a976006 2158
2159 if (distance >= 588)
644459d0 2160 {
5a976006 2161 /* Make sure the hint isn't scheduled any earlier than this point,
 2162	     which could make it too far for the branch offset to fit.  */
2fbdf9ef 2163 insn = emit_insn_before (gen_blockage (), hint);
2164 recog_memoized (insn);
d53c050c 2165 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2166 }
2167 else if (distance <= 8 * 4)
2168 {
2169 /* To guarantee at least 8 insns between the hint and branch we
2170 insert nops. */
2171 int d;
2172 for (d = distance; d < 8 * 4; d += 4)
2173 {
2174 insn =
2175 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2176 recog_memoized (insn);
d53c050c 2177 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2178 }
2179
2180 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2181 insn = emit_insn_after (gen_blockage (), hint);
2182 recog_memoized (insn);
d53c050c 2183 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2184
2185 /* Make sure any nops inserted aren't scheduled after the call. */
2186 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2187 {
2188 insn = emit_insn_before (gen_blockage (), branch);
2189 recog_memoized (insn);
d53c050c 2190 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2fbdf9ef 2191 }
644459d0 2192 }
644459d0 2193}
2194
2195/* Returns 0 if we don't want a hint for this branch. Otherwise return
2196 the rtx for the branch target. */
2197static rtx
0af56f80 2198get_branch_target (rtx_insn *branch)
644459d0 2199{
aa90bb35 2200 if (JUMP_P (branch))
644459d0 2201 {
2202 rtx set, src;
2203
2204 /* Return statements */
2205 if (GET_CODE (PATTERN (branch)) == RETURN)
2206 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2207
fcc31b99 2208 /* ASM GOTOs. */
604157f6 2209 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2210 return NULL;
2211
644459d0 2212 set = single_set (branch);
2213 src = SET_SRC (set);
2214 if (GET_CODE (SET_DEST (set)) != PC)
2215 abort ();
2216
2217 if (GET_CODE (src) == IF_THEN_ELSE)
2218 {
2219 rtx lab = 0;
2220 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2221 if (note)
2222 {
2223 /* If the more probable case is not a fall through, then
2224 try a branch hint. */
9eb946de 2225 int prob = XINT (note, 0);
644459d0 2226 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2227 && GET_CODE (XEXP (src, 1)) != PC)
2228 lab = XEXP (src, 1);
2229 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2230 && GET_CODE (XEXP (src, 2)) != PC)
2231 lab = XEXP (src, 2);
2232 }
2233 if (lab)
2234 {
2235 if (GET_CODE (lab) == RETURN)
2236 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2237 return lab;
2238 }
2239 return 0;
2240 }
2241
2242 return src;
2243 }
aa90bb35 2244 else if (CALL_P (branch))
644459d0 2245 {
2246 rtx call;
2247 /* All of our call patterns are in a PARALLEL and the CALL is
2248 the first pattern in the PARALLEL. */
2249 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2250 abort ();
2251 call = XVECEXP (PATTERN (branch), 0, 0);
2252 if (GET_CODE (call) == SET)
2253 call = SET_SRC (call);
2254 if (GET_CODE (call) != CALL)
2255 abort ();
2256 return XEXP (XEXP (call, 0), 0);
2257 }
2258 return 0;
2259}
2260
5a976006 2261/* The special $hbr register is used to prevent the insn scheduler from
2262 moving hbr insns across instructions which invalidate them. It
2263 should only be used in a clobber, and this function searches for
2264 insns which clobber it. */
2265static bool
0af56f80 2266insn_clobbers_hbr (rtx_insn *insn)
5a976006 2267{
2268 if (INSN_P (insn)
2269 && GET_CODE (PATTERN (insn)) == PARALLEL)
2270 {
2271 rtx parallel = PATTERN (insn);
2272 rtx clobber;
2273 int j;
2274 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2275 {
2276 clobber = XVECEXP (parallel, 0, j);
2277 if (GET_CODE (clobber) == CLOBBER
2278 && GET_CODE (XEXP (clobber, 0)) == REG
2279 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2280 return 1;
2281 }
2282 }
2283 return 0;
2284}
2285
2286/* Search up to 32 insns starting at FIRST:
2287 - at any kind of hinted branch, just return
2288 - at any unconditional branch in the first 15 insns, just return
2289 - at a call or indirect branch, after the first 15 insns, force it to
2290 an even address and return
2291 - at any unconditional branch, after the first 15 insns, force it to
2292 an even address.
 2293	   At the end of the search, insert an hbrp within 4 insns of FIRST,
2294 and an hbrp within 16 instructions of FIRST.
2295 */
644459d0 2296static void
0af56f80 2297insert_hbrp_for_ilb_runout (rtx_insn *first)
644459d0 2298{
0af56f80 2299 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
5a976006 2300 int addr = 0, length, first_addr = -1;
2301 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2302 int insert_lnop_after = 0;
2303 for (insn = first; insn; insn = NEXT_INSN (insn))
2304 if (INSN_P (insn))
2305 {
2306 if (first_addr == -1)
2307 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2308 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2309 length = get_attr_length (insn);
2310
2311 if (before_4 == 0 && addr + length >= 4 * 4)
2312 before_4 = insn;
2313 /* We test for 14 instructions because the first hbrp will add
2314 up to 2 instructions. */
2315 if (before_16 == 0 && addr + length >= 14 * 4)
2316 before_16 = insn;
2317
2318 if (INSN_CODE (insn) == CODE_FOR_hbr)
2319 {
2320 /* Make sure an hbrp is at least 2 cycles away from a hint.
2321 Insert an lnop after the hbrp when necessary. */
2322 if (before_4 == 0 && addr > 0)
2323 {
2324 before_4 = insn;
2325 insert_lnop_after |= 1;
2326 }
2327 else if (before_4 && addr <= 4 * 4)
2328 insert_lnop_after |= 1;
2329 if (before_16 == 0 && addr > 10 * 4)
2330 {
2331 before_16 = insn;
2332 insert_lnop_after |= 2;
2333 }
2334 else if (before_16 && addr <= 14 * 4)
2335 insert_lnop_after |= 2;
2336 }
644459d0 2337
5a976006 2338 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2339 {
2340 if (addr < hbrp_addr0)
2341 hbrp_addr0 = addr;
2342 else if (addr < hbrp_addr1)
2343 hbrp_addr1 = addr;
2344 }
644459d0 2345
5a976006 2346 if (CALL_P (insn) || JUMP_P (insn))
2347 {
2348 if (HINTED_P (insn))
2349 return;
2350
2351 /* Any branch after the first 15 insns should be on an even
2352 address to avoid a special case branch. There might be
2353 some nops and/or hbrps inserted, so we test after 10
2354 insns. */
2355 if (addr > 10 * 4)
2356 SCHED_ON_EVEN_P (insn) = 1;
2357 }
644459d0 2358
5a976006 2359 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2360 return;
2361
2362
2363 if (addr + length >= 32 * 4)
644459d0 2364 {
5a976006 2365 gcc_assert (before_4 && before_16);
2366 if (hbrp_addr0 > 4 * 4)
644459d0 2367 {
5a976006 2368 insn =
2369 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2370 recog_memoized (insn);
d53c050c 2371 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2372 INSN_ADDRESSES_NEW (insn,
2373 INSN_ADDRESSES (INSN_UID (before_4)));
2374 PUT_MODE (insn, GET_MODE (before_4));
2375 PUT_MODE (before_4, TImode);
2376 if (insert_lnop_after & 1)
644459d0 2377 {
5a976006 2378 insn = emit_insn_before (gen_lnop (), before_4);
2379 recog_memoized (insn);
d53c050c 2380 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2381 INSN_ADDRESSES_NEW (insn,
2382 INSN_ADDRESSES (INSN_UID (before_4)));
2383 PUT_MODE (insn, TImode);
644459d0 2384 }
644459d0 2385 }
5a976006 2386 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2387 && hbrp_addr1 > 16 * 4)
644459d0 2388 {
5a976006 2389 insn =
2390 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2391 recog_memoized (insn);
d53c050c 2392 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2393 INSN_ADDRESSES_NEW (insn,
2394 INSN_ADDRESSES (INSN_UID (before_16)));
2395 PUT_MODE (insn, GET_MODE (before_16));
2396 PUT_MODE (before_16, TImode);
2397 if (insert_lnop_after & 2)
644459d0 2398 {
5a976006 2399 insn = emit_insn_before (gen_lnop (), before_16);
2400 recog_memoized (insn);
d53c050c 2401 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2402 INSN_ADDRESSES_NEW (insn,
2403 INSN_ADDRESSES (INSN_UID
2404 (before_16)));
2405 PUT_MODE (insn, TImode);
644459d0 2406 }
2407 }
5a976006 2408 return;
644459d0 2409 }
644459d0 2410 }
5a976006 2411 else if (BARRIER_P (insn))
2412 return;
644459d0 2413
644459d0 2414}
5a976006 2415
2416/* The SPU might hang when it executes 48 inline instructions after a
2417 hinted branch jumps to its hinted target. The beginning of a
851d9296 2418 function and the return from a call might have been hinted, and
2419 must be handled as well. To prevent a hang we insert 2 hbrps. The
2420 first should be within 6 insns of the branch target. The second
2421 should be within 22 insns of the branch target. When determining
2422 if hbrps are necessary, we look for only 32 inline instructions,
 2423	   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2424 when inserting new hbrps, we insert them within 4 and 16 insns of
2425 the target. */
644459d0 2426static void
5a976006 2427insert_hbrp (void)
644459d0 2428{
0af56f80 2429 rtx_insn *insn;
5a976006 2430 if (TARGET_SAFE_HINTS)
644459d0 2431 {
5a976006 2432 shorten_branches (get_insns ());
2433 /* Insert hbrp at beginning of function */
2434 insn = next_active_insn (get_insns ());
2435 if (insn)
2436 insert_hbrp_for_ilb_runout (insn);
2437 /* Insert hbrp after hinted targets. */
2438 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2439 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2440 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2441 }
644459d0 2442}
2443
5a976006 2444static int in_spu_reorg;
2445
8a42230a 2446static void
2447spu_var_tracking (void)
2448{
2449 if (flag_var_tracking)
2450 {
2451 df_analyze ();
2452 timevar_push (TV_VAR_TRACKING);
2453 variable_tracking_main ();
2454 timevar_pop (TV_VAR_TRACKING);
2455 df_finish_pass (false);
2456 }
2457}
2458
5a976006 2459/* Insert branch hints. There are no branch optimizations after this
2460 pass, so it's safe to set our branch hints now. */
644459d0 2461static void
5a976006 2462spu_machine_dependent_reorg (void)
644459d0 2463{
5a976006 2464 sbitmap blocks;
2465 basic_block bb;
0af56f80 2466 rtx_insn *branch, *insn;
5a976006 2467 rtx branch_target = 0;
2468 int branch_addr = 0, insn_addr, required_dist = 0;
2469 int i;
2470 unsigned int j;
644459d0 2471
5a976006 2472 if (!TARGET_BRANCH_HINTS || optimize == 0)
2473 {
2474 /* We still do it for unoptimized code because an external
2475 function might have hinted a call or return. */
a54ca889 2476 compute_bb_for_insn ();
5a976006 2477 insert_hbrp ();
2478 pad_bb ();
8a42230a 2479 spu_var_tracking ();
a54ca889 2480 free_bb_for_insn ();
5a976006 2481 return;
2482 }
644459d0 2483
fe672ac0 2484 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
53c5d9d4 2485 bitmap_clear (blocks);
644459d0 2486
5a976006 2487 in_spu_reorg = 1;
2488 compute_bb_for_insn ();
2489
a7a0184d 2490 /* (Re-)discover loops so that bb->loop_father can be used
2491 in the analysis below. */
2492 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2493
5a976006 2494 compact_blocks ();
2495
2496 spu_bb_info =
a28770e1 2497 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
5a976006 2498 sizeof (struct spu_bb_info));
2499
2500 /* We need exact insn addresses and lengths. */
2501 shorten_branches (get_insns ());
2502
a28770e1 2503 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
644459d0 2504 {
f5a6b05f 2505 bb = BASIC_BLOCK_FOR_FN (cfun, i);
5a976006 2506 branch = 0;
2507 if (spu_bb_info[i].prop_jump)
644459d0 2508 {
5a976006 2509 branch = spu_bb_info[i].prop_jump;
2510 branch_target = get_branch_target (branch);
2511 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2512 required_dist = spu_hint_dist;
2513 }
2514 /* Search from end of a block to beginning. In this loop, find
 2515	         jumps which need a branch hint and emit the hints only when:
2516 - it's an indirect branch and we're at the insn which sets
2517 the register
2518 - we're at an insn that will invalidate the hint. e.g., a
2519 call, another hint insn, inline asm that clobbers $hbr, and
2520 some inlined operations (divmodsi4). Don't consider jumps
2521 because they are only at the end of a block and are
2522 considered when we are deciding whether to propagate
2523 - we're getting too far away from the branch. The hbr insns
2524 only have a signed 10 bit offset
2525 We go back as far as possible so the branch will be considered
2526 for propagation when we get to the beginning of the block. */
2527 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2528 {
2529 if (INSN_P (insn))
2530 {
2531 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2532 if (branch
2533 && ((GET_CODE (branch_target) == REG
2534 && set_of (branch_target, insn) != NULL_RTX)
2535 || insn_clobbers_hbr (insn)
2536 || branch_addr - insn_addr > 600))
2537 {
0af56f80 2538 rtx_insn *next = NEXT_INSN (insn);
5a976006 2539 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2540 if (insn != BB_END (bb)
2541 && branch_addr - next_addr >= required_dist)
2542 {
2543 if (dump_file)
2544 fprintf (dump_file,
2545 "hint for %i in block %i before %i\n",
2546 INSN_UID (branch), bb->index,
2547 INSN_UID (next));
2548 spu_emit_branch_hint (next, branch, branch_target,
2549 branch_addr - next_addr, blocks);
2550 }
2551 branch = 0;
2552 }
2553
2554 /* JUMP_P will only be true at the end of a block. When
2555 branch is already set it means we've previously decided
2556 to propagate a hint for that branch into this block. */
2557 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2558 {
2559 branch = 0;
2560 if ((branch_target = get_branch_target (insn)))
2561 {
2562 branch = insn;
2563 branch_addr = insn_addr;
2564 required_dist = spu_hint_dist;
2565 }
2566 }
2567 }
2568 if (insn == BB_HEAD (bb))
2569 break;
2570 }
2571
2572 if (branch)
2573 {
2574 /* If we haven't emitted a hint for this branch yet, it might
2575 be profitable to emit it in one of the predecessor blocks,
2576 especially for loops. */
0af56f80 2577 rtx_insn *bbend;
5a976006 2578 basic_block prev = 0, prop = 0, prev2 = 0;
2579 int loop_exit = 0, simple_loop = 0;
2580 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2581
2582 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2583 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2584 prev = EDGE_PRED (bb, j)->src;
2585 else
2586 prev2 = EDGE_PRED (bb, j)->src;
2587
2588 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2589 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2590 loop_exit = 1;
2591 else if (EDGE_SUCC (bb, j)->dest == bb)
2592 simple_loop = 1;
2593
2594 /* If this branch is a loop exit then propagate to previous
2595 fallthru block. This catches the cases when it is a simple
2596 loop or when there is an initial branch into the loop. */
2597 if (prev && (loop_exit || simple_loop)
a7a0184d 2598 && bb_loop_depth (prev) <= bb_loop_depth (bb))
5a976006 2599 prop = prev;
2600
 2601	  /* If there is only one adjacent predecessor, don't propagate
a7a0184d 2602 outside this loop. */
5a976006 2603 else if (prev && single_pred_p (bb)
a7a0184d 2604 && prev->loop_father == bb->loop_father)
5a976006 2605 prop = prev;
2606
2607 /* If this is the JOIN block of a simple IF-THEN then
9d75589a 2608 propagate the hint to the HEADER block. */
5a976006 2609 else if (prev && prev2
2610 && EDGE_COUNT (bb->preds) == 2
2611 && EDGE_COUNT (prev->preds) == 1
2612 && EDGE_PRED (prev, 0)->src == prev2
a7a0184d 2613 && prev2->loop_father == bb->loop_father
5a976006 2614 && GET_CODE (branch_target) != REG)
2615 prop = prev;
2616
2617 /* Don't propagate when:
2618 - this is a simple loop and the hint would be too far
2619 - this is not a simple loop and there are 16 insns in
2620 this block already
2621 - the predecessor block ends in a branch that will be
2622 hinted
2623 - the predecessor block ends in an insn that invalidates
2624 the hint */
2625 if (prop
2626 && prop->index >= 0
2627 && (bbend = BB_END (prop))
2628 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2629 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2630 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2631 {
2632 if (dump_file)
2633 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2634 "for %i (loop_exit %i simple_loop %i dist %i)\n",
a7a0184d 2635 bb->index, prop->index, bb_loop_depth (bb),
5a976006 2636 INSN_UID (branch), loop_exit, simple_loop,
2637 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2638
2639 spu_bb_info[prop->index].prop_jump = branch;
2640 spu_bb_info[prop->index].bb_index = i;
2641 }
2642 else if (branch_addr - next_addr >= required_dist)
2643 {
2644 if (dump_file)
2645 fprintf (dump_file, "hint for %i in block %i before %i\n",
2646 INSN_UID (branch), bb->index,
2647 INSN_UID (NEXT_INSN (insn)));
2648 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2649 branch_addr - next_addr, blocks);
2650 }
2651 branch = 0;
644459d0 2652 }
644459d0 2653 }
5a976006 2654 free (spu_bb_info);
644459d0 2655
53c5d9d4 2656 if (!bitmap_empty_p (blocks))
5a976006 2657 find_many_sub_basic_blocks (blocks);
2658
2659 /* We have to schedule to make sure alignment is ok. */
fc00614f 2660 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
5a976006 2661
2662 /* The hints need to be scheduled, so call it again. */
2663 schedule_insns ();
2fbdf9ef 2664 df_finish_pass (true);
5a976006 2665
2666 insert_hbrp ();
2667
2668 pad_bb ();
2669
8f1d58ad 2670 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2671 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2672 {
2673 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2674	   between its branch label and the branch.  We don't move the
2675 label because GCC expects it at the beginning of the block. */
2676 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2677 rtx label_ref = XVECEXP (unspec, 0, 0);
4cd001d5 2678 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2679 rtx_insn *branch;
8f1d58ad 2680 int offset = 0;
2681 for (branch = NEXT_INSN (label);
2682 !JUMP_P (branch) && !CALL_P (branch);
2683 branch = NEXT_INSN (branch))
2684 if (NONJUMP_INSN_P (branch))
2685 offset += get_attr_length (branch);
2686 if (offset > 0)
29c05e22 2687 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
8f1d58ad 2688 }
5a976006 2689
8a42230a 2690 spu_var_tracking ();
5a976006 2691
a7a0184d 2692 loop_optimizer_finalize ();
2693
5a976006 2694 free_bb_for_insn ();
2695
2696 in_spu_reorg = 0;
644459d0 2697}
2698\f
2699
2700/* Insn scheduling routines, primarily for dual issue. */
2701static int
2702spu_sched_issue_rate (void)
2703{
2704 return 2;
2705}
2706
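/* Return 1 when INSN is a single_set whose source or destination is a
   MEM, i.e. it will use the load/store unit.  */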
2707static int
0af56f80 2708uses_ls_unit(rtx_insn *insn)
644459d0 2709{
5a976006 2710 rtx set = single_set (insn);
2711 if (set != 0
2712 && (GET_CODE (SET_DEST (set)) == MEM
2713 || GET_CODE (SET_SRC (set)) == MEM))
2714 return 1;
2715 return 0;
644459d0 2716}
2717
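/* Return which pipeline INSN issues on: 0 for the even (arithmetic)
   pipe, 1 for the odd (load/store, shuffle, branch) pipe, -1 for
   inline asm or MULTI0, and -2 for CONVERT.  */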
2718static int
0af56f80 2719get_pipe (rtx_insn *insn)
644459d0 2720{
2721 enum attr_type t;
2722 /* Handle inline asm */
2723 if (INSN_CODE (insn) == -1)
2724 return -1;
2725 t = get_attr_type (insn);
2726 switch (t)
2727 {
2728 case TYPE_CONVERT:
2729 return -2;
2730 case TYPE_MULTI0:
2731 return -1;
2732
2733 case TYPE_FX2:
2734 case TYPE_FX3:
2735 case TYPE_SPR:
2736 case TYPE_NOP:
2737 case TYPE_FXB:
2738 case TYPE_FPD:
2739 case TYPE_FP6:
2740 case TYPE_FP7:
644459d0 2741 return 0;
2742
2743 case TYPE_LNOP:
2744 case TYPE_SHUF:
2745 case TYPE_LOAD:
2746 case TYPE_STORE:
2747 case TYPE_BR:
2748 case TYPE_MULTI1:
2749 case TYPE_HBR:
5a976006 2750 case TYPE_IPREFETCH:
644459d0 2751 return 1;
2752 default:
2753 abort ();
2754 }
2755}
2756
5a976006 2757
2758/* haifa-sched.c has a static variable that keeps track of the current
2759 cycle. It is passed to spu_sched_reorder, and we record it here for
2760 use by spu_sched_variable_issue. It won't be accurate if the
 2761	   scheduler updates its clock_var between the two calls.  */
2762static int clock_var;
2763
2764/* This is used to keep track of insn alignment. Set to 0 at the
2765 beginning of each block and increased by the "length" attr of each
2766 insn scheduled. */
2767static int spu_sched_length;
2768
2769/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2770 ready list appropriately in spu_sched_reorder(). */
2771static int pipe0_clock;
2772static int pipe1_clock;
2773
2774static int prev_clock_var;
2775
2776static int prev_priority;
2777
2778/* The SPU needs to load the next ilb sometime during the execution of
2779 the previous ilb. There is a potential conflict if every cycle has a
2780 load or store. To avoid the conflict we make sure the load/store
2781 unit is free for at least one cycle during the execution of insns in
2782 the previous ilb. */
2783static int spu_ls_first;
2784static int prev_ls_clock;
2785
2786static void
2787spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2788 int max_ready ATTRIBUTE_UNUSED)
2789{
2790 spu_sched_length = 0;
2791}
2792
2793static void
2794spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2795 int max_ready ATTRIBUTE_UNUSED)
2796{
2797 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2798 {
 2799	      /* When any block might be at least 8-byte aligned, assume all
 2800	         blocks will be at least 8-byte aligned to make sure dual issue
2801 works out correctly. */
2802 spu_sched_length = 0;
2803 }
2804 spu_ls_first = INT_MAX;
2805 clock_var = -1;
2806 prev_ls_clock = -1;
2807 pipe0_clock = -1;
2808 pipe1_clock = -1;
2809 prev_clock_var = -1;
2810 prev_priority = -1;
2811}
2812
644459d0 2813static int
5a976006 2814spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
0af56f80 2815 int verbose ATTRIBUTE_UNUSED,
18282db0 2816 rtx_insn *insn, int more)
644459d0 2817{
5a976006 2818 int len;
2819 int p;
644459d0 2820 if (GET_CODE (PATTERN (insn)) == USE
2821 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2822 || (len = get_attr_length (insn)) == 0)
2823 return more;
2824
2825 spu_sched_length += len;
2826
2827 /* Reset on inline asm */
2828 if (INSN_CODE (insn) == -1)
2829 {
2830 spu_ls_first = INT_MAX;
2831 pipe0_clock = -1;
2832 pipe1_clock = -1;
2833 return 0;
2834 }
2835 p = get_pipe (insn);
2836 if (p == 0)
2837 pipe0_clock = clock_var;
2838 else
2839 pipe1_clock = clock_var;
2840
2841 if (in_spu_reorg)
2842 {
2843 if (clock_var - prev_ls_clock > 1
2844 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2845 spu_ls_first = INT_MAX;
2846 if (uses_ls_unit (insn))
2847 {
2848 if (spu_ls_first == INT_MAX)
2849 spu_ls_first = spu_sched_length;
2850 prev_ls_clock = clock_var;
2851 }
2852
2853 /* The scheduler hasn't inserted the nop, but we will later on.
2854 Include those nops in spu_sched_length. */
2855 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2856 spu_sched_length += 4;
2857 prev_clock_var = clock_var;
2858
2859 /* more is -1 when called from spu_sched_reorder for new insns
2860 that don't have INSN_PRIORITY */
2861 if (more >= 0)
2862 prev_priority = INSN_PRIORITY (insn);
2863 }
2864
9d75589a 2865 /* Always try issuing more insns. spu_sched_reorder will decide
5a976006 2866 when the cycle should be advanced. */
2867 return 1;
2868}
2869
2870/* This function is called for both TARGET_SCHED_REORDER and
2871 TARGET_SCHED_REORDER2. */
2872static int
2873spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
b24ef467 2874 rtx_insn **ready, int *nreadyp, int clock)
5a976006 2875{
2876 int i, nready = *nreadyp;
2877 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
b24ef467 2878 rtx_insn *insn;
5a976006 2879
2880 clock_var = clock;
2881
2882 if (nready <= 0 || pipe1_clock >= clock)
2883 return 0;
2884
2885 /* Find any rtl insns that don't generate assembly insns and schedule
2886 them first. */
2887 for (i = nready - 1; i >= 0; i--)
2888 {
2889 insn = ready[i];
2890 if (INSN_CODE (insn) == -1
2891 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 2892 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 2893 {
2894 ready[i] = ready[nready - 1];
2895 ready[nready - 1] = insn;
2896 return 1;
2897 }
2898 }
2899
2900 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2901 for (i = 0; i < nready; i++)
2902 if (INSN_CODE (ready[i]) != -1)
2903 {
2904 insn = ready[i];
2905 switch (get_attr_type (insn))
2906 {
2907 default:
2908 case TYPE_MULTI0:
2909 case TYPE_CONVERT:
2910 case TYPE_FX2:
2911 case TYPE_FX3:
2912 case TYPE_SPR:
2913 case TYPE_NOP:
2914 case TYPE_FXB:
2915 case TYPE_FPD:
2916 case TYPE_FP6:
2917 case TYPE_FP7:
2918 pipe_0 = i;
2919 break;
2920 case TYPE_LOAD:
2921 case TYPE_STORE:
2922 pipe_ls = i;
2923 case TYPE_LNOP:
2924 case TYPE_SHUF:
2925 case TYPE_BR:
2926 case TYPE_MULTI1:
2927 case TYPE_HBR:
2928 pipe_1 = i;
2929 break;
2930 case TYPE_IPREFETCH:
2931 pipe_hbrp = i;
2932 break;
2933 }
2934 }
2935
2936 /* In the first scheduling phase, schedule loads and stores together
2937 to increase the chance they will get merged during postreload CSE. */
2938 if (!reload_completed && pipe_ls >= 0)
2939 {
2940 insn = ready[pipe_ls];
2941 ready[pipe_ls] = ready[nready - 1];
2942 ready[nready - 1] = insn;
2943 return 1;
2944 }
2945
2946 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2947 if (pipe_hbrp >= 0)
2948 pipe_1 = pipe_hbrp;
2949
2950 /* When we have loads/stores in every cycle of the last 15 insns and
2951 we are about to schedule another load/store, emit an hbrp insn
2952 instead. */
2953 if (in_spu_reorg
2954 && spu_sched_length - spu_ls_first >= 4 * 15
2955 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2956 {
2957 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2958 recog_memoized (insn);
2959 if (pipe0_clock < clock)
2960 PUT_MODE (insn, TImode);
2961 spu_sched_variable_issue (file, verbose, insn, -1);
2962 return 0;
2963 }
2964
2965 /* In general, we want to emit nops to increase dual issue, but dual
2966 issue isn't faster when one of the insns could be scheduled later
 2967	     without affecting the critical path.  We look at INSN_PRIORITY to
 2968	     make a good guess, but it isn't perfect, so -mdual-nops=n can be
 2969	     used to adjust it.  */
2970 if (in_spu_reorg && spu_dual_nops < 10)
2971 {
9d75589a 2972 /* When we are at an even address and we are not issuing nops to
5a976006 2973	     improve scheduling, then we need to advance the cycle.
2974 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2975 && (spu_dual_nops == 0
2976 || (pipe_1 != -1
2977 && prev_priority >
2978 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2979 return 0;
2980
2981 /* When at an odd address, schedule the highest priority insn
2982 without considering pipeline. */
2983 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2984 && (spu_dual_nops == 0
2985 || (prev_priority >
2986 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2987 return 1;
2988 }
2989
2990
2991 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2992 pipe0 insn in the ready list, schedule it. */
2993 if (pipe0_clock < clock && pipe_0 >= 0)
2994 schedule_i = pipe_0;
2995
2996 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2997 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2998 else
2999 schedule_i = pipe_1;
3000
3001 if (schedule_i > -1)
3002 {
3003 insn = ready[schedule_i];
3004 ready[schedule_i] = ready[nready - 1];
3005 ready[nready - 1] = insn;
3006 return 1;
3007 }
3008 return 0;
644459d0 3009}
3010
3011/* INSN is dependent on DEP_INSN. */
3012static int
18282db0 3013spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
644459d0 3014{
5a976006 3015 rtx set;
3016
3017 /* The blockage pattern is used to prevent instructions from being
3018 moved across it and has no cost. */
3019 if (INSN_CODE (insn) == CODE_FOR_blockage
3020 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3021 return 0;
3022
9d98604b 3023 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3024 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3025 return 0;
3026
3027 /* Make sure hbrps are spread out. */
3028 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3029 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3030 return 8;
3031
3032 /* Make sure hints and hbrps are 2 cycles apart. */
3033 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3034 || INSN_CODE (insn) == CODE_FOR_hbr)
3035 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3036 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3037 return 2;
3038
3039 /* An hbrp has no real dependency on other insns. */
3040 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3041 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3042 return 0;
3043
3044 /* Assuming that it is unlikely an argument register will be used in
3045 the first cycle of the called function, we reduce the cost for
3046 slightly better scheduling of dep_insn. When not hinted, the
3047 mispredicted branch would hide the cost as well. */
3048 if (CALL_P (insn))
3049 {
3050 rtx target = get_branch_target (insn);
3051 if (GET_CODE (target) != REG || !set_of (target, insn))
3052 return cost - 2;
3053 return cost;
3054 }
3055
3056 /* And when returning from a function, let's assume the return values
3057 are completed sooner too. */
3058 if (CALL_P (dep_insn))
644459d0 3059 return cost - 2;
5a976006 3060
 3061	  /* Make sure an instruction that loads from the back chain is scheduled
3062 away from the return instruction so a hint is more likely to get
3063 issued. */
3064 if (INSN_CODE (insn) == CODE_FOR__return
3065 && (set = single_set (dep_insn))
3066 && GET_CODE (SET_DEST (set)) == REG
3067 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3068 return 20;
3069
644459d0 3070 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3071 scheduler makes every insn in a block anti-dependent on the final
3072 jump_insn. We adjust here so higher cost insns will get scheduled
3073 earlier. */
5a976006 3074 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3075 return insn_cost (dep_insn) - 3;
5a976006 3076
644459d0 3077 return cost;
3078}
3079\f
3080/* Create a CONST_DOUBLE from a string. */
842ae815 3081rtx
3754d046 3082spu_float_const (const char *string, machine_mode mode)
644459d0 3083{
3084 REAL_VALUE_TYPE value;
3085 value = REAL_VALUE_ATOF (string, mode);
3086 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3087}
3088
644459d0 3089int
3090spu_constant_address_p (rtx x)
3091{
3092 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3093 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3094 || GET_CODE (x) == HIGH);
3095}
3096
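/* Return which immediate-load instruction can produce VAL: il for a
   signed 16-bit value, ila for an unsigned 18-bit value, ilh when both
   halfwords of VAL are identical, and ilhu when the low 16 bits are
   zero.  */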
3097static enum spu_immediate
3098which_immediate_load (HOST_WIDE_INT val)
3099{
3100 gcc_assert (val == trunc_int_for_mode (val, SImode));
3101
3102 if (val >= -0x8000 && val <= 0x7fff)
3103 return SPU_IL;
3104 if (val >= 0 && val <= 0x3ffff)
3105 return SPU_ILA;
3106 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3107 return SPU_ILH;
3108 if ((val & 0xffff) == 0)
3109 return SPU_ILHU;
3110
3111 return SPU_NONE;
3112}
3113
dea01258 3114/* Return true when OP can be loaded by one of the il instructions, or
3115 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3116int
3754d046 3117immediate_load_p (rtx op, machine_mode mode)
dea01258 3118{
3119 if (CONSTANT_P (op))
3120 {
3121 enum immediate_class c = classify_immediate (op, mode);
5df189be 3122 return c == IC_IL1 || c == IC_IL1s
3072d30e 3123 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3124 }
3125 return 0;
3126}
3127
 3128/* Return true if the first SIZE bytes of ARR form a constant that can be
3129 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3130 represent the size and offset of the instruction to use. */
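/* The loop below looks for a single run of bytes that differ from the
   identity shuffle pattern 0x10 .. 0x1f; a run of 1, 2, 4 or 8 bytes
   corresponds to cbd, chd, cwd or cdd respectively.  */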
3131static int
3132cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3133{
3134 int cpat, run, i, start;
3135 cpat = 1;
3136 run = 0;
3137 start = -1;
3138 for (i = 0; i < size && cpat; i++)
3139 if (arr[i] != i+16)
3140 {
3141 if (!run)
3142 {
3143 start = i;
3144 if (arr[i] == 3)
3145 run = 1;
3146 else if (arr[i] == 2 && arr[i+1] == 3)
3147 run = 2;
3148 else if (arr[i] == 0)
3149 {
3150 while (arr[i+run] == run && i+run < 16)
3151 run++;
3152 if (run != 4 && run != 8)
3153 cpat = 0;
3154 }
3155 else
3156 cpat = 0;
3157 if ((i & (run-1)) != 0)
3158 cpat = 0;
3159 i += run;
3160 }
3161 else
3162 cpat = 0;
3163 }
b01a6dc3 3164 if (cpat && (run || size < 16))
dea01258 3165 {
3166 if (run == 0)
3167 run = 1;
3168 if (prun)
3169 *prun = run;
3170 if (pstart)
3171 *pstart = start == -1 ? 16-run : start;
3172 return 1;
3173 }
3174 return 0;
3175}
3176
3177/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3178 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3179static enum immediate_class
3754d046 3180classify_immediate (rtx op, machine_mode mode)
644459d0 3181{
3182 HOST_WIDE_INT val;
3183 unsigned char arr[16];
5df189be 3184 int i, j, repeated, fsmbi, repeat;
dea01258 3185
3186 gcc_assert (CONSTANT_P (op));
3187
644459d0 3188 if (GET_MODE (op) != VOIDmode)
3189 mode = GET_MODE (op);
3190
dea01258 3191 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3192 if (!flag_pic
3193 && mode == V4SImode
dea01258 3194 && GET_CODE (op) == CONST_VECTOR
3195 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3196 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3197 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3198 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3199 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3200 op = CONST_VECTOR_ELT (op, 0);
644459d0 3201
dea01258 3202 switch (GET_CODE (op))
3203 {
3204 case SYMBOL_REF:
3205 case LABEL_REF:
3206 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3207
dea01258 3208 case CONST:
0cfc65d4 3209 /* We can never know if the resulting address fits in 18 bits and can be
3210 loaded with ila. For now, assume the address will not overflow if
3211 the displacement is "small" (fits 'K' constraint). */
3212 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3213 {
3214 rtx sym = XEXP (XEXP (op, 0), 0);
3215 rtx cst = XEXP (XEXP (op, 0), 1);
3216
3217 if (GET_CODE (sym) == SYMBOL_REF
3218 && GET_CODE (cst) == CONST_INT
3219 && satisfies_constraint_K (cst))
3220 return IC_IL1s;
3221 }
3222 return IC_IL2s;
644459d0 3223
dea01258 3224 case HIGH:
3225 return IC_IL1s;
3226
3227 case CONST_VECTOR:
3228 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3229 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3230 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3231 return IC_POOL;
3232 /* Fall through. */
3233
3234 case CONST_INT:
3235 case CONST_DOUBLE:
3236 constant_to_array (mode, op, arr);
644459d0 3237
dea01258 3238 /* Check that each 4-byte slot is identical. */
3239 repeated = 1;
3240 for (i = 4; i < 16; i += 4)
3241 for (j = 0; j < 4; j++)
3242 if (arr[j] != arr[i + j])
3243 repeated = 0;
3244
3245 if (repeated)
3246 {
3247 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3248 val = trunc_int_for_mode (val, SImode);
3249
3250 if (which_immediate_load (val) != SPU_NONE)
3251 return IC_IL1;
3252 }
3253
3254 /* Any mode of 2 bytes or smaller can be loaded with an il
3255 instruction. */
3256 gcc_assert (GET_MODE_SIZE (mode) > 2);
3257
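      /* fsmbi builds constants whose bytes are either 0x00 or 0xff.  The
         loop below records the single repeated nonzero byte value; when it
         is 0xff a plain fsmbi works (IC_FSMBI), any other repeated value
         is classified as IC_FSMBI2.  */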
3258 fsmbi = 1;
5df189be 3259 repeat = 0;
dea01258 3260 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3261 if (arr[i] != 0 && repeat == 0)
3262 repeat = arr[i];
3263 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3264 fsmbi = 0;
3265 if (fsmbi)
5df189be 3266 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3267
3268 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3269 return IC_CPAT;
3270
3271 if (repeated)
3272 return IC_IL2;
3273
3274 return IC_POOL;
3275 default:
3276 break;
3277 }
3278 gcc_unreachable ();
644459d0 3279}
3280
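/* Return which or-immediate instruction can produce VAL: ori for a
   10-bit signed word immediate, iohl for an unsigned 16-bit value, and
   orhi or orbi when VAL reduces to a repeated halfword or byte whose
   signed value fits in 10 bits.  */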
3281static enum spu_immediate
3282which_logical_immediate (HOST_WIDE_INT val)
3283{
3284 gcc_assert (val == trunc_int_for_mode (val, SImode));
3285
3286 if (val >= -0x200 && val <= 0x1ff)
3287 return SPU_ORI;
3288 if (val >= 0 && val <= 0xffff)
3289 return SPU_IOHL;
3290 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3291 {
3292 val = trunc_int_for_mode (val, HImode);
3293 if (val >= -0x200 && val <= 0x1ff)
3294 return SPU_ORHI;
3295 if ((val & 0xff) == ((val >> 8) & 0xff))
3296 {
3297 val = trunc_int_for_mode (val, QImode);
3298 if (val >= -0x200 && val <= 0x1ff)
3299 return SPU_ORBI;
3300 }
3301 }
3302 return SPU_NONE;
3303}
3304
5df189be 3305/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3306 CONST_DOUBLEs. */
3307static int
3308const_vector_immediate_p (rtx x)
3309{
3310 int i;
3311 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3312 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3313 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3314 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3315 return 0;
3316 return 1;
3317}
3318
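/* Return 1 when OP is a constant with identical 4-byte words whose value
   can be generated by ori, orhi or orbi (but not iohl).  */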
644459d0 3319int
3754d046 3320logical_immediate_p (rtx op, machine_mode mode)
644459d0 3321{
3322 HOST_WIDE_INT val;
3323 unsigned char arr[16];
3324 int i, j;
3325
3326 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3327 || GET_CODE (op) == CONST_VECTOR);
3328
5df189be 3329 if (GET_CODE (op) == CONST_VECTOR
3330 && !const_vector_immediate_p (op))
3331 return 0;
3332
644459d0 3333 if (GET_MODE (op) != VOIDmode)
3334 mode = GET_MODE (op);
3335
3336 constant_to_array (mode, op, arr);
3337
3338 /* Check that bytes are repeated. */
3339 for (i = 4; i < 16; i += 4)
3340 for (j = 0; j < 4; j++)
3341 if (arr[j] != arr[i + j])
3342 return 0;
3343
3344 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3345 val = trunc_int_for_mode (val, SImode);
3346
3347 i = which_logical_immediate (val);
3348 return i != SPU_NONE && i != SPU_IOHL;
3349}
3350
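/* Return 1 when OP is a constant with identical 4-byte words whose value
   lies in the range 0 .. 0xffff, i.e. can be generated with iohl.  */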
3351int
3754d046 3352iohl_immediate_p (rtx op, machine_mode mode)
644459d0 3353{
3354 HOST_WIDE_INT val;
3355 unsigned char arr[16];
3356 int i, j;
3357
3358 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3359 || GET_CODE (op) == CONST_VECTOR);
3360
5df189be 3361 if (GET_CODE (op) == CONST_VECTOR
3362 && !const_vector_immediate_p (op))
3363 return 0;
3364
644459d0 3365 if (GET_MODE (op) != VOIDmode)
3366 mode = GET_MODE (op);
3367
3368 constant_to_array (mode, op, arr);
3369
3370 /* Check that bytes are repeated. */
3371 for (i = 4; i < 16; i += 4)
3372 for (j = 0; j < 4; j++)
3373 if (arr[j] != arr[i + j])
3374 return 0;
3375
3376 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3377 val = trunc_int_for_mode (val, SImode);
3378
3379 return val >= 0 && val <= 0xffff;
3380}
3381
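/* Return 1 when OP is a constant whose elements all repeat and whose
   element value, truncated to the element's integer mode, lies in the
   range LOW .. HIGH.  */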
3382int
3754d046 3383arith_immediate_p (rtx op, machine_mode mode,
644459d0 3384 HOST_WIDE_INT low, HOST_WIDE_INT high)
3385{
3386 HOST_WIDE_INT val;
3387 unsigned char arr[16];
3388 int bytes, i, j;
3389
3390 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3391 || GET_CODE (op) == CONST_VECTOR);
3392
5df189be 3393 if (GET_CODE (op) == CONST_VECTOR
3394 && !const_vector_immediate_p (op))
3395 return 0;
3396
644459d0 3397 if (GET_MODE (op) != VOIDmode)
3398 mode = GET_MODE (op);
3399
3400 constant_to_array (mode, op, arr);
3401
3402 if (VECTOR_MODE_P (mode))
3403 mode = GET_MODE_INNER (mode);
3404
3405 bytes = GET_MODE_SIZE (mode);
3406 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3407
3408 /* Check that bytes are repeated. */
3409 for (i = bytes; i < 16; i += bytes)
3410 for (j = 0; j < bytes; j++)
3411 if (arr[j] != arr[i + j])
3412 return 0;
3413
3414 val = arr[0];
3415 for (j = 1; j < bytes; j++)
3416 val = (val << 8) | arr[j];
3417
3418 val = trunc_int_for_mode (val, mode);
3419
3420 return val >= low && val <= high;
3421}
3422
56c7bfc2 3423/* TRUE when op is an immediate and an exact power of 2, and given that
3424 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3425 all entries must be the same. */
3426bool
3754d046 3427exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
56c7bfc2 3428{
3754d046 3429 machine_mode int_mode;
56c7bfc2 3430 HOST_WIDE_INT val;
3431 unsigned char arr[16];
3432 int bytes, i, j;
3433
3434 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3435 || GET_CODE (op) == CONST_VECTOR);
3436
3437 if (GET_CODE (op) == CONST_VECTOR
3438 && !const_vector_immediate_p (op))
3439 return 0;
3440
3441 if (GET_MODE (op) != VOIDmode)
3442 mode = GET_MODE (op);
3443
3444 constant_to_array (mode, op, arr);
3445
3446 if (VECTOR_MODE_P (mode))
3447 mode = GET_MODE_INNER (mode);
3448
3449 bytes = GET_MODE_SIZE (mode);
3450 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3451
3452 /* Check that bytes are repeated. */
3453 for (i = bytes; i < 16; i += bytes)
3454 for (j = 0; j < bytes; j++)
3455 if (arr[j] != arr[i + j])
3456 return 0;
3457
3458 val = arr[0];
3459 for (j = 1; j < bytes; j++)
3460 val = (val << 8) | arr[j];
3461
3462 val = trunc_int_for_mode (val, int_mode);
3463
3464 /* Currently, we only handle SFmode */
3465 gcc_assert (mode == SFmode);
3466 if (mode == SFmode)
3467 {
3468 int exp = (val >> 23) - 127;
3469 return val > 0 && (val & 0x007fffff) == 0
3470 && exp >= low && exp <= high;
3471 }
3472 return FALSE;
3473}
3474
6cf5579e 3475/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3476
6f4e40cd 3477static bool
3478ea_symbol_ref_p (const_rtx x)
6cf5579e 3479{
6cf5579e 3480 tree decl;
3481
3482 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3483 {
3484 rtx plus = XEXP (x, 0);
3485 rtx op0 = XEXP (plus, 0);
3486 rtx op1 = XEXP (plus, 1);
3487 if (GET_CODE (op1) == CONST_INT)
3488 x = op0;
3489 }
3490
3491 return (GET_CODE (x) == SYMBOL_REF
3492 && (decl = SYMBOL_REF_DECL (x)) != 0
3493 && TREE_CODE (decl) == VAR_DECL
3494 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3495}
3496
644459d0 3497/* We accept:
5b865faf 3498 - any 32-bit constant (SImode, SFmode)
644459d0 3499 - any constant that can be generated with fsmbi (any mode)
5b865faf 3500   - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3501 (DImode, DFmode)
5b865faf 3502 - a 128-bit constant where the four 32-bit words match. */
ca316360 3503bool
3754d046 3504spu_legitimate_constant_p (machine_mode mode, rtx x)
644459d0 3505{
6f4e40cd 3506 subrtx_iterator::array_type array;
5df189be 3507 if (GET_CODE (x) == HIGH)
3508 x = XEXP (x, 0);
6cf5579e 3509
3510 /* Reject any __ea qualified reference. These can't appear in
3511 instructions but must be forced to the constant pool. */
6f4e40cd 3512 FOR_EACH_SUBRTX (iter, array, x, ALL)
3513 if (ea_symbol_ref_p (*iter))
3514 return 0;
6cf5579e 3515
644459d0 3516 /* V4SI with all identical symbols is valid. */
5df189be 3517 if (!flag_pic
ca316360 3518 && mode == V4SImode
644459d0 3519 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3520 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3521 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3522 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3523 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3524 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3525
5df189be 3526 if (GET_CODE (x) == CONST_VECTOR
3527 && !const_vector_immediate_p (x))
3528 return 0;
644459d0 3529 return 1;
3530}
3531
 3532/* Valid addresses are:
3533 - symbol_ref, label_ref, const
3534 - reg
9d98604b 3535 - reg + const_int, where const_int is 16 byte aligned
644459d0 3536 - reg + reg, alignment doesn't matter
3537 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3538 ignore the 4 least significant bits of the const. We only care about
3539 16 byte modes because the expand phase will change all smaller MEM
3540 references to TImode. */
3541static bool
3754d046 3542spu_legitimate_address_p (machine_mode mode,
fd50b071 3543 rtx x, bool reg_ok_strict)
644459d0 3544{
9d98604b 3545 int aligned = GET_MODE_SIZE (mode) >= 16;
3546 if (aligned
3547 && GET_CODE (x) == AND
644459d0 3548 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3549 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3550 x = XEXP (x, 0);
3551 switch (GET_CODE (x))
3552 {
644459d0 3553 case LABEL_REF:
6cf5579e 3554 return !TARGET_LARGE_MEM;
3555
9d98604b 3556 case SYMBOL_REF:
644459d0 3557 case CONST:
6cf5579e 3558 /* Keep __ea references until reload so that spu_expand_mov can see them
3559 in MEMs. */
6f4e40cd 3560 if (ea_symbol_ref_p (x))
6cf5579e 3561 return !reload_in_progress && !reload_completed;
9d98604b 3562 return !TARGET_LARGE_MEM;
644459d0 3563
3564 case CONST_INT:
3565 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3566
3567 case SUBREG:
3568 x = XEXP (x, 0);
9d98604b 3569 if (REG_P (x))
3570 return 0;
644459d0 3571
3572 case REG:
3573 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3574
3575 case PLUS:
3576 case LO_SUM:
3577 {
3578 rtx op0 = XEXP (x, 0);
3579 rtx op1 = XEXP (x, 1);
3580 if (GET_CODE (op0) == SUBREG)
3581 op0 = XEXP (op0, 0);
3582 if (GET_CODE (op1) == SUBREG)
3583 op1 = XEXP (op1, 0);
644459d0 3584 if (GET_CODE (op0) == REG
3585 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3586 && GET_CODE (op1) == CONST_INT
fa695424 3587 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3588 /* If virtual registers are involved, the displacement will
3589 change later on anyway, so checking would be premature.
3590 Reload will make sure the final displacement after
3591 register elimination is OK. */
3592 || op0 == arg_pointer_rtx
3593 || op0 == frame_pointer_rtx
3594 || op0 == virtual_stack_vars_rtx)
9d98604b 3595 && (!aligned || (INTVAL (op1) & 15) == 0))
3596 return TRUE;
644459d0 3597 if (GET_CODE (op0) == REG
3598 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3599 && GET_CODE (op1) == REG
3600 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3601 return TRUE;
644459d0 3602 }
3603 break;
3604
3605 default:
3606 break;
3607 }
9d98604b 3608 return FALSE;
644459d0 3609}
3610
6cf5579e 3611/* Like spu_legitimate_address_p, except with named addresses. */
3612static bool
3754d046 3613spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
6cf5579e 3614 bool reg_ok_strict, addr_space_t as)
3615{
3616 if (as == ADDR_SPACE_EA)
3617 return (REG_P (x) && (GET_MODE (x) == EAmode));
3618
3619 else if (as != ADDR_SPACE_GENERIC)
3620 gcc_unreachable ();
3621
3622 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3623}
3624
644459d0 3625/* When the address is reg + const_int, force the const_int into a
fa7637bd 3626 register. */
3defb88e 3627static rtx
644459d0 3628spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3754d046 3629 machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3630{
3631 rtx op0, op1;
3632 /* Make sure both operands are registers. */
3633 if (GET_CODE (x) == PLUS)
3634 {
3635 op0 = XEXP (x, 0);
3636 op1 = XEXP (x, 1);
3637 if (ALIGNED_SYMBOL_REF_P (op0))
3638 {
3639 op0 = force_reg (Pmode, op0);
3640 mark_reg_pointer (op0, 128);
3641 }
3642 else if (GET_CODE (op0) != REG)
3643 op0 = force_reg (Pmode, op0);
3644 if (ALIGNED_SYMBOL_REF_P (op1))
3645 {
3646 op1 = force_reg (Pmode, op1);
3647 mark_reg_pointer (op1, 128);
3648 }
3649 else if (GET_CODE (op1) != REG)
3650 op1 = force_reg (Pmode, op1);
3651 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3652 }
41e3a0c7 3653 return x;
644459d0 3654}
3655
6cf5579e 3656/* Like spu_legitimize_address, except with named address support.  */
3657static rtx
3754d046 3658spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
6cf5579e 3659 addr_space_t as)
3660{
3661 if (as != ADDR_SPACE_GENERIC)
3662 return x;
3663
3664 return spu_legitimize_address (x, oldx, mode);
3665}
3666
fa695424 3667/* Reload reg + const_int for out-of-range displacements. */
3668rtx
3754d046 3669spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
fa695424 3670 int opnum, int type)
3671{
3672 bool removed_and = false;
3673
3674 if (GET_CODE (ad) == AND
3675 && CONST_INT_P (XEXP (ad, 1))
3676 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3677 {
3678 ad = XEXP (ad, 0);
3679 removed_and = true;
3680 }
3681
3682 if (GET_CODE (ad) == PLUS
3683 && REG_P (XEXP (ad, 0))
3684 && CONST_INT_P (XEXP (ad, 1))
3685 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3686 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3687 {
3688 /* Unshare the sum. */
3689 ad = copy_rtx (ad);
3690
3691 /* Reload the displacement. */
3692 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3693 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3694 opnum, (enum reload_type) type);
3695
3696 /* Add back AND for alignment if we stripped it. */
3697 if (removed_and)
3698 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3699
3700 return ad;
3701 }
3702
3703 return NULL_RTX;
3704}
3705
644459d0 3706/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3707 struct attribute_spec.handler. */
3708static tree
3709spu_handle_fndecl_attribute (tree * node,
3710 tree name,
3711 tree args ATTRIBUTE_UNUSED,
3712 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3713{
3714 if (TREE_CODE (*node) != FUNCTION_DECL)
3715 {
67a779df 3716 warning (0, "%qE attribute only applies to functions",
3717 name);
644459d0 3718 *no_add_attrs = true;
3719 }
3720
3721 return NULL_TREE;
3722}
3723
3724/* Handle the "vector" attribute. */
3725static tree
3726spu_handle_vector_attribute (tree * node, tree name,
3727 tree args ATTRIBUTE_UNUSED,
3728 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3729{
3730 tree type = *node, result = NULL_TREE;
3754d046 3731 machine_mode mode;
644459d0 3732 int unsigned_p;
3733
3734 while (POINTER_TYPE_P (type)
3735 || TREE_CODE (type) == FUNCTION_TYPE
3736 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3737 type = TREE_TYPE (type);
3738
3739 mode = TYPE_MODE (type);
3740
3741 unsigned_p = TYPE_UNSIGNED (type);
3742 switch (mode)
3743 {
3744 case DImode:
3745 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3746 break;
3747 case SImode:
3748 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3749 break;
3750 case HImode:
3751 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3752 break;
3753 case QImode:
3754 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3755 break;
3756 case SFmode:
3757 result = V4SF_type_node;
3758 break;
3759 case DFmode:
3760 result = V2DF_type_node;
3761 break;
3762 default:
3763 break;
3764 }
3765
3766 /* Propagate qualifiers attached to the element type
3767 onto the vector type. */
3768 if (result && result != type && TYPE_QUALS (type))
3769 result = build_qualified_type (result, TYPE_QUALS (type));
3770
3771 *no_add_attrs = true; /* No need to hang on to the attribute. */
3772
3773 if (!result)
67a779df 3774 warning (0, "%qE attribute ignored", name);
644459d0 3775 else
d991e6e8 3776 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3777
3778 return NULL_TREE;
3779}
3780
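/* Summary for illustration (not part of the original source): the
   handler above rewrites the element type to the matching SPU vector
   type based on its machine mode:
     QImode -> V16QI (unsigned -> UV16QI)    SFmode -> V4SF
     HImode -> V8HI  (unsigned -> UV8HI)     DFmode -> V2DF
     SImode -> V4SI  (unsigned -> UV4SI)
     DImode -> V2DI  (unsigned -> UV2DI)
   For example, an attributed 'unsigned short' becomes an 8-element
   128-bit vector.  Qualifiers on the element type are carried over to
   the vector type; any other element mode causes the attribute to be
   ignored with a warning.  */
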
f2b32076 3781/* Return nonzero if FUNC is a naked function. */
644459d0 3782static int
3783spu_naked_function_p (tree func)
3784{
3785 tree a;
3786
3787 if (TREE_CODE (func) != FUNCTION_DECL)
3788 abort ();
3789
3790 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3791 return a != NULL_TREE;
3792}
3793
3794int
3795spu_initial_elimination_offset (int from, int to)
3796{
3797 int saved_regs_size = spu_saved_regs_size ();
3798 int sp_offset = 0;
d5bf7b64 3799 if (!crtl->is_leaf || crtl->outgoing_args_size
644459d0 3800 || get_frame_size () || saved_regs_size)
3801 sp_offset = STACK_POINTER_OFFSET;
3802 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3803 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3804 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3805 return get_frame_size ();
644459d0 3806 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3807 return sp_offset + crtl->outgoing_args_size
644459d0 3808 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3809 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3810 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3811 else
3812 gcc_unreachable ();
644459d0 3813}
3814
3815rtx
fb80456a 3816spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3817{
3754d046 3818 machine_mode mode = TYPE_MODE (type);
644459d0 3819 int byte_size = ((mode == BLKmode)
3820 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3821
3822 /* Make sure small structs are left justified in a register. */
3823 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3824 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3825 {
3754d046 3826 machine_mode smode;
644459d0 3827 rtvec v;
3828 int i;
3829 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3830 int n = byte_size / UNITS_PER_WORD;
3831 v = rtvec_alloc (nregs);
3832 for (i = 0; i < n; i++)
3833 {
3834 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3835 gen_rtx_REG (TImode,
3836 FIRST_RETURN_REGNUM
3837 + i),
3838 GEN_INT (UNITS_PER_WORD * i));
3839 byte_size -= UNITS_PER_WORD;
3840 }
3841
3842 if (n < nregs)
3843 {
3844 if (byte_size < 4)
3845 byte_size = 4;
3846 smode =
3847 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3848 RTVEC_ELT (v, n) =
3849 gen_rtx_EXPR_LIST (VOIDmode,
3850 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3851 GEN_INT (UNITS_PER_WORD * n));
3852 }
3853 return gen_rtx_PARALLEL (mode, v);
3854 }
3855 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3856}
3857
ee9034d4 3858static rtx
39cba157 3859spu_function_arg (cumulative_args_t cum_v,
3754d046 3860 machine_mode mode,
ee9034d4 3861 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3862{
39cba157 3863 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 3864 int byte_size;
3865
a08c5dd0 3866 if (*cum >= MAX_REGISTER_ARGS)
644459d0 3867 return 0;
3868
3869 byte_size = ((mode == BLKmode)
3870 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3871
3872 /* The ABI does not allow parameters to be passed partially in
3873 registers and partially on the stack. */
a08c5dd0 3874 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 3875 return 0;
3876
3877 /* Make sure small structs are left justified in a register. */
3878 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3879 && byte_size < UNITS_PER_WORD && byte_size > 0)
3880 {
3754d046 3881 machine_mode smode;
644459d0 3882 rtx gr_reg;
3883 if (byte_size < 4)
3884 byte_size = 4;
3885 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3886 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 3887 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 3888 const0_rtx);
3889 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3890 }
3891 else
a08c5dd0 3892 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 3893}
3894
ee9034d4 3895static void
3754d046 3896spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ee9034d4 3897 const_tree type, bool named ATTRIBUTE_UNUSED)
3898{
39cba157 3899 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3900
ee9034d4 3901 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3902 ? 1
3903 : mode == BLKmode
3904 ? ((int_size_in_bytes (type) + 15) / 16)
3905 : mode == VOIDmode
3906 ? 1
3907 : HARD_REGNO_NREGS (cum, mode));
3908}
3909
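/* Worked examples of the bookkeeping above (illustrative only): each
   argument register slot is 16 bytes wide, so a 40-byte BLKmode struct
   advances *cum by (40 + 15) / 16 = 3 slots; a variable-sized type
   advances it by only 1 because just a pointer is passed (see
   spu_pass_by_reference below); and a small aggregate of, say, 3 bytes
   is rounded up to 4 and left justified in a single register by
   spu_function_arg above.  */
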
644459d0 3910/* Variable sized types are passed by reference. */
3911static bool
39cba157 3912spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3754d046 3913 machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3914 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3915{
3916 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3917}
3918\f
3919
3920/* Var args. */
3921
3922/* Create and return the va_list datatype.
3923
3924 On SPU, va_list is an array type equivalent to
3925
3926 typedef struct __va_list_tag
3927 {
3928 void *__args __attribute__((__aligned(16)));
3929 void *__skip __attribute__((__aligned(16)));
3930
3931 } va_list[1];
3932
fa7637bd 3933 where __args points to the arg that will be returned by the next
644459d0 3934 va_arg(), and __skip points to the previous stack frame such that
3935 when __args == __skip we should advance __args by 32 bytes. */
3936static tree
3937spu_build_builtin_va_list (void)
3938{
3939 tree f_args, f_skip, record, type_decl;
3940 bool owp;
3941
3942 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3943
3944 type_decl =
54e46243 3945 build_decl (BUILTINS_LOCATION,
3946 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3947
54e46243 3948 f_args = build_decl (BUILTINS_LOCATION,
3949 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3950 f_skip = build_decl (BUILTINS_LOCATION,
3951 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3952
3953 DECL_FIELD_CONTEXT (f_args) = record;
3954 DECL_ALIGN (f_args) = 128;
3955 DECL_USER_ALIGN (f_args) = 1;
3956
3957 DECL_FIELD_CONTEXT (f_skip) = record;
3958 DECL_ALIGN (f_skip) = 128;
3959 DECL_USER_ALIGN (f_skip) = 1;
3960
bc907808 3961 TYPE_STUB_DECL (record) = type_decl;
644459d0 3962 TYPE_NAME (record) = type_decl;
3963 TYPE_FIELDS (record) = f_args;
1767a056 3964 DECL_CHAIN (f_args) = f_skip;
644459d0 3965
3966 /* We know this is being padded and we want it too. It is an internal
3967 type so hide the warnings from the user. */
3968 owp = warn_padded;
3969 warn_padded = false;
3970
3971 layout_type (record);
3972
3973 warn_padded = owp;
3974
3975 /* The correct type is an array type of one element. */
3976 return build_array_type (record, build_index_type (size_zero_node));
3977}
3978
3979/* Implement va_start by filling the va_list structure VALIST.
3980 NEXTARG points to the first anonymous stack argument.
3981
3982 The following global variables are used to initialize
3983 the va_list structure:
3984
abe32cce 3985 crtl->args.info;
644459d0 3986 the CUMULATIVE_ARGS for this function
3987
abe32cce 3988 crtl->args.arg_offset_rtx:
644459d0 3989 holds the offset of the first anonymous stack argument
3990 (relative to the virtual arg pointer). */
3991
8a58ed0a 3992static void
644459d0 3993spu_va_start (tree valist, rtx nextarg)
3994{
3995 tree f_args, f_skip;
3996 tree args, skip, t;
3997
3998 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 3999 f_skip = DECL_CHAIN (f_args);
644459d0 4000
170efcd4 4001 valist = build_simple_mem_ref (valist);
644459d0 4002 args =
4003 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4004 skip =
4005 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4006
4007 /* Find the __args area. */
4008 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4009 if (crtl->args.pretend_args_size > 0)
2cc66f2a 4010 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 4011 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4012 TREE_SIDE_EFFECTS (t) = 1;
4013 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4014
4015 /* Find the __skip area. */
4016 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 4017 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4018 - STACK_POINTER_OFFSET));
75a70cf9 4019 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4020 TREE_SIDE_EFFECTS (t) = 1;
4021 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4022}
4023
4024/* Gimplify va_arg by updating the va_list structure
4025 VALIST as required to retrieve an argument of type
4026 TYPE, and returning that argument.
4027
4028 ret = va_arg(VALIST, TYPE);
4029
4030 generates code equivalent to:
4031
4032 paddedsize = (sizeof(TYPE) + 15) & -16;
4033 if (VALIST.__args + paddedsize > VALIST.__skip
4034 && VALIST.__args <= VALIST.__skip)
4035 addr = VALIST.__skip + 32;
4036 else
4037 addr = VALIST.__args;
4038 VALIST.__args = addr + paddedsize;
4039 ret = *(TYPE *)addr;
4040 */
4041static tree
75a70cf9 4042spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4043 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4044{
4045 tree f_args, f_skip;
4046 tree args, skip;
4047 HOST_WIDE_INT size, rsize;
2cc66f2a 4048 tree addr, tmp;
644459d0 4049 bool pass_by_reference_p;
4050
4051 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4052 f_skip = DECL_CHAIN (f_args);
644459d0 4053
644459d0 4054 args =
4055 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4056 skip =
4057 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4058
4059 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4060
4061 /* if an object is dynamically sized, a pointer to it is passed
4062 instead of the object itself. */
27a82950 4063 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4064 false);
644459d0 4065 if (pass_by_reference_p)
4066 type = build_pointer_type (type);
4067 size = int_size_in_bytes (type);
4068 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4069
4070 /* build conditional expression to calculate addr. The expression
4071 will be gimplified later. */
2cc66f2a 4072 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4073 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4074 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4075 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4076 unshare_expr (skip)));
644459d0 4077
4078 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4079 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4080 unshare_expr (args));
644459d0 4081
75a70cf9 4082 gimplify_assign (addr, tmp, pre_p);
644459d0 4083
4084 /* update VALIST.__args */
2cc66f2a 4085 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4086 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4087
8115f0af 4088 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4089 addr);
644459d0 4090
4091 if (pass_by_reference_p)
4092 addr = build_va_arg_indirect_ref (addr);
4093
4094 return build_va_arg_indirect_ref (addr);
4095}
4096
4097/* Save parameter registers starting with the register that corresponds
4098 to the first unnamed parameter. If the first unnamed parameter is
4099 on the stack then save no registers. Set pretend_args_size to the
4100 amount of space needed to save the registers. */
39cba157 4101static void
3754d046 4102spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
644459d0 4103 tree type, int *pretend_size, int no_rtl)
4104{
4105 if (!no_rtl)
4106 {
4107 rtx tmp;
4108 int regno;
4109 int offset;
39cba157 4110 int ncum = *get_cumulative_args (cum);
644459d0 4111
4112 /* cum currently points to the last named argument, we want to
4113 start at the next argument. */
39cba157 4114 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4115
4116 offset = -STACK_POINTER_OFFSET;
4117 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4118 {
4119 tmp = gen_frame_mem (V4SImode,
29c05e22 4120 plus_constant (Pmode, virtual_incoming_args_rtx,
644459d0 4121 offset));
4122 emit_move_insn (tmp,
4123 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4124 offset += 16;
4125 }
4126 *pretend_size = offset + STACK_POINTER_OFFSET;
4127 }
4128}
4129\f
b2d7ede1 4130static void
644459d0 4131spu_conditional_register_usage (void)
4132{
4133 if (flag_pic)
4134 {
4135 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4136 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4137 }
644459d0 4138}
4139
9d98604b 4140/* This is called any time we inspect the alignment of a register for
4141 addresses. */
644459d0 4142static int
9d98604b 4143reg_aligned_for_addr (rtx x)
644459d0 4144{
9d98604b 4145 int regno =
4146 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4147 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4148}
4149
69ced2d6 4150/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4151 into its SYMBOL_REF_FLAGS. */
4152static void
4153spu_encode_section_info (tree decl, rtx rtl, int first)
4154{
4155 default_encode_section_info (decl, rtl, first);
4156
4157 /* If a variable has a forced alignment to < 16 bytes, mark it with
4158 SYMBOL_FLAG_ALIGN1. */
4159 if (TREE_CODE (decl) == VAR_DECL
4160 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4161 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4162}
4163
644459d0 4164/* Return TRUE if we are certain the mem refers to a complete object
4165 which is both 16-byte aligned and padded to a 16-byte boundary. This
4166 would make it safe to store with a single instruction.
4167 We guarantee the alignment and padding for static objects by aligning
4168 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4169 FIXME: We currently cannot guarantee this for objects on the stack
4170 because assign_parm_setup_stack calls assign_stack_local with the
4171 alignment of the parameter mode and in that case the alignment never
4172 gets adjusted by LOCAL_ALIGNMENT. */
4173static int
4174store_with_one_insn_p (rtx mem)
4175{
3754d046 4176 machine_mode mode = GET_MODE (mem);
644459d0 4177 rtx addr = XEXP (mem, 0);
9d98604b 4178 if (mode == BLKmode)
644459d0 4179 return 0;
9d98604b 4180 if (GET_MODE_SIZE (mode) >= 16)
4181 return 1;
644459d0 4182 /* Only static objects. */
4183 if (GET_CODE (addr) == SYMBOL_REF)
4184 {
4185 /* We use the associated declaration to make sure the access is
fa7637bd 4186 referring to the whole object.
851d9296 4187 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4188 if it is necessary. Will there be cases where one exists, and
4189 the other does not? Will there be cases where both exist, but
4190 have different types? */
4191 tree decl = MEM_EXPR (mem);
4192 if (decl
4193 && TREE_CODE (decl) == VAR_DECL
4194 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4195 return 1;
4196 decl = SYMBOL_REF_DECL (addr);
4197 if (decl
4198 && TREE_CODE (decl) == VAR_DECL
4199 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4200 return 1;
4201 }
4202 return 0;
4203}
4204
9d98604b 4205/* Return 1 when the address is not valid for a simple load and store as
4206 required by the '_mov*' patterns. We could make this less strict
4207 for loads, but we prefer mem's to look the same so they are more
4208 likely to be merged. */
4209static int
4210address_needs_split (rtx mem)
4211{
4212 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4213 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4214 || !(store_with_one_insn_p (mem)
4215 || mem_is_padded_component_ref (mem))))
4216 return 1;
4217
4218 return 0;
4219}
4220
6cf5579e 4221static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4222static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4223static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4224
4225/* MEM is known to be an __ea qualified memory access. Emit a call to
4226 fetch the PPU memory into local store, and return its local store
4227 address in DATA_ADDR. */
4228
4229static void
4230ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4231{
4232 if (is_store)
4233 {
4234 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4235 if (!cache_fetch_dirty)
4236 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4237 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4238 2, ea_addr, EAmode, ndirty, SImode);
4239 }
4240 else
4241 {
4242 if (!cache_fetch)
4243 cache_fetch = init_one_libfunc ("__cache_fetch");
4244 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4245 1, ea_addr, EAmode);
4246 }
4247}
4248
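/* A worked example of the inline lookup implemented by
   ea_load_store_inline below (numbers illustrative): suppose the tag in
   way 2 of the selected set matches.  Then tag_equal is {0, 0, -1, 0},
   gbb packs the low bit of each byte so tag_eq_pack_si is 0x00f0, and
   counting leading zeros gives eq_index = 24.  Rotating cache_ptrs by
   24 (i.e. 24 mod 16 = 8) bytes moves data_pointer[2] into the first
   word, and the final local-store address is data_pointer[2] plus the
   block offset (ea_addr & 127).  A zero tag_eq_pack_si means a miss, in
   which case control falls through to the out-of-line ea_load_store
   call.  */
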
4249/* Like ea_load_store, but do the cache tag comparison and, for stores,
4250 dirty bit marking, inline.
4251
4252 The cache control data structure is an array of
4253
4254 struct __cache_tag_array
4255 {
4256 unsigned int tag_lo[4];
4257 unsigned int tag_hi[4];
4258 void *data_pointer[4];
4259 int reserved[4];
4260 vector unsigned short dirty_bits[4];
4261 } */
4262
4263static void
4264ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4265{
4266 rtx ea_addr_si;
4267 HOST_WIDE_INT v;
4268 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4269 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4270 rtx index_mask = gen_reg_rtx (SImode);
4271 rtx tag_arr = gen_reg_rtx (Pmode);
4272 rtx splat_mask = gen_reg_rtx (TImode);
4273 rtx splat = gen_reg_rtx (V4SImode);
4274 rtx splat_hi = NULL_RTX;
4275 rtx tag_index = gen_reg_rtx (Pmode);
4276 rtx block_off = gen_reg_rtx (SImode);
4277 rtx tag_addr = gen_reg_rtx (Pmode);
4278 rtx tag = gen_reg_rtx (V4SImode);
4279 rtx cache_tag = gen_reg_rtx (V4SImode);
4280 rtx cache_tag_hi = NULL_RTX;
4281 rtx cache_ptrs = gen_reg_rtx (TImode);
4282 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4283 rtx tag_equal = gen_reg_rtx (V4SImode);
4284 rtx tag_equal_hi = NULL_RTX;
4285 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4286 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4287 rtx eq_index = gen_reg_rtx (SImode);
0af56f80 4288 rtx bcomp, hit_label, hit_ref, cont_label;
4289 rtx_insn *insn;
6cf5579e 4290
4291 if (spu_ea_model != 32)
4292 {
4293 splat_hi = gen_reg_rtx (V4SImode);
4294 cache_tag_hi = gen_reg_rtx (V4SImode);
4295 tag_equal_hi = gen_reg_rtx (V4SImode);
4296 }
4297
29c05e22 4298 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
6cf5579e 4299 emit_move_insn (tag_arr, tag_arr_sym);
4300 v = 0x0001020300010203LL;
4301 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4302 ea_addr_si = ea_addr;
4303 if (spu_ea_model != 32)
4304 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4305
4306 /* tag_index = ea_addr & (tag_array_size - 128) */
4307 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4308
4309 /* splat ea_addr to all 4 slots. */
4310 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4311 /* Similarly for high 32 bits of ea_addr. */
4312 if (spu_ea_model != 32)
4313 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4314
4315 /* block_off = ea_addr & 127 */
4316 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4317
4318 /* tag_addr = tag_arr + tag_index */
4319 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4320
4321 /* Read cache tags. */
4322 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4323 if (spu_ea_model != 32)
4324 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
29c05e22 4325 plus_constant (Pmode,
4326 tag_addr, 16)));
6cf5579e 4327
4328 /* tag = ea_addr & -128 */
4329 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4330
4331 /* Read all four cache data pointers. */
4332 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
29c05e22 4333 plus_constant (Pmode,
4334 tag_addr, 32)));
6cf5579e 4335
4336 /* Compare tags. */
4337 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4338 if (spu_ea_model != 32)
4339 {
4340 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4341 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4342 }
4343
4344 /* At most one of the tags compare equal, so tag_equal has one
4345 32-bit slot set to all 1's, with the other slots all zero.
4346 gbb picks off low bit from each byte in the 128-bit registers,
4347 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4348 we have a hit. */
4349 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4350 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4351
4352 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4353 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4354
4355 /* This lets us rotate the corresponding cache data pointer into slot 0
4356 (rotating by eq_index mod 16 bytes). */
4357 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4358 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4359
4360 /* Add block offset to form final data address. */
4361 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4362
4363 /* Check that we did hit. */
4364 hit_label = gen_label_rtx ();
4365 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4366 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
d1f9b275 4367 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
6cf5579e 4368 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4369 hit_ref, pc_rtx)));
4370 /* Say that this branch is very likely to happen. */
4371 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
9eb946de 4372 add_int_reg_note (insn, REG_BR_PROB, v);
6cf5579e 4373
4374 ea_load_store (mem, is_store, ea_addr, data_addr);
4375 cont_label = gen_label_rtx ();
4376 emit_jump_insn (gen_jump (cont_label));
4377 emit_barrier ();
4378
4379 emit_label (hit_label);
4380
4381 if (is_store)
4382 {
4383 HOST_WIDE_INT v_hi;
4384 rtx dirty_bits = gen_reg_rtx (TImode);
4385 rtx dirty_off = gen_reg_rtx (SImode);
4386 rtx dirty_128 = gen_reg_rtx (TImode);
4387 rtx neg_block_off = gen_reg_rtx (SImode);
4388
4389 /* Set up mask with one dirty bit per byte of the mem we are
4390 writing, starting from top bit. */
4391 v_hi = v = -1;
4392 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4393 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4394 {
4395 v_hi = v;
4396 v = 0;
4397 }
4398 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4399
4400 /* Form index into cache dirty_bits. eq_index is one of
4401 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4402 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4403 offset to each of the four dirty_bits elements. */
4404 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4405
4406 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4407
4408 /* Rotate bit mask to proper bit. */
4409 emit_insn (gen_negsi2 (neg_block_off, block_off));
4410 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4411 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4412
4413 /* Or in the new dirty bits. */
4414 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4415
4416 /* Store. */
4417 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4418 }
4419
4420 emit_label (cont_label);
4421}
4422
4423static rtx
4424expand_ea_mem (rtx mem, bool is_store)
4425{
4426 rtx ea_addr;
4427 rtx data_addr = gen_reg_rtx (Pmode);
4428 rtx new_mem;
4429
4430 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4431 if (optimize_size || optimize == 0)
4432 ea_load_store (mem, is_store, ea_addr, data_addr);
4433 else
4434 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4435
4436 if (ea_alias_set == -1)
4437 ea_alias_set = new_alias_set ();
4438
4439 /* We generate a new MEM RTX to refer to the copy of the data
4440 in the cache. We do not copy memory attributes (except the
4441 alignment) from the original MEM, as they may no longer apply
4442 to the cache copy. */
4443 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4444 set_mem_alias_set (new_mem, ea_alias_set);
4445 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4446
4447 return new_mem;
4448}
4449
644459d0 4450int
3754d046 4451spu_expand_mov (rtx * ops, machine_mode mode)
644459d0 4452{
4453 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4454 {
4455 /* Perform the move in the destination SUBREG's inner mode. */
4456 ops[0] = SUBREG_REG (ops[0]);
4457 mode = GET_MODE (ops[0]);
4458 ops[1] = gen_lowpart_common (mode, ops[1]);
4459 gcc_assert (ops[1]);
4460 }
644459d0 4461
4462 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4463 {
4464 rtx from = SUBREG_REG (ops[1]);
3754d046 4465 machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4466
4467 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4468 && GET_MODE_CLASS (imode) == MODE_INT
4469 && subreg_lowpart_p (ops[1]));
4470
4471 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4472 imode = SImode;
4473 if (imode != GET_MODE (from))
4474 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4475
4476 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4477 {
d6bf3b14 4478 enum insn_code icode = convert_optab_handler (trunc_optab,
4479 mode, imode);
644459d0 4480 emit_insn (GEN_FCN (icode) (ops[0], from));
4481 }
4482 else
4483 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4484 return 1;
4485 }
4486
4487 /* At least one of the operands needs to be a register. */
4488 if ((reload_in_progress | reload_completed) == 0
4489 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4490 {
4491 rtx temp = force_reg (mode, ops[1]);
4492 emit_move_insn (ops[0], temp);
4493 return 1;
4494 }
4495 if (reload_in_progress || reload_completed)
4496 {
dea01258 4497 if (CONSTANT_P (ops[1]))
4498 return spu_split_immediate (ops);
644459d0 4499 return 0;
4500 }
9d98604b 4501
4502 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4503 extend them. */
4504 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4505 {
9d98604b 4506 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4507 if (val != INTVAL (ops[1]))
644459d0 4508 {
9d98604b 4509 emit_move_insn (ops[0], GEN_INT (val));
4510 return 1;
644459d0 4511 }
4512 }
9d98604b 4513 if (MEM_P (ops[0]))
6cf5579e 4514 {
4515 if (MEM_ADDR_SPACE (ops[0]))
4516 ops[0] = expand_ea_mem (ops[0], true);
4517 return spu_split_store (ops);
4518 }
9d98604b 4519 if (MEM_P (ops[1]))
6cf5579e 4520 {
4521 if (MEM_ADDR_SPACE (ops[1]))
4522 ops[1] = expand_ea_mem (ops[1], false);
4523 return spu_split_load (ops);
4524 }
9d98604b 4525
644459d0 4526 return 0;
4527}
4528
9d98604b 4529static void
4530spu_convert_move (rtx dst, rtx src)
644459d0 4531{
3754d046 4532 machine_mode mode = GET_MODE (dst);
4533 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
9d98604b 4534 rtx reg;
4535 gcc_assert (GET_MODE (src) == TImode);
4536 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
d1f9b275 4537 emit_insn (gen_rtx_SET (reg,
9d98604b 4538 gen_rtx_TRUNCATE (int_mode,
4539 gen_rtx_LSHIFTRT (TImode, src,
4540 GEN_INT (int_mode == DImode ? 64 : 96)))));
4541 if (int_mode != mode)
4542 {
4543 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4544 emit_move_insn (dst, reg);
4545 }
4546}
644459d0 4547
9d98604b 4548/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4549 the address from SRC and SRC+16. Return a REG or CONST_INT that
4550 specifies how many bytes to rotate the loaded registers, plus any
4551 extra from EXTRA_ROTQBY. The address and rotate amounts are
4552 normalized to improve merging of loads and rotate computations. */
4553static rtx
4554spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4555{
4556 rtx addr = XEXP (src, 0);
4557 rtx p0, p1, rot, addr0, addr1;
4558 int rot_amt;
644459d0 4559
4560 rot = 0;
4561 rot_amt = 0;
9d98604b 4562
4563 if (MEM_ALIGN (src) >= 128)
4564 /* Address is already aligned; simply perform a TImode load. */ ;
4565 else if (GET_CODE (addr) == PLUS)
644459d0 4566 {
4567 /* 8 cases:
4568 aligned reg + aligned reg => lqx
4569 aligned reg + unaligned reg => lqx, rotqby
4570 aligned reg + aligned const => lqd
4571 aligned reg + unaligned const => lqd, rotqbyi
4572 unaligned reg + aligned reg => lqx, rotqby
4573 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4574 unaligned reg + aligned const => lqd, rotqby
4575 unaligned reg + unaligned const -> not allowed by legitimate address
4576 */
4577 p0 = XEXP (addr, 0);
4578 p1 = XEXP (addr, 1);
9d98604b 4579 if (!reg_aligned_for_addr (p0))
644459d0 4580 {
9d98604b 4581 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4582 {
9d98604b 4583 rot = gen_reg_rtx (SImode);
4584 emit_insn (gen_addsi3 (rot, p0, p1));
4585 }
4586 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4587 {
4588 if (INTVAL (p1) > 0
4589 && REG_POINTER (p0)
4590 && INTVAL (p1) * BITS_PER_UNIT
4591 < REGNO_POINTER_ALIGN (REGNO (p0)))
4592 {
4593 rot = gen_reg_rtx (SImode);
4594 emit_insn (gen_addsi3 (rot, p0, p1));
4595 addr = p0;
4596 }
4597 else
4598 {
4599 rtx x = gen_reg_rtx (SImode);
4600 emit_move_insn (x, p1);
4601 if (!spu_arith_operand (p1, SImode))
4602 p1 = x;
4603 rot = gen_reg_rtx (SImode);
4604 emit_insn (gen_addsi3 (rot, p0, p1));
4605 addr = gen_rtx_PLUS (Pmode, p0, x);
4606 }
644459d0 4607 }
4608 else
4609 rot = p0;
4610 }
4611 else
4612 {
4613 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4614 {
4615 rot_amt = INTVAL (p1) & 15;
9d98604b 4616 if (INTVAL (p1) & -16)
4617 {
4618 p1 = GEN_INT (INTVAL (p1) & -16);
4619 addr = gen_rtx_PLUS (SImode, p0, p1);
4620 }
4621 else
4622 addr = p0;
644459d0 4623 }
9d98604b 4624 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4625 rot = p1;
4626 }
4627 }
9d98604b 4628 else if (REG_P (addr))
644459d0 4629 {
9d98604b 4630 if (!reg_aligned_for_addr (addr))
644459d0 4631 rot = addr;
4632 }
4633 else if (GET_CODE (addr) == CONST)
4634 {
4635 if (GET_CODE (XEXP (addr, 0)) == PLUS
4636 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4637 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4638 {
4639 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4640 if (rot_amt & -16)
4641 addr = gen_rtx_CONST (Pmode,
4642 gen_rtx_PLUS (Pmode,
4643 XEXP (XEXP (addr, 0), 0),
4644 GEN_INT (rot_amt & -16)));
4645 else
4646 addr = XEXP (XEXP (addr, 0), 0);
4647 }
4648 else
9d98604b 4649 {
4650 rot = gen_reg_rtx (Pmode);
4651 emit_move_insn (rot, addr);
4652 }
644459d0 4653 }
4654 else if (GET_CODE (addr) == CONST_INT)
4655 {
4656 rot_amt = INTVAL (addr);
4657 addr = GEN_INT (rot_amt & -16);
4658 }
4659 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4660 {
4661 rot = gen_reg_rtx (Pmode);
4662 emit_move_insn (rot, addr);
4663 }
644459d0 4664
9d98604b 4665 rot_amt += extra_rotby;
644459d0 4666
4667 rot_amt &= 15;
4668
4669 if (rot && rot_amt)
4670 {
9d98604b 4671 rtx x = gen_reg_rtx (SImode);
4672 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4673 rot = x;
644459d0 4674 rot_amt = 0;
4675 }
9d98604b 4676 if (!rot && rot_amt)
4677 rot = GEN_INT (rot_amt);
4678
4679 addr0 = copy_rtx (addr);
4680 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4681 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4682
4683 if (dst1)
4684 {
29c05e22 4685 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
9d98604b 4686 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4687 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4688 }
644459d0 4689
9d98604b 4690 return rot;
4691}
4692
4693int
4694spu_split_load (rtx * ops)
4695{
3754d046 4696 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4697 rtx addr, load, rot;
4698 int rot_amt;
644459d0 4699
9d98604b 4700 if (GET_MODE_SIZE (mode) >= 16)
4701 return 0;
644459d0 4702
9d98604b 4703 addr = XEXP (ops[1], 0);
4704 gcc_assert (GET_CODE (addr) != AND);
4705
4706 if (!address_needs_split (ops[1]))
4707 {
4708 ops[1] = change_address (ops[1], TImode, addr);
4709 load = gen_reg_rtx (TImode);
4710 emit_insn (gen__movti (load, ops[1]));
4711 spu_convert_move (ops[0], load);
4712 return 1;
4713 }
4714
4715 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4716
4717 load = gen_reg_rtx (TImode);
4718 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4719
4720 if (rot)
4721 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4722
9d98604b 4723 spu_convert_move (ops[0], load);
4724 return 1;
644459d0 4725}
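
/* Illustrative example of the splitting above (not from the original
   source): an SImode load from (plus (reg, 16-byte aligned) (const_int 6))
   becomes a TImode load of the containing quadword (displacement
   6 & -16 == 0) followed by a rotqby of 6 bytes, which moves bytes 6..9
   into the preferred slot (bytes 0..3); spu_convert_move then extracts
   the SImode value from that slot.  Sub-word loads get an extra rotate
   of GET_MODE_SIZE - 4 bytes so the value ends up right justified
   within the first word.  */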
4726
9d98604b 4727int
644459d0 4728spu_split_store (rtx * ops)
4729{
3754d046 4730 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4731 rtx reg;
644459d0 4732 rtx addr, p0, p1, p1_lo, smem;
4733 int aform;
4734 int scalar;
4735
9d98604b 4736 if (GET_MODE_SIZE (mode) >= 16)
4737 return 0;
4738
644459d0 4739 addr = XEXP (ops[0], 0);
9d98604b 4740 gcc_assert (GET_CODE (addr) != AND);
4741
4742 if (!address_needs_split (ops[0]))
4743 {
4744 reg = gen_reg_rtx (TImode);
4745 emit_insn (gen_spu_convert (reg, ops[1]));
4746 ops[0] = change_address (ops[0], TImode, addr);
4747 emit_move_insn (ops[0], reg);
4748 return 1;
4749 }
644459d0 4750
4751 if (GET_CODE (addr) == PLUS)
4752 {
4753 /* 8 cases:
4754 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4755 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4756 aligned reg + aligned const => lqd, c?d, shuf, stqx
4757 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4758 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4759 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4760 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4761 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4762 */
4763 aform = 0;
4764 p0 = XEXP (addr, 0);
4765 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4766 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4767 {
4768 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4769 if (reg_aligned_for_addr (p0))
4770 {
4771 p1 = GEN_INT (INTVAL (p1) & -16);
4772 if (p1 == const0_rtx)
4773 addr = p0;
4774 else
4775 addr = gen_rtx_PLUS (SImode, p0, p1);
4776 }
4777 else
4778 {
4779 rtx x = gen_reg_rtx (SImode);
4780 emit_move_insn (x, p1);
4781 addr = gen_rtx_PLUS (SImode, p0, x);
4782 }
644459d0 4783 }
4784 }
9d98604b 4785 else if (REG_P (addr))
644459d0 4786 {
4787 aform = 0;
4788 p0 = addr;
4789 p1 = p1_lo = const0_rtx;
4790 }
4791 else
4792 {
4793 aform = 1;
4794 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4795 p1 = 0; /* aform doesn't use p1 */
4796 p1_lo = addr;
4797 if (ALIGNED_SYMBOL_REF_P (addr))
4798 p1_lo = const0_rtx;
9d98604b 4799 else if (GET_CODE (addr) == CONST
4800 && GET_CODE (XEXP (addr, 0)) == PLUS
4801 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4802 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4803 {
9d98604b 4804 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4805 if ((v & -16) != 0)
4806 addr = gen_rtx_CONST (Pmode,
4807 gen_rtx_PLUS (Pmode,
4808 XEXP (XEXP (addr, 0), 0),
4809 GEN_INT (v & -16)));
4810 else
4811 addr = XEXP (XEXP (addr, 0), 0);
4812 p1_lo = GEN_INT (v & 15);
644459d0 4813 }
4814 else if (GET_CODE (addr) == CONST_INT)
4815 {
4816 p1_lo = GEN_INT (INTVAL (addr) & 15);
4817 addr = GEN_INT (INTVAL (addr) & -16);
4818 }
9d98604b 4819 else
4820 {
4821 p1_lo = gen_reg_rtx (SImode);
4822 emit_move_insn (p1_lo, addr);
4823 }
644459d0 4824 }
4825
4cbad5bb 4826 gcc_assert (aform == 0 || aform == 1);
9d98604b 4827 reg = gen_reg_rtx (TImode);
e04cf423 4828
644459d0 4829 scalar = store_with_one_insn_p (ops[0]);
4830 if (!scalar)
4831 {
4832 /* We could copy the flags from the ops[0] MEM to lmem here.
4833 We don't because we want this load to be optimized away if
4834 possible, and copying the flags will prevent that in certain
4835 cases, e.g. consider the volatile flag. */
4836
9d98604b 4837 rtx pat = gen_reg_rtx (TImode);
e04cf423 4838 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4839 set_mem_alias_set (lmem, 0);
4840 emit_insn (gen_movti (reg, lmem));
644459d0 4841
9d98604b 4842 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4843 p0 = stack_pointer_rtx;
4844 if (!p1_lo)
4845 p1_lo = const0_rtx;
4846
4847 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4848 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4849 }
644459d0 4850 else
4851 {
4852 if (GET_CODE (ops[1]) == REG)
4853 emit_insn (gen_spu_convert (reg, ops[1]));
4854 else if (GET_CODE (ops[1]) == SUBREG)
4855 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4856 else
4857 abort ();
4858 }
4859
4860 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4861 emit_insn (gen_ashlti3
4862 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4863
9d98604b 4864 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4865 /* We can't use the previous alias set because the memory has changed
4866 size and can potentially overlap objects of other types. */
4867 set_mem_alias_set (smem, 0);
4868
e04cf423 4869 emit_insn (gen_movti (smem, reg));
9d98604b 4870 return 1;
644459d0 4871}
4872
4873/* Return TRUE if X is a MEM which is a struct member reference
4874 and the member can safely be loaded and stored with a single
4875 instruction because it is padded. */
4876static int
4877mem_is_padded_component_ref (rtx x)
4878{
4879 tree t = MEM_EXPR (x);
4880 tree r;
4881 if (!t || TREE_CODE (t) != COMPONENT_REF)
4882 return 0;
4883 t = TREE_OPERAND (t, 1);
4884 if (!t || TREE_CODE (t) != FIELD_DECL
4885 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4886 return 0;
4887 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4888 r = DECL_FIELD_CONTEXT (t);
4889 if (!r || TREE_CODE (r) != RECORD_TYPE)
4890 return 0;
4891 /* Make sure they are the same mode */
4892 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4893 return 0;
4894 /* If there are no following fields then the field alignment assures
fa7637bd 4895 the structure is padded to the alignment, which means this field is
4896 padded too. */
644459d0 4897 if (TREE_CHAIN (t) == 0)
4898 return 1;
4899 /* If the following field is also aligned then this field will be
4900 padded. */
4901 t = TREE_CHAIN (t);
4902 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4903 return 1;
4904 return 0;
4905}
4906
c7b91b14 4907/* Parse the -mfixed-range= option string. */
4908static void
4909fix_range (const char *const_str)
4910{
4911 int i, first, last;
4912 char *str, *dash, *comma;
4913
4914 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4915 REG2 are either register names or register numbers. The effect
4916 of this option is to mark the registers in the range from REG1 to
4917 REG2 as ``fixed'' so they won't be used by the compiler. */
4918
4919 i = strlen (const_str);
4920 str = (char *) alloca (i + 1);
4921 memcpy (str, const_str, i + 1);
4922
4923 while (1)
4924 {
4925 dash = strchr (str, '-');
4926 if (!dash)
4927 {
4928 warning (0, "value of -mfixed-range must have form REG1-REG2");
4929 return;
4930 }
4931 *dash = '\0';
4932 comma = strchr (dash + 1, ',');
4933 if (comma)
4934 *comma = '\0';
4935
4936 first = decode_reg_name (str);
4937 if (first < 0)
4938 {
4939 warning (0, "unknown register name: %s", str);
4940 return;
4941 }
4942
4943 last = decode_reg_name (dash + 1);
4944 if (last < 0)
4945 {
4946 warning (0, "unknown register name: %s", dash + 1);
4947 return;
4948 }
4949
4950 *dash = '-';
4951
4952 if (first > last)
4953 {
4954 warning (0, "%s-%s is an empty range", str, dash + 1);
4955 return;
4956 }
4957
4958 for (i = first; i <= last; ++i)
4959 fixed_regs[i] = call_used_regs[i] = 1;
4960
4961 if (!comma)
4962 break;
4963
4964 *comma = ',';
4965 str = comma + 1;
4966 }
4967}
4968
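/* Usage sketch (register numbers hypothetical):
   -mfixed-range=80-86,100-110 marks registers 80..86 and 100..110 as
   fixed and call-used, removing them from register allocation.  Per the
   comment above, symbolic register names are accepted in place of
   numbers.  */
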
644459d0 4969/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4970 can be generated using the fsmbi instruction. */
4971int
4972fsmbi_const_p (rtx x)
4973{
dea01258 4974 if (CONSTANT_P (x))
4975 {
5df189be 4976 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4977 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4978 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4979 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4980 }
4981 return 0;
4982}
4983
4984/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4985 can be generated using the cbd, chd, cwd or cdd instruction. */
4986int
3754d046 4987cpat_const_p (rtx x, machine_mode mode)
dea01258 4988{
4989 if (CONSTANT_P (x))
4990 {
4991 enum immediate_class c = classify_immediate (x, mode);
4992 return c == IC_CPAT;
4993 }
4994 return 0;
4995}
644459d0 4996
dea01258 4997rtx
4998gen_cpat_const (rtx * ops)
4999{
5000 unsigned char dst[16];
5001 int i, offset, shift, isize;
5002 if (GET_CODE (ops[3]) != CONST_INT
5003 || GET_CODE (ops[2]) != CONST_INT
5004 || (GET_CODE (ops[1]) != CONST_INT
5005 && GET_CODE (ops[1]) != REG))
5006 return 0;
5007 if (GET_CODE (ops[1]) == REG
5008 && (!REG_POINTER (ops[1])
5009 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5010 return 0;
644459d0 5011
5012 for (i = 0; i < 16; i++)
dea01258 5013 dst[i] = i + 16;
5014 isize = INTVAL (ops[3]);
5015 if (isize == 1)
5016 shift = 3;
5017 else if (isize == 2)
5018 shift = 2;
5019 else
5020 shift = 0;
5021 offset = (INTVAL (ops[2]) +
5022 (GET_CODE (ops[1]) ==
5023 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5024 for (i = 0; i < isize; i++)
5025 dst[offset + i] = i + shift;
5026 return array_to_constant (TImode, dst);
644459d0 5027}
5028
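/* Illustrative examples of the control words built above (not from the
   original source): the identity pattern is bytes 16..31, which makes
   shufb reproduce the destination quadword unchanged.  For a 4-byte
   insert at offset 4 the result is {16,17,18,19, 0,1,2,3, 24..31},
   routing bytes 0..3 of the source register into bytes 4..7 of the
   quadword, the same pattern cwd computes at run time.  For a 1-byte
   insert the selected source byte is 3 rather than 0, because a QImode
   scalar lives in byte 3 of its register's preferred slot.  */
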
5029/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5030 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5031 than 16 bytes, the value is repeated across the rest of the array. */
5032void
3754d046 5033constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
644459d0 5034{
5035 HOST_WIDE_INT val;
5036 int i, j, first;
5037
5038 memset (arr, 0, 16);
5039 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5040 if (GET_CODE (x) == CONST_INT
5041 || (GET_CODE (x) == CONST_DOUBLE
5042 && (mode == SFmode || mode == DFmode)))
5043 {
5044 gcc_assert (mode != VOIDmode && mode != BLKmode);
5045
5046 if (GET_CODE (x) == CONST_DOUBLE)
5047 val = const_double_to_hwint (x);
5048 else
5049 val = INTVAL (x);
5050 first = GET_MODE_SIZE (mode) - 1;
5051 for (i = first; i >= 0; i--)
5052 {
5053 arr[i] = val & 0xff;
5054 val >>= 8;
5055 }
5056 /* Splat the constant across the whole array. */
5057 for (j = 0, i = first + 1; i < 16; i++)
5058 {
5059 arr[i] = arr[j];
5060 j = (j == first) ? 0 : j + 1;
5061 }
5062 }
5063 else if (GET_CODE (x) == CONST_DOUBLE)
5064 {
5065 val = CONST_DOUBLE_LOW (x);
5066 for (i = 15; i >= 8; i--)
5067 {
5068 arr[i] = val & 0xff;
5069 val >>= 8;
5070 }
5071 val = CONST_DOUBLE_HIGH (x);
5072 for (i = 7; i >= 0; i--)
5073 {
5074 arr[i] = val & 0xff;
5075 val >>= 8;
5076 }
5077 }
5078 else if (GET_CODE (x) == CONST_VECTOR)
5079 {
5080 int units;
5081 rtx elt;
5082 mode = GET_MODE_INNER (mode);
5083 units = CONST_VECTOR_NUNITS (x);
5084 for (i = 0; i < units; i++)
5085 {
5086 elt = CONST_VECTOR_ELT (x, i);
5087 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5088 {
5089 if (GET_CODE (elt) == CONST_DOUBLE)
5090 val = const_double_to_hwint (elt);
5091 else
5092 val = INTVAL (elt);
5093 first = GET_MODE_SIZE (mode) - 1;
5094 if (first + i * GET_MODE_SIZE (mode) > 16)
5095 abort ();
5096 for (j = first; j >= 0; j--)
5097 {
5098 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5099 val >>= 8;
5100 }
5101 }
5102 }
5103 }
5104 else
5105 gcc_unreachable();
5106}
5107
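/* Example of the layout produced above (illustrative): an SImode
   CONST_INT 0x01020304 becomes arr = {0x01,0x02,0x03,0x04} repeated
   four times, i.e. the value is stored most-significant byte first and
   then splatted across all 16 bytes.  array_to_constant below performs
   the inverse mapping; for QImode, for instance, it reads the value
   from arr[3].  */
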
5108/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5109 smaller than 16 bytes, use the bytes that would represent that value
5110 in a register, e.g., for QImode return the value of arr[3]. */
5111rtx
3754d046 5112array_to_constant (machine_mode mode, const unsigned char arr[16])
644459d0 5113{
3754d046 5114 machine_mode inner_mode;
644459d0 5115 rtvec v;
5116 int units, size, i, j, k;
5117 HOST_WIDE_INT val;
5118
5119 if (GET_MODE_CLASS (mode) == MODE_INT
5120 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5121 {
5122 j = GET_MODE_SIZE (mode);
5123 i = j < 4 ? 4 - j : 0;
5124 for (val = 0; i < j; i++)
5125 val = (val << 8) | arr[i];
5126 val = trunc_int_for_mode (val, mode);
5127 return GEN_INT (val);
5128 }
5129
5130 if (mode == TImode)
5131 {
5132 HOST_WIDE_INT high;
5133 for (i = high = 0; i < 8; i++)
5134 high = (high << 8) | arr[i];
5135 for (i = 8, val = 0; i < 16; i++)
5136 val = (val << 8) | arr[i];
5137 return immed_double_const (val, high, TImode);
5138 }
5139 if (mode == SFmode)
5140 {
5141 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5142 val = trunc_int_for_mode (val, SImode);
171b6d22 5143 return hwint_to_const_double (SFmode, val);
644459d0 5144 }
5145 if (mode == DFmode)
5146 {
1f915911 5147 for (i = 0, val = 0; i < 8; i++)
5148 val = (val << 8) | arr[i];
171b6d22 5149 return hwint_to_const_double (DFmode, val);
644459d0 5150 }
5151
5152 if (!VECTOR_MODE_P (mode))
5153 abort ();
5154
5155 units = GET_MODE_NUNITS (mode);
5156 size = GET_MODE_UNIT_SIZE (mode);
5157 inner_mode = GET_MODE_INNER (mode);
5158 v = rtvec_alloc (units);
5159
5160 for (k = i = 0; i < units; ++i)
5161 {
5162 val = 0;
5163 for (j = 0; j < size; j++, k++)
5164 val = (val << 8) | arr[k];
5165
5166 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5167 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5168 else
5169 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5170 }
5171 if (k > 16)
5172 abort ();
5173
5174 return gen_rtx_CONST_VECTOR (mode, v);
5175}
5176
5177static void
5178reloc_diagnostic (rtx x)
5179{
712d2297 5180 tree decl = 0;
644459d0 5181 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5182 return;
5183
5184 if (GET_CODE (x) == SYMBOL_REF)
5185 decl = SYMBOL_REF_DECL (x);
5186 else if (GET_CODE (x) == CONST
5187 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5188 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5189
5190 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5191 if (decl && !DECL_P (decl))
5192 decl = 0;
5193
644459d0 5194 /* The decl could be a string constant. */
5195 if (decl && DECL_P (decl))
712d2297 5196 {
5197 location_t loc;
5198 /* We use last_assemble_variable_decl to get line information. It's
5199 not always going to be right and might not even be close, but will
5200 be right for the more common cases. */
5201 if (!last_assemble_variable_decl || in_section == ctors_section)
5202 loc = DECL_SOURCE_LOCATION (decl);
5203 else
5204 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5205
712d2297 5206 if (TARGET_WARN_RELOC)
5207 warning_at (loc, 0,
5208 "creating run-time relocation for %qD", decl);
5209 else
5210 error_at (loc,
5211 "creating run-time relocation for %qD", decl);
5212 }
5213 else
5214 {
5215 if (TARGET_WARN_RELOC)
5216 warning_at (input_location, 0, "creating run-time relocation");
5217 else
5218 error_at (input_location, "creating run-time relocation");
5219 }
644459d0 5220}
5221
5222/* Hook into assemble_integer so we can generate an error for run-time
5223 relocations. The SPU ABI disallows them. */
5224static bool
5225spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5226{
5227 /* By default run-time relocations aren't supported, but we allow them
5228 in case users support them in their own run-time loader, and we provide
5229 a warning for those users that don't. */
5230 if ((GET_CODE (x) == SYMBOL_REF)
5231 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5232 reloc_diagnostic (x);
5233
5234 return default_assemble_integer (x, size, aligned_p);
5235}
5236
5237static void
5238spu_asm_globalize_label (FILE * file, const char *name)
5239{
5240 fputs ("\t.global\t", file);
5241 assemble_name (file, name);
5242 fputs ("\n", file);
5243}
5244
5245static bool
20d892d1 5246spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5247 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5248 bool speed ATTRIBUTE_UNUSED)
644459d0 5249{
3754d046 5250 machine_mode mode = GET_MODE (x);
644459d0 5251 int cost = COSTS_N_INSNS (2);
5252
5253 /* Folding to a CONST_VECTOR will use extra space but there might
5254 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5255 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5256 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5257 because this cost will only be compared against a single insn.
5258 if (code == CONST_VECTOR)
ca316360 5259 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5260 */
5261
5262 /* Use defaults for float operations. Not accurate but good enough. */
5263 if (mode == DFmode)
5264 {
5265 *total = COSTS_N_INSNS (13);
5266 return true;
5267 }
5268 if (mode == SFmode)
5269 {
5270 *total = COSTS_N_INSNS (6);
5271 return true;
5272 }
5273 switch (code)
5274 {
5275 case CONST_INT:
5276 if (satisfies_constraint_K (x))
5277 *total = 0;
5278 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5279 *total = COSTS_N_INSNS (1);
5280 else
5281 *total = COSTS_N_INSNS (3);
5282 return true;
5283
5284 case CONST:
5285 *total = COSTS_N_INSNS (3);
5286 return true;
5287
5288 case LABEL_REF:
5289 case SYMBOL_REF:
5290 *total = COSTS_N_INSNS (0);
5291 return true;
5292
5293 case CONST_DOUBLE:
5294 *total = COSTS_N_INSNS (5);
5295 return true;
5296
5297 case FLOAT_EXTEND:
5298 case FLOAT_TRUNCATE:
5299 case FLOAT:
5300 case UNSIGNED_FLOAT:
5301 case FIX:
5302 case UNSIGNED_FIX:
5303 *total = COSTS_N_INSNS (7);
5304 return true;
5305
5306 case PLUS:
5307 if (mode == TImode)
5308 {
5309 *total = COSTS_N_INSNS (9);
5310 return true;
5311 }
5312 break;
5313
5314 case MULT:
5315 cost =
5316 GET_CODE (XEXP (x, 0)) ==
5317 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5318 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5319 {
5320 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5321 {
5322 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5323 cost = COSTS_N_INSNS (14);
5324 if ((val & 0xffff) == 0)
5325 cost = COSTS_N_INSNS (9);
5326 else if (val > 0 && val < 0x10000)
5327 cost = COSTS_N_INSNS (11);
5328 }
5329 }
5330 *total = cost;
5331 return true;
5332 case DIV:
5333 case UDIV:
5334 case MOD:
5335 case UMOD:
5336 *total = COSTS_N_INSNS (20);
5337 return true;
5338 case ROTATE:
5339 case ROTATERT:
5340 case ASHIFT:
5341 case ASHIFTRT:
5342 case LSHIFTRT:
5343 *total = COSTS_N_INSNS (4);
5344 return true;
5345 case UNSPEC:
5346 if (XINT (x, 1) == UNSPEC_CONVERT)
5347 *total = COSTS_N_INSNS (0);
5348 else
5349 *total = COSTS_N_INSNS (4);
5350 return true;
5351 }
5352 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5353 if (GET_MODE_CLASS (mode) == MODE_INT
5354 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5355 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5356 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5357 *total = cost;
5358 return true;
5359}
5360
3754d046 5361static machine_mode
1bd43494 5362spu_unwind_word_mode (void)
644459d0 5363{
1bd43494 5364 return SImode;
644459d0 5365}
5366
5367/* Decide whether we can make a sibling call to a function. DECL is the
5368 declaration of the function being targeted by the call and EXP is the
5369 CALL_EXPR representing the call. */
5370static bool
5371spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5372{
5373 return decl && !TARGET_LARGE_MEM;
5374}
5375
5376/* We need to correctly update the back chain pointer and the Available
5377 Stack Size (which is in the second slot of the sp register). */
5378void
5379spu_allocate_stack (rtx op0, rtx op1)
5380{
5381 HOST_WIDE_INT v;
5382 rtx chain = gen_reg_rtx (V4SImode);
5383 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5384 rtx sp = gen_reg_rtx (V4SImode);
5385 rtx splatted = gen_reg_rtx (V4SImode);
5386 rtx pat = gen_reg_rtx (TImode);
5387
5388 /* copy the back chain so we can save it back again. */
5389 emit_move_insn (chain, stack_bot);
5390
5391 op1 = force_reg (SImode, op1);
5392
5393 v = 0x1020300010203ll;
5394 emit_move_insn (pat, immed_double_const (v, v, TImode));
5395 emit_insn (gen_shufb (splatted, op1, op1, pat));
5396
5397 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5398 emit_insn (gen_subv4si3 (sp, sp, splatted));
5399
5400 if (flag_stack_check)
5401 {
5402 rtx avail = gen_reg_rtx(SImode);
5403 rtx result = gen_reg_rtx(SImode);
5404 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5405 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5406 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5407 }
5408
5409 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5410
5411 emit_move_insn (stack_bot, chain);
5412
5413 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5414}
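
/* Note on the shuffle trick above (illustrative): the byte pattern
   00 01 02 03 repeated through PAT makes shufb splat the requested size
   (op1) into every word of SPLATTED, so the single subv4si3 decrements
   slot 0 (the stack pointer proper) and slot 1 (the Available Stack
   Size) by the allocation size at once; the saved back chain is then
   stored at the new bottom of the stack.  */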
5415
5416void
5417spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5418{
5419 static unsigned char arr[16] =
5420 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5421 rtx temp = gen_reg_rtx (SImode);
5422 rtx temp2 = gen_reg_rtx (SImode);
5423 rtx temp3 = gen_reg_rtx (V4SImode);
5424 rtx temp4 = gen_reg_rtx (V4SImode);
5425 rtx pat = gen_reg_rtx (TImode);
5426 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5427
5428 /* Restore the backchain from the first word, sp from the second. */
5429 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5430 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5431
5432 emit_move_insn (pat, array_to_constant (TImode, arr));
5433
5434 /* Compute Available Stack Size for sp */
5435 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5436 emit_insn (gen_shufb (temp3, temp, temp, pat));
5437
5438 /* Compute Available Stack Size for back chain */
5439 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5440 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5441 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5442
5443 emit_insn (gen_addv4si3 (sp, sp, temp3));
5444 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5445}
5446
5447static void
5448spu_init_libfuncs (void)
5449{
5450 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5451 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5452 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5453 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5454 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5455 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5456 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5457 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5458 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5459 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5460 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5461 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5462
5463 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5464 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5465
5825ec3f 5466 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5467 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5468 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5469 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5470 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5471 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5472 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5473 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5474 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5475 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5476 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5477 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5478
19a53068 5479 set_optab_libfunc (smul_optab, TImode, "__multi3");
5480 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5481 set_optab_libfunc (smod_optab, TImode, "__modti3");
5482 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5483 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5484 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5485}
5486
5487/* Make a subreg, stripping any existing subreg. We could possibly just
5488 call simplify_subreg, but in this case we know what we want. */
5489rtx
3754d046 5490spu_gen_subreg (machine_mode mode, rtx x)
644459d0 5491{
5492 if (GET_CODE (x) == SUBREG)
5493 x = SUBREG_REG (x);
5494 if (GET_MODE (x) == mode)
5495 return x;
5496 return gen_rtx_SUBREG (mode, x, 0);
5497}
5498
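/* Implement TARGET_RETURN_IN_MEMORY.  */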
5499static bool
fb80456a 5500spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5501{
5502 return (TYPE_MODE (type) == BLKmode
5503 && ((type) == 0
5504 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5505 || int_size_in_bytes (type) >
5506 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5507}
5508\f
5509/* Create the built-in types and functions.  */
5510
c2233b46 5511enum spu_function_code
5512{
5513#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5514#include "spu-builtins.def"
5515#undef DEF_BUILTIN
5516 NUM_SPU_BUILTINS
5517};
5518
5519extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5520
644459d0 5521struct spu_builtin_description spu_builtins[] = {
5522#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5523 {fcode, icode, name, type, params},
644459d0 5524#include "spu-builtins.def"
5525#undef DEF_BUILTIN
5526};
5527
0c5c4d59 5528static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5529
5530/* Returns the spu builtin decl for CODE. */
e6925042 5531
5532static tree
5533spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5534{
5535 if (code >= NUM_SPU_BUILTINS)
5536 return error_mark_node;
5537
0c5c4d59 5538 return spu_builtin_decls[code];
e6925042 5539}
5540
5541
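/* Implement TARGET_INIT_BUILTINS.  Create the vector types, fill in
   spu_builtin_types[], and register one built-in function for each
   entry in spu-builtins.def.  */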
644459d0 5542static void
5543spu_init_builtins (void)
5544{
5545 struct spu_builtin_description *d;
5546 unsigned int i;
5547
5548 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5549 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5550 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5551 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5552 V4SF_type_node = build_vector_type (float_type_node, 4);
5553 V2DF_type_node = build_vector_type (double_type_node, 2);
5554
5555 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5556 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5557 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5558 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5559
c4ecce0c 5560 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5561
5562 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5564 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5566 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5567 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5569 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5571 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5573 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5574
5575 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5576 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5577 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5578 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5579 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5580 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5581 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5582 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5583
5584 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5585 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5586
5587 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5588
5589 spu_builtin_types[SPU_BTI_PTR] =
5590 build_pointer_type (build_qualified_type
5591 (void_type_node,
5592 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5593
5594 /* For each builtin we build a new prototype. The tree code will make
5595 sure nodes are shared. */
5596 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5597 {
5598 tree p;
5599      char name[64];	/* add_builtin_function will make a copy.  */
5600 int parm;
5601
5602 if (d->name == 0)
5603 continue;
5604
5dfbd18f 5605 /* Find last parm. */
644459d0 5606 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5607 ;
644459d0 5608
5609 p = void_list_node;
5610 while (parm > 1)
5611 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5612
5613 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5614
5615 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5616 spu_builtin_decls[i] =
3726fe5e 5617 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5618 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5619 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5620
5621 /* These builtins don't throw. */
0c5c4d59 5622 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5623 }
5624}
5625
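/* Restore the stack pointer to the value OP1.  The difference from the
   current stack pointer is splatted and added to all four words of the
   register (keeping the remaining-stack-space word consistent), and the
   quadword at the old stack bottom is copied to the new one.  */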
cf31d486 5626void
5627spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5628{
5629 static unsigned char arr[16] =
5630 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5631
5632 rtx temp = gen_reg_rtx (Pmode);
5633 rtx temp2 = gen_reg_rtx (V4SImode);
5634 rtx temp3 = gen_reg_rtx (V4SImode);
5635 rtx pat = gen_reg_rtx (TImode);
5636 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5637
5638 emit_move_insn (pat, array_to_constant (TImode, arr));
5639
5640 /* Restore the sp. */
5641 emit_move_insn (temp, op1);
5642 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5643
5644 /* Compute available stack size for sp. */
5645 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5646 emit_insn (gen_shufb (temp3, temp, temp, pat));
5647
5648 emit_insn (gen_addv4si3 (sp, sp, temp3));
5649 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5650}
5651
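/* Return nonzero when -msafe-dma is in effect and CHANNEL is one of the
   MFC channels (21 through 27).  */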
644459d0 5652int
5653spu_safe_dma (HOST_WIDE_INT channel)
5654{
006e4b96 5655 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5656}
5657
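/* Expand spu_splats: replicate the scalar OPS[1] into every element of
   the vector OPS[0].  Constants become a vector constant directly;
   otherwise a shufb with a mode-dependent pattern repeats the element
   from the preferred slot.  */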
5658void
5659spu_builtin_splats (rtx ops[])
5660{
3754d046 5661 machine_mode mode = GET_MODE (ops[0]);
644459d0 5662 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5663 {
5664 unsigned char arr[16];
5665 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5666 emit_move_insn (ops[0], array_to_constant (mode, arr));
5667 }
644459d0 5668 else
5669 {
5670 rtx reg = gen_reg_rtx (TImode);
5671 rtx shuf;
5672 if (GET_CODE (ops[1]) != REG
5673 && GET_CODE (ops[1]) != SUBREG)
5674 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5675 switch (mode)
5676 {
5677 case V2DImode:
5678 case V2DFmode:
5679 shuf =
5680 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5681 TImode);
5682 break;
5683 case V4SImode:
5684 case V4SFmode:
5685 shuf =
5686 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5687 TImode);
5688 break;
5689 case V8HImode:
5690 shuf =
5691 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5692 TImode);
5693 break;
5694 case V16QImode:
5695 shuf =
5696 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5697 TImode);
5698 break;
5699 default:
5700 abort ();
5701 }
5702 emit_move_insn (reg, shuf);
5703 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5704 }
5705}
5706
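/* Expand spu_extract: move element OPS[2] of vector OPS[1] into the
   scalar OPS[0].  A constant index uses the vec_extract patterns; a
   variable index is turned into a byte rotate that brings the selected
   element into the preferred slot.  */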
5707void
5708spu_builtin_extract (rtx ops[])
5709{
3754d046 5710 machine_mode mode;
644459d0 5711 rtx rot, from, tmp;
5712
5713 mode = GET_MODE (ops[1]);
5714
5715 if (GET_CODE (ops[2]) == CONST_INT)
5716 {
5717 switch (mode)
5718 {
5719 case V16QImode:
5720 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5721 break;
5722 case V8HImode:
5723 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5724 break;
5725 case V4SFmode:
5726 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5727 break;
5728 case V4SImode:
5729 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5730 break;
5731 case V2DImode:
5732 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5733 break;
5734 case V2DFmode:
5735 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5736 break;
5737 default:
5738 abort ();
5739 }
5740 return;
5741 }
5742
5743 from = spu_gen_subreg (TImode, ops[1]);
5744 rot = gen_reg_rtx (TImode);
5745 tmp = gen_reg_rtx (SImode);
5746
5747 switch (mode)
5748 {
5749 case V16QImode:
5750 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5751 break;
5752 case V8HImode:
5753 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5754 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5755 break;
5756 case V4SFmode:
5757 case V4SImode:
5758 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5759 break;
5760 case V2DImode:
5761 case V2DFmode:
5762 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5763 break;
5764 default:
5765 abort ();
5766 }
5767 emit_insn (gen_rotqby_ti (rot, from, tmp));
5768
5769 emit_insn (gen_spu_convert (ops[0], rot));
5770}
5771
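/* Expand spu_insert.  A cpat instruction builds the shuffle control for
   the element's byte offset and shufb merges the new element into the
   vector, leaving the result in OPS[0].  */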
5772void
5773spu_builtin_insert (rtx ops[])
5774{
3754d046 5775 machine_mode mode = GET_MODE (ops[0]);
5776 machine_mode imode = GET_MODE_INNER (mode);
644459d0 5777 rtx mask = gen_reg_rtx (TImode);
5778 rtx offset;
5779
5780 if (GET_CODE (ops[3]) == CONST_INT)
5781 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5782 else
5783 {
5784 offset = gen_reg_rtx (SImode);
5785 emit_insn (gen_mulsi3
5786 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5787 }
5788 emit_insn (gen_cpat
5789 (mask, stack_pointer_rtx, offset,
5790 GEN_INT (GET_MODE_SIZE (imode))));
5791 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5792}
5793
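/* Expand spu_promote: rotate the scalar OPS[1] from the preferred slot
   into element OPS[2] of the result vector OPS[0].  */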
5794void
5795spu_builtin_promote (rtx ops[])
5796{
3754d046 5797 machine_mode mode, imode;
644459d0 5798 rtx rot, from, offset;
5799 HOST_WIDE_INT pos;
5800
5801 mode = GET_MODE (ops[0]);
5802 imode = GET_MODE_INNER (mode);
5803
5804 from = gen_reg_rtx (TImode);
5805 rot = spu_gen_subreg (TImode, ops[0]);
5806
5807 emit_insn (gen_spu_convert (from, ops[1]));
5808
5809 if (GET_CODE (ops[2]) == CONST_INT)
5810 {
5811 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5812 if (GET_MODE_SIZE (imode) < 4)
5813 pos += 4 - GET_MODE_SIZE (imode);
5814 offset = GEN_INT (pos & 15);
5815 }
5816 else
5817 {
5818 offset = gen_reg_rtx (SImode);
5819 switch (mode)
5820 {
5821 case V16QImode:
5822 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5823 break;
5824 case V8HImode:
5825 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5826 emit_insn (gen_addsi3 (offset, offset, offset));
5827 break;
5828 case V4SFmode:
5829 case V4SImode:
5830 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5831 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5832 break;
5833 case V2DImode:
5834 case V2DFmode:
5835 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5836 break;
5837 default:
5838 abort ();
5839 }
5840 }
5841 emit_insn (gen_rotqby_ti (rot, from, offset));
5842}
5843
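/* Implement TARGET_TRAMPOLINE_INIT.  Build the instruction words that
   load the static chain register with CXT and branch to FNDECL, store
   them into the trampoline M_TRAMP, and finish with a sync.  */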
e96f2783 5844static void
5845spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5846{
e96f2783 5847 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5848 rtx shuf = gen_reg_rtx (V4SImode);
5849 rtx insn = gen_reg_rtx (V4SImode);
5850 rtx shufc;
5851 rtx insnc;
5852 rtx mem;
5853
5854 fnaddr = force_reg (SImode, fnaddr);
5855 cxt = force_reg (SImode, cxt);
5856
5857 if (TARGET_LARGE_MEM)
5858 {
5859 rtx rotl = gen_reg_rtx (V4SImode);
5860 rtx mask = gen_reg_rtx (V4SImode);
5861 rtx bi = gen_reg_rtx (SImode);
e96f2783 5862 static unsigned char const shufa[16] = {
644459d0 5863 2, 3, 0, 1, 18, 19, 16, 17,
5864 0, 1, 2, 3, 16, 17, 18, 19
5865 };
e96f2783 5866 static unsigned char const insna[16] = {
644459d0 5867 0x41, 0, 0, 79,
5868 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5869 0x60, 0x80, 0, 79,
5870 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5871 };
5872
5873 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5874 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5875
5876 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5877 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5878 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5879 emit_insn (gen_selb (insn, insnc, rotl, mask));
5880
e96f2783 5881 mem = adjust_address (m_tramp, V4SImode, 0);
5882 emit_move_insn (mem, insn);
644459d0 5883
5884 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5885 mem = adjust_address (m_tramp, Pmode, 16);
5886 emit_move_insn (mem, bi);
644459d0 5887 }
5888 else
5889 {
5890 rtx scxt = gen_reg_rtx (SImode);
5891 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5892 static unsigned char const insna[16] = {
644459d0 5893 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5894 0x30, 0, 0, 0,
5895 0, 0, 0, 0,
5896 0, 0, 0, 0
5897 };
5898
5899 shufc = gen_reg_rtx (TImode);
5900 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5901
5902      /* By OR'ing all of cxt with the ila opcode we are assuming cxt
5903	 fits in 18 bits and the last 4 are zeros.  This will be true if
5904	 the stack pointer is initialized to 0x3fff0 at program start;
5905	 otherwise the ila instruction will be garbage.  */
5906
5907 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5908 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5909 emit_insn (gen_cpat
5910 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5911 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5912 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5913
e96f2783 5914 mem = adjust_address (m_tramp, V4SImode, 0);
5915 emit_move_insn (mem, insn);
644459d0 5916 }
5917 emit_insn (gen_sync ());
5918}
5919
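/* Implement TARGET_WARN_FUNC_RETURN.  */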
08c6cbd2 5920static bool
5921spu_warn_func_return (tree decl)
5922{
5923 /* Naked functions are implemented entirely in assembly, including the
5924 return sequence, so suppress warnings about this. */
5925 return !spu_naked_function_p (decl);
5926}
5927
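/* Expand a sign extension of OPS[1] into the DImode or TImode register
   OPS[0].  The sign is computed into a separate register and a shufb
   pattern assembles the result from the source bytes and the sign
   bytes.  */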
644459d0 5928void
5929spu_expand_sign_extend (rtx ops[])
5930{
5931 unsigned char arr[16];
5932 rtx pat = gen_reg_rtx (TImode);
5933 rtx sign, c;
5934 int i, last;
5935 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5936 if (GET_MODE (ops[1]) == QImode)
5937 {
5938 sign = gen_reg_rtx (HImode);
5939 emit_insn (gen_extendqihi2 (sign, ops[1]));
5940 for (i = 0; i < 16; i++)
5941 arr[i] = 0x12;
5942 arr[last] = 0x13;
5943 }
5944 else
5945 {
5946 for (i = 0; i < 16; i++)
5947 arr[i] = 0x10;
5948 switch (GET_MODE (ops[1]))
5949 {
5950 case HImode:
5951 sign = gen_reg_rtx (SImode);
5952 emit_insn (gen_extendhisi2 (sign, ops[1]));
5953 arr[last] = 0x03;
5954 arr[last - 1] = 0x02;
5955 break;
5956 case SImode:
5957 sign = gen_reg_rtx (SImode);
5958 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5959 for (i = 0; i < 4; i++)
5960 arr[last - i] = 3 - i;
5961 break;
5962 case DImode:
5963 sign = gen_reg_rtx (SImode);
5964 c = gen_reg_rtx (SImode);
5965 emit_insn (gen_spu_convert (c, ops[1]));
5966 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5967 for (i = 0; i < 8; i++)
5968 arr[last - i] = 7 - i;
5969 break;
5970 default:
5971 abort ();
5972 }
5973 }
5974 emit_move_insn (pat, array_to_constant (TImode, arr));
5975 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5976}
5977
5978/* Expand vector initialization.  If there are any constant parts,
5979   load constant parts first.  Then load any non-constant parts.  */
5980void
5981spu_expand_vector_init (rtx target, rtx vals)
5982{
3754d046 5983 machine_mode mode = GET_MODE (target);
644459d0 5984 int n_elts = GET_MODE_NUNITS (mode);
5985 int n_var = 0;
5986 bool all_same = true;
790c536c 5987 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5988 int i;
5989
5990 first = XVECEXP (vals, 0, 0);
5991 for (i = 0; i < n_elts; ++i)
5992 {
5993 x = XVECEXP (vals, 0, i);
e442af0b 5994 if (!(CONST_INT_P (x)
5995 || GET_CODE (x) == CONST_DOUBLE
5996 || GET_CODE (x) == CONST_FIXED))
644459d0 5997 ++n_var;
5998 else
5999 {
6000 if (first_constant == NULL_RTX)
6001 first_constant = x;
6002 }
6003 if (i > 0 && !rtx_equal_p (x, first))
6004 all_same = false;
6005 }
6006
6007  /* If all elements are the same, use splats to repeat elements.  */
6008 if (all_same)
6009 {
6010 if (!CONSTANT_P (first)
6011 && !register_operand (first, GET_MODE (x)))
6012 first = force_reg (GET_MODE (first), first);
6013 emit_insn (gen_spu_splats (target, first));
6014 return;
6015 }
6016
6017  /* Load constant parts.  */
6018 if (n_var != n_elts)
6019 {
6020 if (n_var == 0)
6021 {
6022 emit_move_insn (target,
6023 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6024 }
6025 else
6026 {
6027 rtx constant_parts_rtx = copy_rtx (vals);
6028
6029 gcc_assert (first_constant != NULL_RTX);
6030	  /* Fill empty slots with the first constant; this increases
6031	     our chance of using splats in the recursive call below.  */
6032 for (i = 0; i < n_elts; ++i)
e442af0b 6033 {
6034 x = XVECEXP (constant_parts_rtx, 0, i);
6035 if (!(CONST_INT_P (x)
6036 || GET_CODE (x) == CONST_DOUBLE
6037 || GET_CODE (x) == CONST_FIXED))
6038 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6039 }
644459d0 6040
6041 spu_expand_vector_init (target, constant_parts_rtx);
6042 }
6043 }
6044
6045  /* Load variable parts.  */
6046 if (n_var != 0)
6047 {
6048 rtx insert_operands[4];
6049
6050 insert_operands[0] = target;
6051 insert_operands[2] = target;
6052 for (i = 0; i < n_elts; ++i)
6053 {
6054 x = XVECEXP (vals, 0, i);
e442af0b 6055 if (!(CONST_INT_P (x)
6056 || GET_CODE (x) == CONST_DOUBLE
6057 || GET_CODE (x) == CONST_FIXED))
644459d0 6058 {
6059 if (!register_operand (x, GET_MODE (x)))
6060 x = force_reg (GET_MODE (x), x);
6061 insert_operands[1] = x;
6062 insert_operands[3] = GEN_INT (i);
6063 spu_builtin_insert (insert_operands);
6064 }
6065 }
6066 }
6067}
6352eedf 6068
5474166e 6069/* Return the insn index of the vector compare instruction for the given CODE,
 6070   DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6071
6072static int
6073get_vec_cmp_insn (enum rtx_code code,
3754d046 6074 machine_mode dest_mode,
6075 machine_mode op_mode)
5474166e 6076
6077{
6078 switch (code)
6079 {
6080 case EQ:
6081 if (dest_mode == V16QImode && op_mode == V16QImode)
6082 return CODE_FOR_ceq_v16qi;
6083 if (dest_mode == V8HImode && op_mode == V8HImode)
6084 return CODE_FOR_ceq_v8hi;
6085 if (dest_mode == V4SImode && op_mode == V4SImode)
6086 return CODE_FOR_ceq_v4si;
6087 if (dest_mode == V4SImode && op_mode == V4SFmode)
6088 return CODE_FOR_ceq_v4sf;
6089 if (dest_mode == V2DImode && op_mode == V2DFmode)
6090 return CODE_FOR_ceq_v2df;
6091 break;
6092 case GT:
6093 if (dest_mode == V16QImode && op_mode == V16QImode)
6094 return CODE_FOR_cgt_v16qi;
6095 if (dest_mode == V8HImode && op_mode == V8HImode)
6096 return CODE_FOR_cgt_v8hi;
6097 if (dest_mode == V4SImode && op_mode == V4SImode)
6098 return CODE_FOR_cgt_v4si;
6099 if (dest_mode == V4SImode && op_mode == V4SFmode)
6100 return CODE_FOR_cgt_v4sf;
6101 if (dest_mode == V2DImode && op_mode == V2DFmode)
6102 return CODE_FOR_cgt_v2df;
6103 break;
6104 case GTU:
6105 if (dest_mode == V16QImode && op_mode == V16QImode)
6106 return CODE_FOR_clgt_v16qi;
6107 if (dest_mode == V8HImode && op_mode == V8HImode)
6108 return CODE_FOR_clgt_v8hi;
6109 if (dest_mode == V4SImode && op_mode == V4SImode)
6110 return CODE_FOR_clgt_v4si;
6111 break;
6112 default:
6113 break;
6114 }
6115 return -1;
6116}
6117
6118/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6119 DMODE is expected destination mode. This is a recursive function. */
6120
6121static rtx
6122spu_emit_vector_compare (enum rtx_code rcode,
6123 rtx op0, rtx op1,
3754d046 6124 machine_mode dmode)
5474166e 6125{
6126 int vec_cmp_insn;
6127 rtx mask;
3754d046 6128 machine_mode dest_mode;
6129 machine_mode op_mode = GET_MODE (op1);
5474166e 6130
6131 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6132
6133  /* Floating point vector compare instructions use destination V4SImode.
6134     Double floating point vector compare instructions use destination V2DImode.
6135     Move the destination to the appropriate mode later.  */
6136 if (dmode == V4SFmode)
6137 dest_mode = V4SImode;
6138 else if (dmode == V2DFmode)
6139 dest_mode = V2DImode;
6140 else
6141 dest_mode = dmode;
6142
6143 mask = gen_reg_rtx (dest_mode);
6144 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6145
6146 if (vec_cmp_insn == -1)
6147 {
6148 bool swap_operands = false;
6149 bool try_again = false;
6150 switch (rcode)
6151 {
6152 case LT:
6153 rcode = GT;
6154 swap_operands = true;
6155 try_again = true;
6156 break;
6157 case LTU:
6158 rcode = GTU;
6159 swap_operands = true;
6160 try_again = true;
6161 break;
6162 case NE:
e20943d4 6163 case UNEQ:
6164 case UNLE:
6165 case UNLT:
6166 case UNGE:
6167 case UNGT:
6168 case UNORDERED:
5474166e 6169 /* Treat A != B as ~(A==B). */
6170 {
e20943d4 6171 enum rtx_code rev_code;
5474166e 6172 enum insn_code nor_code;
e20943d4 6173 rtx rev_mask;
6174
6175 rev_code = reverse_condition_maybe_unordered (rcode);
6176 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6177
d6bf3b14 6178 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6179 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6180 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6181 if (dmode != dest_mode)
6182 {
6183 rtx temp = gen_reg_rtx (dest_mode);
6184 convert_move (temp, mask, 0);
6185 return temp;
6186 }
6187 return mask;
6188 }
6189 break;
6190 case GE:
6191 case GEU:
6192 case LE:
6193 case LEU:
6194 /* Try GT/GTU/LT/LTU OR EQ */
6195 {
6196 rtx c_rtx, eq_rtx;
6197 enum insn_code ior_code;
6198 enum rtx_code new_code;
6199
6200 switch (rcode)
6201 {
6202 case GE: new_code = GT; break;
6203 case GEU: new_code = GTU; break;
6204 case LE: new_code = LT; break;
6205 case LEU: new_code = LTU; break;
6206 default:
6207 gcc_unreachable ();
6208 }
6209
6210 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6211 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6212
d6bf3b14 6213 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6214 gcc_assert (ior_code != CODE_FOR_nothing);
6215 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6216 if (dmode != dest_mode)
6217 {
6218 rtx temp = gen_reg_rtx (dest_mode);
6219 convert_move (temp, mask, 0);
6220 return temp;
6221 }
6222 return mask;
6223 }
6224 break;
e20943d4 6225 case LTGT:
6226 /* Try LT OR GT */
6227 {
6228 rtx lt_rtx, gt_rtx;
6229 enum insn_code ior_code;
6230
6231 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6232 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6233
6234 ior_code = optab_handler (ior_optab, dest_mode);
6235 gcc_assert (ior_code != CODE_FOR_nothing);
6236 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6237 if (dmode != dest_mode)
6238 {
6239 rtx temp = gen_reg_rtx (dest_mode);
6240 convert_move (temp, mask, 0);
6241 return temp;
6242 }
6243 return mask;
6244 }
6245 break;
6246 case ORDERED:
6247 /* Implement as (A==A) & (B==B) */
6248 {
6249 rtx a_rtx, b_rtx;
6250 enum insn_code and_code;
6251
6252 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6253 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6254
6255 and_code = optab_handler (and_optab, dest_mode);
6256 gcc_assert (and_code != CODE_FOR_nothing);
6257 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6258 if (dmode != dest_mode)
6259 {
6260 rtx temp = gen_reg_rtx (dest_mode);
6261 convert_move (temp, mask, 0);
6262 return temp;
6263 }
6264 return mask;
6265 }
6266 break;
5474166e 6267 default:
6268 gcc_unreachable ();
6269 }
6270
6271 /* You only get two chances. */
6272 if (try_again)
6273 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6274
6275 gcc_assert (vec_cmp_insn != -1);
6276
6277 if (swap_operands)
6278 {
6279 rtx tmp;
6280 tmp = op0;
6281 op0 = op1;
6282 op1 = tmp;
6283 }
6284 }
6285
6286 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6287 if (dmode != dest_mode)
6288 {
6289 rtx temp = gen_reg_rtx (dest_mode);
6290 convert_move (temp, mask, 0);
6291 return temp;
6292 }
6293 return mask;
6294}
6295
6296
6297/* Emit vector conditional expression.
6298 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6299 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6300
6301int
6302spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6303 rtx cond, rtx cc_op0, rtx cc_op1)
6304{
3754d046 6305 machine_mode dest_mode = GET_MODE (dest);
5474166e 6306 enum rtx_code rcode = GET_CODE (cond);
6307 rtx mask;
6308
6309 /* Get the vector mask for the given relational operations. */
6310 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6311
6312 emit_insn(gen_selb (dest, op2, op1, mask));
6313
6314 return 1;
6315}
6316
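/* Return OP in a register of mode MODE.  Constants are converted and
   forced into a register; a register of a different mode is
   reinterpreted with a subreg when the sizes match, and converted with
   spu_convert otherwise.  */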
6352eedf 6317static rtx
3754d046 6318spu_force_reg (machine_mode mode, rtx op)
6352eedf 6319{
6320 rtx x, r;
6321 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6322 {
6323 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6324 || GET_MODE (op) == BLKmode)
6325 return force_reg (mode, convert_to_mode (mode, op, 0));
6326 abort ();
6327 }
6328
6329 r = force_reg (GET_MODE (op), op);
6330 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6331 {
6332 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6333 if (x)
6334 return x;
6335 }
6336
6337 x = gen_reg_rtx (mode);
6338 emit_insn (gen_spu_convert (x, r));
6339 return x;
6340}
6341
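/* Check that operand OP of builtin D is valid for parameter class P.
   Diagnose immediate operands that fall outside the allowed range and
   warn when low-order bits the instruction ignores are set.  */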
6342static void
6343spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6344{
6345 HOST_WIDE_INT v = 0;
6346 int lsbits;
6347 /* Check the range of immediate operands. */
6348 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6349 {
6350 int range = p - SPU_BTI_7;
5df189be 6351
6352 if (!CONSTANT_P (op))
bf776685 6353 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6354 d->name,
6355 spu_builtin_range[range].low, spu_builtin_range[range].high);
6356
6357 if (GET_CODE (op) == CONST
6358 && (GET_CODE (XEXP (op, 0)) == PLUS
6359 || GET_CODE (XEXP (op, 0)) == MINUS))
6360 {
6361 v = INTVAL (XEXP (XEXP (op, 0), 1));
6362 op = XEXP (XEXP (op, 0), 0);
6363 }
6364 else if (GET_CODE (op) == CONST_INT)
6365 v = INTVAL (op);
5df189be 6366 else if (GET_CODE (op) == CONST_VECTOR
6367 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6368 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6369
6370 /* The default for v is 0 which is valid in every range. */
6371 if (v < spu_builtin_range[range].low
6372 || v > spu_builtin_range[range].high)
bf776685 6373 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6374 d->name,
6375 spu_builtin_range[range].low, spu_builtin_range[range].high,
6376 v);
6352eedf 6377
6378 switch (p)
6379 {
6380 case SPU_BTI_S10_4:
6381 lsbits = 4;
6382 break;
6383 case SPU_BTI_U16_2:
6384	  /* This is only used in lqa and stqa.  Even though the insns
6385	     encode 16 bits of the address (all but the 2 least
6386	     significant), only 14 bits are used because it is masked to
6387	     be 16-byte aligned.  */
6388 lsbits = 4;
6389 break;
6390 case SPU_BTI_S16_2:
6391 /* This is used for lqr and stqr. */
6392 lsbits = 2;
6393 break;
6394 default:
6395 lsbits = 0;
6396 }
6397
6398 if (GET_CODE (op) == LABEL_REF
6399 || (GET_CODE (op) == SYMBOL_REF
6400 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6401 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6402 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6403 d->name);
6404 }
6405}
6406
6407
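/* Expand the arguments of call EXP into OPS[] for builtin D, starting
   with TARGET when the builtin returns a value.  Return the number of
   operands filled in.  */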
70ca06f8 6408static int
5df189be 6409expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6410 rtx target, rtx ops[])
6411{
bc620c5c 6412 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6413 int i = 0, a;
6352eedf 6414
6415 /* Expand the arguments into rtl. */
6416
6417 if (d->parm[0] != SPU_BTI_VOID)
6418 ops[i++] = target;
6419
70ca06f8 6420 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6421 {
5df189be 6422 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6423 if (arg == 0)
6424 abort ();
b9c74b4d 6425 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6426 }
70ca06f8 6427
32f79657 6428 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6429 return i;
6352eedf 6430}
6431
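/* Expand a call to the builtin described by D.  EXP is the CALL_EXPR
   and TARGET is a suggested place for the result.  Return the rtx
   holding the result, or 0 when the builtin produces no value.  */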
6432static rtx
6433spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6434 tree exp, rtx target)
6352eedf 6435{
6436 rtx pat;
6437 rtx ops[8];
bc620c5c 6438 enum insn_code icode = (enum insn_code) d->icode;
3754d046 6439 machine_mode mode, tmode;
6352eedf 6440 int i, p;
70ca06f8 6441 int n_operands;
6352eedf 6442 tree return_type;
6443
6444 /* Set up ops[] with values from arglist. */
70ca06f8 6445 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6446
6447 /* Handle the target operand which must be operand 0. */
6448 i = 0;
6449 if (d->parm[0] != SPU_BTI_VOID)
6450 {
6451
6452 /* We prefer the mode specified for the match_operand otherwise
6453 use the mode from the builtin function prototype. */
6454 tmode = insn_data[d->icode].operand[0].mode;
6455 if (tmode == VOIDmode)
6456 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6457
6458      /* Try to use target because not using it can lead to extra copies,
6459	 and when we are using all of the registers extra copies lead
6460	 to extra spills.  */
6461 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6462 ops[0] = target;
6463 else
6464 target = ops[0] = gen_reg_rtx (tmode);
6465
6466 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6467 abort ();
6468
6469 i++;
6470 }
6471
a76866d3 6472 if (d->fcode == SPU_MASK_FOR_LOAD)
6473 {
3754d046 6474 machine_mode mode = insn_data[icode].operand[1].mode;
a76866d3 6475 tree arg;
6476 rtx addr, op, pat;
6477
6478 /* get addr */
5df189be 6479 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6480 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6481 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6482 addr = memory_address (mode, op);
6483
6484 /* negate addr */
6485 op = gen_reg_rtx (GET_MODE (addr));
d1f9b275 6486 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
a76866d3 6487 op = gen_rtx_MEM (mode, op);
6488
6489 pat = GEN_FCN (icode) (target, op);
6490 if (!pat)
6491 return 0;
6492 emit_insn (pat);
6493 return target;
6494 }
6495
6352eedf 6496  /* Ignore align_hint, but still expand its args in case they have
 6497     side effects.  */
6498 if (icode == CODE_FOR_spu_align_hint)
6499 return 0;
6500
6501 /* Handle the rest of the operands. */
70ca06f8 6502 for (p = 1; i < n_operands; i++, p++)
6352eedf 6503 {
6504 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6505 mode = insn_data[d->icode].operand[i].mode;
6506 else
6507 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6508
6509 /* mode can be VOIDmode here for labels */
6510
6511 /* For specific intrinsics with an immediate operand, e.g.,
6512 si_ai(), we sometimes need to convert the scalar argument to a
6513 vector argument by splatting the scalar. */
6514 if (VECTOR_MODE_P (mode)
6515 && (GET_CODE (ops[i]) == CONST_INT
6516 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6517 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6518 {
6519 if (GET_CODE (ops[i]) == CONST_INT)
6520 ops[i] = spu_const (mode, INTVAL (ops[i]));
6521 else
6522 {
6523 rtx reg = gen_reg_rtx (mode);
3754d046 6524 machine_mode imode = GET_MODE_INNER (mode);
6352eedf 6525 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6526 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6527 if (imode != GET_MODE (ops[i]))
6528 ops[i] = convert_to_mode (imode, ops[i],
6529 TYPE_UNSIGNED (spu_builtin_types
6530 [d->parm[i]]));
6531 emit_insn (gen_spu_splats (reg, ops[i]));
6532 ops[i] = reg;
6533 }
6534 }
6535
5df189be 6536 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6537
6352eedf 6538 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6539 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6540 }
6541
70ca06f8 6542 switch (n_operands)
6352eedf 6543 {
6544 case 0:
6545 pat = GEN_FCN (icode) (0);
6546 break;
6547 case 1:
6548 pat = GEN_FCN (icode) (ops[0]);
6549 break;
6550 case 2:
6551 pat = GEN_FCN (icode) (ops[0], ops[1]);
6552 break;
6553 case 3:
6554 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6555 break;
6556 case 4:
6557 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6558 break;
6559 case 5:
6560 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6561 break;
6562 case 6:
6563 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6564 break;
6565 default:
6566 abort ();
6567 }
6568
6569 if (!pat)
6570 abort ();
6571
6572 if (d->type == B_CALL || d->type == B_BISLED)
6573 emit_call_insn (pat);
6574 else if (d->type == B_JUMP)
6575 {
6576 emit_jump_insn (pat);
6577 emit_barrier ();
6578 }
6579 else
6580 emit_insn (pat);
6581
6582 return_type = spu_builtin_types[d->parm[0]];
6583 if (d->parm[0] != SPU_BTI_VOID
6584 && GET_MODE (target) != TYPE_MODE (return_type))
6585 {
6586      /* target is the return value.  It should always have the mode of
6587	 the builtin function prototype.  */
6588 target = spu_force_reg (TYPE_MODE (return_type), target);
6589 }
6590
6591 return target;
6592}
6593
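/* Implement TARGET_EXPAND_BUILTIN.  */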
6594rtx
6595spu_expand_builtin (tree exp,
6596 rtx target,
6597 rtx subtarget ATTRIBUTE_UNUSED,
3754d046 6598 machine_mode mode ATTRIBUTE_UNUSED,
6352eedf 6599 int ignore ATTRIBUTE_UNUSED)
6600{
5df189be 6601 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6602 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6603 struct spu_builtin_description *d;
6604
6605 if (fcode < NUM_SPU_BUILTINS)
6606 {
6607 d = &spu_builtins[fcode];
6608
5df189be 6609 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6610 }
6611 abort ();
6612}
6613
a76866d3 6614/* Implement targetm.vectorize.builtin_mask_for_load. */
6615static tree
6616spu_builtin_mask_for_load (void)
6617{
0c5c4d59 6618 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6619}
5df189be 6620
a28df51d 6621/* Implement targetm.vectorize.builtin_vectorization_cost. */
6622static int
0822b158 6623spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
d13adc77 6624 tree vectype,
0822b158 6625 int misalign ATTRIBUTE_UNUSED)
559093aa 6626{
d13adc77 6627 unsigned elements;
6628
559093aa 6629 switch (type_of_cost)
6630 {
6631 case scalar_stmt:
6632 case vector_stmt:
6633 case vector_load:
6634 case vector_store:
6635 case vec_to_scalar:
6636 case scalar_to_vec:
6637 case cond_branch_not_taken:
6638 case vec_perm:
5df2530b 6639 case vec_promote_demote:
559093aa 6640 return 1;
6641
6642 case scalar_store:
6643 return 10;
6644
6645 case scalar_load:
6646 /* Load + rotate. */
6647 return 2;
6648
6649 case unaligned_load:
6650 return 2;
6651
6652 case cond_branch_taken:
6653 return 6;
6654
d13adc77 6655 case vec_construct:
6656 elements = TYPE_VECTOR_SUBPARTS (vectype);
6657 return elements / 2 + 1;
6658
559093aa 6659 default:
6660 gcc_unreachable ();
6661 }
a28df51d 6662}
6663
4db2b577 6664/* Implement targetm.vectorize.init_cost. */
6665
61b33788 6666static void *
4db2b577 6667spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6668{
f97dec81 6669 unsigned *cost = XNEWVEC (unsigned, 3);
6670 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
4db2b577 6671 return cost;
6672}
6673
6674/* Implement targetm.vectorize.add_stmt_cost. */
6675
61b33788 6676static unsigned
4db2b577 6677spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
f97dec81 6678 struct _stmt_vec_info *stmt_info, int misalign,
6679 enum vect_cost_model_location where)
4db2b577 6680{
6681 unsigned *cost = (unsigned *) data;
6682 unsigned retval = 0;
6683
6684 if (flag_vect_cost_model)
6685 {
f97dec81 6686 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4db2b577 6687 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6688
6689 /* Statements in an inner loop relative to the loop being
6690 vectorized are weighted more heavily. The value here is
6691 arbitrary and could potentially be improved with analysis. */
f97dec81 6692 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4db2b577 6693 count *= 50; /* FIXME. */
6694
6695 retval = (unsigned) (count * stmt_cost);
f97dec81 6696 cost[where] += retval;
4db2b577 6697 }
6698
6699 return retval;
6700}
6701
6702/* Implement targetm.vectorize.finish_cost. */
6703
f97dec81 6704static void
6705spu_finish_cost (void *data, unsigned *prologue_cost,
6706 unsigned *body_cost, unsigned *epilogue_cost)
4db2b577 6707{
f97dec81 6708 unsigned *cost = (unsigned *) data;
6709 *prologue_cost = cost[vect_prologue];
6710 *body_cost = cost[vect_body];
6711 *epilogue_cost = cost[vect_epilogue];
4db2b577 6712}
6713
6714/* Implement targetm.vectorize.destroy_cost_data. */
6715
61b33788 6716static void
4db2b577 6717spu_destroy_cost_data (void *data)
6718{
6719 free (data);
6720}
6721
0e87db76 6722/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6723   after applying N iterations.  This routine does not determine
 6724   how many iterations are required to reach the desired alignment.  */
6725
6726static bool
a9f1838b 6727spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6728{
6729 if (is_packed)
6730 return false;
6731
6732 /* All other types are naturally aligned. */
6733 return true;
6734}
6735
6cf5579e 6736/* Return the appropriate mode for a named address space pointer.  */
3754d046 6737static machine_mode
6cf5579e 6738spu_addr_space_pointer_mode (addr_space_t addrspace)
6739{
6740 switch (addrspace)
6741 {
6742 case ADDR_SPACE_GENERIC:
6743 return ptr_mode;
6744 case ADDR_SPACE_EA:
6745 return EAmode;
6746 default:
6747 gcc_unreachable ();
6748 }
6749}
6750
6751/* Return the appropriate mode for a named address space address.  */
3754d046 6752static machine_mode
6cf5579e 6753spu_addr_space_address_mode (addr_space_t addrspace)
6754{
6755 switch (addrspace)
6756 {
6757 case ADDR_SPACE_GENERIC:
6758 return Pmode;
6759 case ADDR_SPACE_EA:
6760 return EAmode;
6761 default:
6762 gcc_unreachable ();
6763 }
6764}
6765
6766/* Determine if one named address space is a subset of another. */
6767
6768static bool
6769spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6770{
6771 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6772 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6773
6774 if (subset == superset)
6775 return true;
6776
6777 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6778 being subsets but instead as disjoint address spaces. */
6779 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6780 return false;
6781
6782 else
6783 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6784}
6785
6786/* Convert from one address space to another. */
6787static rtx
6788spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6789{
6790 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6791 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6792
6793 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6794 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6795
6796 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6797 {
6798 rtx result, ls;
6799
6800 ls = gen_const_mem (DImode,
6801 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6802 set_mem_align (ls, 128);
6803
6804 result = gen_reg_rtx (Pmode);
6805 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6806 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6807 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6808 ls, const0_rtx, Pmode, 1);
6809
6810 emit_insn (gen_subsi3 (result, op, ls));
6811
6812 return result;
6813 }
6814
6815 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6816 {
6817 rtx result, ls;
6818
6819 ls = gen_const_mem (DImode,
6820 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6821 set_mem_align (ls, 128);
6822
6823 result = gen_reg_rtx (EAmode);
6824 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6825 op = force_reg (Pmode, op);
6826 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6827 ls, const0_rtx, EAmode, 1);
6828 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6829
6830 if (EAmode == SImode)
6831 emit_insn (gen_addsi3 (result, op, ls));
6832 else
6833 emit_insn (gen_adddi3 (result, op, ls));
6834
6835 return result;
6836 }
6837
6838 else
6839 gcc_unreachable ();
6840}
6841
6842
d52fd16a 6843/* Count the total number of instructions in each pipe and return the
6844 maximum, which is used as the Minimum Iteration Interval (MII)
6845 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6846   -2 identifies instructions that can go in either pipe0 or pipe1.  */
6847static int
6848spu_sms_res_mii (struct ddg *g)
6849{
6850 int i;
6851 unsigned t[4] = {0, 0, 0, 0};
6852
6853 for (i = 0; i < g->num_nodes; i++)
6854 {
0af56f80 6855 rtx_insn *insn = g->nodes[i].insn;
d52fd16a 6856 int p = get_pipe (insn) + 2;
6857
1e944a0b 6858 gcc_assert (p >= 0);
6859 gcc_assert (p < 4);
d52fd16a 6860
6861 t[p]++;
6862 if (dump_file && INSN_P (insn))
6863 fprintf (dump_file, "i%d %s %d %d\n",
6864 INSN_UID (insn),
6865 insn_data[INSN_CODE(insn)].name,
6866 p, t[p]);
6867 }
6868 if (dump_file)
6869 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6870
6871 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6872}
6873
6874
5df189be 6875void
6876spu_init_expanders (void)
9d98604b 6877{
5df189be 6878 if (cfun)
9d98604b 6879 {
6880 rtx r0, r1;
6881      /* The hard frame pointer register is only 128-bit aligned when
6882 frame_pointer_needed is true. We don't know that until we're
6883 expanding the prologue. */
6884 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6885
6886 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6887 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6888 to be treated as aligned, so generate them here. */
6889 r0 = gen_reg_rtx (SImode);
6890 r1 = gen_reg_rtx (SImode);
6891 mark_reg_pointer (r0, 128);
6892 mark_reg_pointer (r1, 128);
6893 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6894 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6895 }
ea32e033 6896}
6897
3754d046 6898static machine_mode
ea32e033 6899spu_libgcc_cmp_return_mode (void)
6900{
6901
6902/* For SPU, word mode is TImode, so it is better to use SImode
6903 for compare returns. */
6904 return SImode;
6905}
6906
3754d046 6907static machine_mode
ea32e033 6908spu_libgcc_shift_count_mode (void)
6909{
6910/* For SPU, word mode is TImode, so it is better to use SImode
6911 for shift counts. */
6912 return SImode;
6913}
5a976006 6914
a08dfd55 6915/* Implement targetm.section_type_flags. */
6916static unsigned int
6917spu_section_type_flags (tree decl, const char *name, int reloc)
6918{
6919 /* .toe needs to have type @nobits. */
6920 if (strcmp (name, ".toe") == 0)
6921 return SECTION_BSS;
6cf5579e 6922 /* Don't load _ea into the current address space. */
6923 if (strcmp (name, "._ea") == 0)
6924 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6925 return default_section_type_flags (decl, name, reloc);
6926}
c2233b46 6927
6cf5579e 6928/* Implement targetm.select_section. */
6929static section *
6930spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6931{
6932 /* Variables and constants defined in the __ea address space
6933 go into a special section named "._ea". */
6934 if (TREE_TYPE (decl) != error_mark_node
6935 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6936 {
6937 /* We might get called with string constants, but get_named_section
6938 doesn't like them as they are not DECLs. Also, we need to set
6939 flags in that case. */
6940 if (!DECL_P (decl))
6941 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6942
6943 return get_named_section (decl, "._ea", reloc);
6944 }
6945
6946 return default_elf_select_section (decl, reloc, align);
6947}
6948
6949/* Implement targetm.unique_section. */
6950static void
6951spu_unique_section (tree decl, int reloc)
6952{
6953 /* We don't support unique section names in the __ea address
6954 space for now. */
6955 if (TREE_TYPE (decl) != error_mark_node
6956 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6957 return;
6958
6959 default_unique_section (decl, reloc);
6960}
6961
56c7bfc2 6962/* Generate a constant or register which contains 2^SCALE. We assume
6963 the result is valid for MODE. Currently, MODE must be V4SFmode and
6964 SCALE must be SImode. */
6965rtx
3754d046 6966spu_gen_exp2 (machine_mode mode, rtx scale)
56c7bfc2 6967{
6968 gcc_assert (mode == V4SFmode);
6969 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6970 if (GET_CODE (scale) != CONST_INT)
6971 {
6972 /* unsigned int exp = (127 + scale) << 23;
6973 __vector float m = (__vector float) spu_splats (exp); */
6974 rtx reg = force_reg (SImode, scale);
6975 rtx exp = gen_reg_rtx (SImode);
6976 rtx mul = gen_reg_rtx (mode);
6977 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6978 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6979 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6980 return mul;
6981 }
6982 else
6983 {
6984 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6985 unsigned char arr[16];
6986 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6987 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6988 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6989 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6990 return array_to_constant (mode, arr);
6991 }
6992}
6993
9d98604b 6994/* After reload, just change the convert into a move instruction
6995 or a dead instruction. */
6996void
6997spu_split_convert (rtx ops[])
6998{
6999 if (REGNO (ops[0]) == REGNO (ops[1]))
7000 emit_note (NOTE_INSN_DELETED);
7001 else
7002 {
7003 /* Use TImode always as this might help hard reg copyprop. */
7004 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7005 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7006 emit_insn (gen_move_insn (op0, op1));
7007 }
7008}
7009
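/* Output the code that calls _mcount for profiling; $75 holds the
   return address for the call.  */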
b3878a6c 7010void
4cbad5bb 7011spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7012{
7013 fprintf (file, "# profile\n");
7014 fprintf (file, "brsl $75, _mcount\n");
7015}
7016
329c1e4e 7017/* Implement targetm.ref_may_alias_errno. */
7018static bool
7019spu_ref_may_alias_errno (ao_ref *ref)
7020{
7021 tree base = ao_ref_base (ref);
7022
7023 /* With SPU newlib, errno is defined as something like
7024 _impure_data._errno
7025 The default implementation of this target macro does not
 7026   recognize such expressions, so special-case it here.  */
7027
7028 if (TREE_CODE (base) == VAR_DECL
7029 && !TREE_STATIC (base)
7030 && DECL_EXTERNAL (base)
7031 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7032 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7033 "_impure_data") == 0
7034 /* _errno is the first member of _impure_data. */
7035 && ref->offset == 0)
7036 return true;
7037
7038 return default_ref_may_alias_errno (ref);
7039}
7040
f17d2d13 7041/* Output thunk to FILE that implements a C++ virtual function call (with
7042 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7043 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7044 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7045 relative to the resulting this pointer. */
7046
7047static void
7048spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7049 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7050 tree function)
7051{
7052 rtx op[8];
7053
7054 /* Make sure unwind info is emitted for the thunk if needed. */
7055 final_start_function (emit_barrier (), file, 1);
7056
7057 /* Operand 0 is the target function. */
7058 op[0] = XEXP (DECL_RTL (function), 0);
7059
7060 /* Operand 1 is the 'this' pointer. */
7061 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7062 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7063 else
7064 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7065
7066 /* Operands 2/3 are the low/high halfwords of delta. */
7067 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7068 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7069
7070 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7071 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7072 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7073
7074 /* Operands 6/7 are temporary registers. */
7075 op[6] = gen_rtx_REG (Pmode, 79);
7076 op[7] = gen_rtx_REG (Pmode, 78);
7077
7078 /* Add DELTA to this pointer. */
7079 if (delta)
7080 {
7081 if (delta >= -0x200 && delta < 0x200)
7082 output_asm_insn ("ai\t%1,%1,%2", op);
7083 else if (delta >= -0x8000 && delta < 0x8000)
7084 {
7085 output_asm_insn ("il\t%6,%2", op);
7086 output_asm_insn ("a\t%1,%1,%6", op);
7087 }
7088 else
7089 {
7090 output_asm_insn ("ilhu\t%6,%3", op);
7091 output_asm_insn ("iohl\t%6,%2", op);
7092 output_asm_insn ("a\t%1,%1,%6", op);
7093 }
7094 }
7095
7096 /* Perform vcall adjustment. */
7097 if (vcall_offset)
7098 {
7099 output_asm_insn ("lqd\t%7,0(%1)", op);
7100 output_asm_insn ("rotqby\t%7,%7,%1", op);
7101
7102 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7103 output_asm_insn ("ai\t%7,%7,%4", op);
7104 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7105 {
7106 output_asm_insn ("il\t%6,%4", op);
7107 output_asm_insn ("a\t%7,%7,%6", op);
7108 }
7109 else
7110 {
7111 output_asm_insn ("ilhu\t%6,%5", op);
7112 output_asm_insn ("iohl\t%6,%4", op);
7113 output_asm_insn ("a\t%7,%7,%6", op);
7114 }
7115
7116 output_asm_insn ("lqd\t%6,0(%7)", op);
7117 output_asm_insn ("rotqby\t%6,%6,%7", op);
7118 output_asm_insn ("a\t%1,%1,%6", op);
7119 }
7120
7121 /* Jump to target. */
7122 output_asm_insn ("br\t%0", op);
7123
7124 final_end_function ();
7125}
7126
d5065e6e 7127/* Canonicalize a comparison from one we don't have to one we do have. */
7128static void
7129spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7130 bool op0_preserve_value)
7131{
7132 if (!op0_preserve_value
7133 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7134 {
7135 rtx tem = *op0;
7136 *op0 = *op1;
7137 *op1 = tem;
7138 *code = (int)swap_condition ((enum rtx_code)*code);
7139 }
7140}
3defb88e 7141\f
7142/* Table of machine attributes. */
7143static const struct attribute_spec spu_attribute_table[] =
7144{
7145 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7146 affects_type_identity } */
7147 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7148 false },
7149 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7150 false },
7151 { NULL, 0, 0, false, false, false, NULL, false }
7152};
7153
7154/* TARGET overrides. */
7155
7156#undef TARGET_ADDR_SPACE_POINTER_MODE
7157#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7158
7159#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7160#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7161
7162#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7163#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7164 spu_addr_space_legitimate_address_p
7165
7166#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7167#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7168
7169#undef TARGET_ADDR_SPACE_SUBSET_P
7170#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7171
7172#undef TARGET_ADDR_SPACE_CONVERT
7173#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7174
7175#undef TARGET_INIT_BUILTINS
7176#define TARGET_INIT_BUILTINS spu_init_builtins
7177#undef TARGET_BUILTIN_DECL
7178#define TARGET_BUILTIN_DECL spu_builtin_decl
7179
7180#undef TARGET_EXPAND_BUILTIN
7181#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7182
7183#undef TARGET_UNWIND_WORD_MODE
7184#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7185
7186#undef TARGET_LEGITIMIZE_ADDRESS
7187#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7188
7189/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7190 and .quad for the debugger. When it is known that the assembler is fixed,
7191 these can be removed. */
7192#undef TARGET_ASM_UNALIGNED_SI_OP
7193#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7194
7195#undef TARGET_ASM_ALIGNED_DI_OP
7196#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7197
7198/* The .8byte directive doesn't seem to work well for a 32 bit
7199 architecture. */
7200#undef TARGET_ASM_UNALIGNED_DI_OP
7201#define TARGET_ASM_UNALIGNED_DI_OP NULL
7202
7203#undef TARGET_RTX_COSTS
7204#define TARGET_RTX_COSTS spu_rtx_costs
7205
7206#undef TARGET_ADDRESS_COST
d9c5e5f4 7207#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
3defb88e 7208
7209#undef TARGET_SCHED_ISSUE_RATE
7210#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7211
7212#undef TARGET_SCHED_INIT_GLOBAL
7213#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7214
7215#undef TARGET_SCHED_INIT
7216#define TARGET_SCHED_INIT spu_sched_init
7217
7218#undef TARGET_SCHED_VARIABLE_ISSUE
7219#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7220
7221#undef TARGET_SCHED_REORDER
7222#define TARGET_SCHED_REORDER spu_sched_reorder
7223
7224#undef TARGET_SCHED_REORDER2
7225#define TARGET_SCHED_REORDER2 spu_sched_reorder
7226
7227#undef TARGET_SCHED_ADJUST_COST
7228#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7229
7230#undef TARGET_ATTRIBUTE_TABLE
7231#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7232
7233#undef TARGET_ASM_INTEGER
7234#define TARGET_ASM_INTEGER spu_assemble_integer
7235
7236#undef TARGET_SCALAR_MODE_SUPPORTED_P
7237#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7238
7239#undef TARGET_VECTOR_MODE_SUPPORTED_P
7240#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7241
7242#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7243#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7244
7245#undef TARGET_ASM_GLOBALIZE_LABEL
7246#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7247
7248#undef TARGET_PASS_BY_REFERENCE
7249#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7250
7251#undef TARGET_FUNCTION_ARG
7252#define TARGET_FUNCTION_ARG spu_function_arg
7253
7254#undef TARGET_FUNCTION_ARG_ADVANCE
7255#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7256
7257#undef TARGET_MUST_PASS_IN_STACK
7258#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7259
7260#undef TARGET_BUILD_BUILTIN_VA_LIST
7261#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7262
7263#undef TARGET_EXPAND_BUILTIN_VA_START
7264#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7265
7266#undef TARGET_SETUP_INCOMING_VARARGS
7267#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7268
7269#undef TARGET_MACHINE_DEPENDENT_REORG
7270#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7271
7272#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7273#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7274
7275#undef TARGET_INIT_LIBFUNCS
7276#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7277
7278#undef TARGET_RETURN_IN_MEMORY
7279#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7280
7281#undef TARGET_ENCODE_SECTION_INFO
7282#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7283
7284#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7285#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7286
7287#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7288#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7289
7290#undef TARGET_VECTORIZE_INIT_COST
7291#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7292
7293#undef TARGET_VECTORIZE_ADD_STMT_COST
7294#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7295
7296#undef TARGET_VECTORIZE_FINISH_COST
7297#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7298
7299#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7300#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7301
7302#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7303#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7304
7305#undef TARGET_LIBGCC_CMP_RETURN_MODE
7306#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7307
7308#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7309#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7310
7311#undef TARGET_SCHED_SMS_RES_MII
7312#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7313
7314#undef TARGET_SECTION_TYPE_FLAGS
7315#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7316
7317#undef TARGET_ASM_SELECT_SECTION
7318#define TARGET_ASM_SELECT_SECTION spu_select_section
7319
7320#undef TARGET_ASM_UNIQUE_SECTION
7321#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7322
7323#undef TARGET_LEGITIMATE_ADDRESS_P
7324#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7325
7326#undef TARGET_LEGITIMATE_CONSTANT_P
7327#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7328
7329#undef TARGET_TRAMPOLINE_INIT
7330#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7331
08c6cbd2 7332#undef TARGET_WARN_FUNC_RETURN
7333#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7334
3defb88e 7335#undef TARGET_OPTION_OVERRIDE
7336#define TARGET_OPTION_OVERRIDE spu_option_override
7337
7338#undef TARGET_CONDITIONAL_REGISTER_USAGE
7339#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7340
7341#undef TARGET_REF_MAY_ALIAS_ERRNO
7342#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7343
7344#undef TARGET_ASM_OUTPUT_MI_THUNK
7345#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7346#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7347#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7348
7349/* Variable tracking should be run after all optimizations which
7350 change order of insns. It also needs a valid CFG. */
7351#undef TARGET_DELAY_VARTRACK
7352#define TARGET_DELAY_VARTRACK true
7353
d5065e6e 7354#undef TARGET_CANONICALIZE_COMPARISON
7355#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7356
5f35dd0e 7357#undef TARGET_CAN_USE_DOLOOP_P
7358#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7359
3defb88e 7360struct gcc_target targetm = TARGET_INITIALIZER;
7361
c2233b46 7362#include "gt-spu.h"