gcc/config/spu/spu.c
d353bf18 1/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
9ed99284 31#include "stringpool.h"
32#include "stor-layout.h"
33#include "calls.h"
34#include "varasm.h"
644459d0 35#include "expr.h"
34517c64 36#include "insn-codes.h"
644459d0 37#include "optabs.h"
38#include "except.h"
a3020f2f 39#include "hashtab.h"
40#include "hash-set.h"
41#include "vec.h"
42#include "machmode.h"
43#include "input.h"
644459d0 44#include "function.h"
45#include "output.h"
94ea8568 46#include "predict.h"
47#include "dominance.h"
48#include "cfg.h"
49#include "cfgrtl.h"
50#include "cfganal.h"
51#include "lcm.h"
52#include "cfgbuild.h"
53#include "cfgcleanup.h"
644459d0 54#include "basic-block.h"
0b205f4c 55#include "diagnostic-core.h"
644459d0 56#include "ggc.h"
644459d0 57#include "tm_p.h"
58#include "target.h"
59#include "target-def.h"
60#include "langhooks.h"
61#include "reload.h"
644459d0 62#include "sched-int.h"
63#include "params.h"
bc61cadb 64#include "hash-table.h"
65#include "tree-ssa-alias.h"
66#include "internal-fn.h"
67#include "gimple-fold.h"
68#include "tree-eh.h"
69#include "gimple-expr.h"
70#include "is-a.h"
e795d6e1 71#include "gimple.h"
a8783bee 72#include "gimplify.h"
644459d0 73#include "tm-constrs.h"
5a976006 74#include "sbitmap.h"
5a976006 75#include "df.h"
94ea8568 76#include "ddg.h"
77#include "timevar.h"
b9ed1410 78#include "dumpfile.h"
a7a0184d 79#include "cfgloop.h"
f7715905 80#include "builtins.h"
6f4e40cd 81#include "rtl-iter.h"
6352eedf 82
83/* Builtin types, data and prototypes. */
c2233b46 84
85enum spu_builtin_type_index
86{
87 SPU_BTI_END_OF_PARAMS,
88
89 /* We create new type nodes for these. */
90 SPU_BTI_V16QI,
91 SPU_BTI_V8HI,
92 SPU_BTI_V4SI,
93 SPU_BTI_V2DI,
94 SPU_BTI_V4SF,
95 SPU_BTI_V2DF,
96 SPU_BTI_UV16QI,
97 SPU_BTI_UV8HI,
98 SPU_BTI_UV4SI,
99 SPU_BTI_UV2DI,
100
101 /* A 16-byte type. (Implemented with V16QI_type_node) */
102 SPU_BTI_QUADWORD,
103
104 /* These all correspond to intSI_type_node */
105 SPU_BTI_7,
106 SPU_BTI_S7,
107 SPU_BTI_U7,
108 SPU_BTI_S10,
109 SPU_BTI_S10_4,
110 SPU_BTI_U14,
111 SPU_BTI_16,
112 SPU_BTI_S16,
113 SPU_BTI_S16_2,
114 SPU_BTI_U16,
115 SPU_BTI_U16_2,
116 SPU_BTI_U18,
117
118 /* These correspond to the standard types */
119 SPU_BTI_INTQI,
120 SPU_BTI_INTHI,
121 SPU_BTI_INTSI,
122 SPU_BTI_INTDI,
123
124 SPU_BTI_UINTQI,
125 SPU_BTI_UINTHI,
126 SPU_BTI_UINTSI,
127 SPU_BTI_UINTDI,
128
129 SPU_BTI_FLOAT,
130 SPU_BTI_DOUBLE,
131
132 SPU_BTI_VOID,
133 SPU_BTI_PTR,
134
135 SPU_BTI_MAX
136};
137
138#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
139#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
140#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
141#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
142#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
143#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
144#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
145#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
146#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
147#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
148
149static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
150
6352eedf 151struct spu_builtin_range
152{
153 int low, high;
154};
155
156static struct spu_builtin_range spu_builtin_range[] = {
157 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
158 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
159 {0ll, 0x7fll}, /* SPU_BTI_U7 */
160 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
161 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
162 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
163 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
164 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
165 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
166 {0ll, 0xffffll}, /* SPU_BTI_U16 */
167 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
168 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
169};
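/* Illustrative note (added): each entry is the inclusive range accepted for
   the corresponding SPU_BTI_* immediate named in the row comment, e.g.
   SPU_BTI_U7 accepts 0 .. 0x7f and SPU_BTI_S10 accepts -0x200 .. 0x1ff.  */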
170
644459d0 171\f
172/* Target specific attribute specifications. */
173char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
174
175/* Prototypes and external defs. */
0af56f80 176static int get_pipe (rtx_insn *insn);
644459d0 177static int spu_naked_function_p (tree func);
644459d0 178static int mem_is_padded_component_ref (rtx x);
c7b91b14 179static void fix_range (const char *);
9d98604b 180static rtx spu_expand_load (rtx, rtx, rtx, int);
644459d0 181
5474166e 182/* Which instruction set architecture to use. */
183int spu_arch;
 184/* Which CPU we are tuning for. */
185int spu_tune;
186
5a976006 187/* The hardware requires 8 insns between a hint and the branch it
 188 affects. This variable describes how many rtl instructions the
 189 compiler needs to see before inserting a hint, and then the compiler
 190 will insert enough nops to make it at least 8 insns. The default is
 191 for the compiler to allow up to 2 nops to be emitted. The nops are
 192 inserted in pairs, so we round down. */
193int spu_hint_dist = (8*4) - (2*4);
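/* Illustrative note (added; not from the original sources): using the
   numbers in the comment above, 8 insns * 4 bytes = 32 bytes must separate
   a hint from its branch, and allowing up to 2 nops * 4 bytes = 8 bytes of
   padding leaves spu_hint_dist at 32 - 8 = 24 bytes by default.  */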
194
644459d0 195enum spu_immediate {
196 SPU_NONE,
197 SPU_IL,
198 SPU_ILA,
199 SPU_ILH,
200 SPU_ILHU,
201 SPU_ORI,
202 SPU_ORHI,
203 SPU_ORBI,
99369027 204 SPU_IOHL
644459d0 205};
dea01258 206enum immediate_class
207{
208 IC_POOL, /* constant pool */
209 IC_IL1, /* one il* instruction */
210 IC_IL2, /* both ilhu and iohl instructions */
211 IC_IL1s, /* one il* instruction */
212 IC_IL2s, /* both ilhu and iohl instructions */
213 IC_FSMBI, /* the fsmbi instruction */
214 IC_CPAT, /* one of the c*d instructions */
5df189be 215 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 216};
644459d0 217
218static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
219static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 220static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
221static enum immediate_class classify_immediate (rtx op,
3754d046 222 machine_mode mode);
644459d0 223
6cf5579e 224/* Pointer mode for __ea references. */
225#define EAmode (spu_ea_model != 32 ? DImode : SImode)
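/* Illustrative note (added): with the 32-bit __ea model, __ea pointers are
   SImode; with any other (64-bit) model they are DImode, as selected by the
   macro above.  */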
226
ef51d1e3 227\f
5eb28709 228/* Define the structure for the machine field in struct function. */
229struct GTY(()) machine_function
230{
231 /* Register to use for PIC accesses. */
232 rtx pic_reg;
233};
234
235/* How to allocate a 'struct machine_function'. */
236static struct machine_function *
237spu_init_machine_status (void)
238{
25a27413 239 return ggc_cleared_alloc<machine_function> ();
5eb28709 240}
241
4c834714 242/* Implement TARGET_OPTION_OVERRIDE. */
243static void
244spu_option_override (void)
644459d0 245{
5eb28709 246 /* Set up function hooks. */
247 init_machine_status = spu_init_machine_status;
248
14d408d9 249 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it
 250 is more important to keep code small by default. */
686e2769 251 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 252 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 253 global_options.x_param_values,
254 global_options_set.x_param_values);
14d408d9 255
644459d0 256 flag_omit_frame_pointer = 1;
257
5a976006 258 /* Functions must be 8-byte aligned so we can correctly handle dual issue. */
644459d0 259 if (align_functions < 8)
260 align_functions = 8;
c7b91b14 261
5a976006 262 spu_hint_dist = 8*4 - spu_max_nops*4;
263 if (spu_hint_dist < 0)
264 spu_hint_dist = 0;
265
c7b91b14 266 if (spu_fixed_range_string)
267 fix_range (spu_fixed_range_string);
5474166e 268
269 /* Determine processor architectural level. */
270 if (spu_arch_string)
271 {
272 if (strcmp (&spu_arch_string[0], "cell") == 0)
273 spu_arch = PROCESSOR_CELL;
274 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
275 spu_arch = PROCESSOR_CELLEDP;
276 else
8e181c9d 277 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 278 }
279
280 /* Determine processor to tune for. */
281 if (spu_tune_string)
282 {
283 if (strcmp (&spu_tune_string[0], "cell") == 0)
284 spu_tune = PROCESSOR_CELL;
285 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
286 spu_tune = PROCESSOR_CELLEDP;
287 else
8e181c9d 288 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 289 }
98bbec1e 290
13684256 291 /* Change defaults according to the processor architecture. */
292 if (spu_arch == PROCESSOR_CELLEDP)
293 {
294 /* If no command line option has been otherwise specified, change
295 the default to -mno-safe-hints on celledp -- only the original
296 Cell/B.E. processors require this workaround. */
297 if (!(target_flags_explicit & MASK_SAFE_HINTS))
298 target_flags &= ~MASK_SAFE_HINTS;
299 }
300
98bbec1e 301 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 302}
303\f
304/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
305 struct attribute_spec.handler. */
306
644459d0 307/* True if MODE is valid for the target. By "valid", we mean able to
308 be manipulated in non-trivial ways. In particular, this means all
309 the arithmetic is supported. */
310static bool
3754d046 311spu_scalar_mode_supported_p (machine_mode mode)
644459d0 312{
313 switch (mode)
314 {
315 case QImode:
316 case HImode:
317 case SImode:
318 case SFmode:
319 case DImode:
320 case TImode:
321 case DFmode:
322 return true;
323
324 default:
325 return false;
326 }
327}
328
 329/* Similarly for vector modes. "Supported" here is less strict: at
 330 least some operations are supported; callers need to check optabs or
 331 builtins for further details. */
332static bool
3754d046 333spu_vector_mode_supported_p (machine_mode mode)
644459d0 334{
335 switch (mode)
336 {
337 case V16QImode:
338 case V8HImode:
339 case V4SImode:
340 case V2DImode:
341 case V4SFmode:
342 case V2DFmode:
343 return true;
344
345 default:
346 return false;
347 }
348}
349
350/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
351 least significant bytes of the outer mode. This function returns
 352 TRUE for the SUBREGs where this is correct. */
353int
354valid_subreg (rtx op)
355{
3754d046 356 machine_mode om = GET_MODE (op);
357 machine_mode im = GET_MODE (SUBREG_REG (op));
644459d0 358 return om != VOIDmode && im != VOIDmode
359 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 360 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
361 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 362}
363
 364/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 365 and adjust the start offset. */
644459d0 366static rtx
367adjust_operand (rtx op, HOST_WIDE_INT * start)
368{
3754d046 369 machine_mode mode;
644459d0 370 int op_size;
38aca5eb 371 /* Strip any paradoxical SUBREG. */
372 if (GET_CODE (op) == SUBREG
373 && (GET_MODE_BITSIZE (GET_MODE (op))
374 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 375 {
376 if (start)
377 *start -=
378 GET_MODE_BITSIZE (GET_MODE (op)) -
379 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
380 op = SUBREG_REG (op);
381 }
 382 /* If it is smaller than SI, widen it so an SI SUBREG is used below. */
383 op_size = GET_MODE_BITSIZE (GET_MODE (op));
384 if (op_size < 32)
385 {
386 if (start)
387 *start += 32 - op_size;
388 op_size = 32;
389 }
390 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
391 mode = mode_for_size (op_size, MODE_INT, 0);
392 if (mode != GET_MODE (op))
393 op = gen_rtx_SUBREG (mode, op, 0);
394 return op;
395}
396
397void
398spu_expand_extv (rtx ops[], int unsignedp)
399{
9d98604b 400 rtx dst = ops[0], src = ops[1];
644459d0 401 HOST_WIDE_INT width = INTVAL (ops[2]);
402 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 403 HOST_WIDE_INT align_mask;
404 rtx s0, s1, mask, r0;
644459d0 405
9d98604b 406 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 407
9d98604b 408 if (MEM_P (src))
644459d0 409 {
9d98604b 410 /* First, determine if we need 1 TImode load or 2. We need only 1
411 if the bits being extracted do not cross the alignment boundary
412 as determined by the MEM and its address. */
413
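	  /* Illustrative note (added): if MEM_ALIGN (src) is 128 bits,
	     align_mask below is -128, so the test succeeds exactly when bit
	     START and bit START + WIDTH - 1 fall in the same 128-bit
	     (quadword) block and a single TImode load suffices.  */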
414 align_mask = -MEM_ALIGN (src);
415 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 416 {
9d98604b 417 /* Alignment is sufficient for 1 load. */
418 s0 = gen_reg_rtx (TImode);
419 r0 = spu_expand_load (s0, 0, src, start / 8);
420 start &= 7;
421 if (r0)
422 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 423 }
9d98604b 424 else
425 {
426 /* Need 2 loads. */
427 s0 = gen_reg_rtx (TImode);
428 s1 = gen_reg_rtx (TImode);
429 r0 = spu_expand_load (s0, s1, src, start / 8);
430 start &= 7;
431
432 gcc_assert (start + width <= 128);
433 if (r0)
434 {
435 rtx r1 = gen_reg_rtx (SImode);
436 mask = gen_reg_rtx (TImode);
437 emit_move_insn (mask, GEN_INT (-1));
438 emit_insn (gen_rotqby_ti (s0, s0, r0));
439 emit_insn (gen_rotqby_ti (s1, s1, r0));
440 if (GET_CODE (r0) == CONST_INT)
441 r1 = GEN_INT (INTVAL (r0) & 15);
442 else
443 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
444 emit_insn (gen_shlqby_ti (mask, mask, r1));
445 emit_insn (gen_selb (s0, s1, s0, mask));
446 }
447 }
448
449 }
450 else if (GET_CODE (src) == SUBREG)
451 {
452 rtx r = SUBREG_REG (src);
453 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
454 s0 = gen_reg_rtx (TImode);
455 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
456 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
457 else
458 emit_move_insn (s0, src);
459 }
460 else
461 {
462 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
463 s0 = gen_reg_rtx (TImode);
464 emit_move_insn (s0, src);
644459d0 465 }
466
9d98604b 467 /* Now s0 is TImode and contains the bits to extract at start. */
468
469 if (start)
470 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
471
472 if (128 - width)
f5ff0b21 473 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 474
9d98604b 475 emit_move_insn (dst, s0);
644459d0 476}
477
478void
479spu_expand_insv (rtx ops[])
480{
481 HOST_WIDE_INT width = INTVAL (ops[1]);
482 HOST_WIDE_INT start = INTVAL (ops[2]);
483 HOST_WIDE_INT maskbits;
3754d046 484 machine_mode dst_mode;
644459d0 485 rtx dst = ops[0], src = ops[3];
4cbad5bb 486 int dst_size;
644459d0 487 rtx mask;
488 rtx shift_reg;
489 int shift;
490
491
492 if (GET_CODE (ops[0]) == MEM)
493 dst = gen_reg_rtx (TImode);
494 else
495 dst = adjust_operand (dst, &start);
496 dst_mode = GET_MODE (dst);
497 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
498
499 if (CONSTANT_P (src))
500 {
3754d046 501 machine_mode m =
644459d0 502 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
503 src = force_reg (m, convert_to_mode (m, src, 0));
504 }
505 src = adjust_operand (src, 0);
644459d0 506
507 mask = gen_reg_rtx (dst_mode);
508 shift_reg = gen_reg_rtx (dst_mode);
509 shift = dst_size - start - width;
510
511 /* It's not safe to use subreg here because the compiler assumes
512 that the SUBREG_REG is right justified in the SUBREG. */
513 convert_move (shift_reg, src, 1);
514
515 if (shift > 0)
516 {
517 switch (dst_mode)
518 {
519 case SImode:
520 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
521 break;
522 case DImode:
523 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
524 break;
525 case TImode:
526 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
527 break;
528 default:
529 abort ();
530 }
531 }
532 else if (shift < 0)
533 abort ();
534
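  /* Illustrative note (added): the switch below builds a mask of WIDTH one
     bits starting START bits from the most significant end. For example,
     with dst_size == 32, start == 4, width == 8, the low 32 bits of
     (-1ll << 20) + (1ll << 28) are 0x0ff00000, i.e. bits 4..11 counted
     from the top.  */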
535 switch (dst_size)
536 {
537 case 32:
538 maskbits = (-1ll << (32 - width - start));
539 if (start)
540 maskbits += (1ll << (32 - start));
541 emit_move_insn (mask, GEN_INT (maskbits));
542 break;
543 case 64:
544 maskbits = (-1ll << (64 - width - start));
545 if (start)
546 maskbits += (1ll << (64 - start));
547 emit_move_insn (mask, GEN_INT (maskbits));
548 break;
549 case 128:
550 {
551 unsigned char arr[16];
552 int i = start / 8;
553 memset (arr, 0, sizeof (arr));
554 arr[i] = 0xff >> (start & 7);
555 for (i++; i <= (start + width - 1) / 8; i++)
556 arr[i] = 0xff;
557 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
558 emit_move_insn (mask, array_to_constant (TImode, arr));
559 }
560 break;
561 default:
562 abort ();
563 }
564 if (GET_CODE (ops[0]) == MEM)
565 {
644459d0 566 rtx low = gen_reg_rtx (SImode);
644459d0 567 rtx rotl = gen_reg_rtx (SImode);
568 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 569 rtx addr;
570 rtx addr0;
571 rtx addr1;
644459d0 572 rtx mem;
573
9d98604b 574 addr = force_reg (Pmode, XEXP (ops[0], 0));
575 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 576 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
577 emit_insn (gen_negsi2 (rotl, low));
578 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
579 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 580 mem = change_address (ops[0], TImode, addr0);
644459d0 581 set_mem_alias_set (mem, 0);
582 emit_move_insn (dst, mem);
583 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 584 if (start + width > MEM_ALIGN (ops[0]))
585 {
586 rtx shl = gen_reg_rtx (SImode);
587 rtx mask1 = gen_reg_rtx (TImode);
588 rtx dst1 = gen_reg_rtx (TImode);
589 rtx mem1;
29c05e22 590 addr1 = plus_constant (Pmode, addr, 16);
9d98604b 591 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 592 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
593 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 594 mem1 = change_address (ops[0], TImode, addr1);
644459d0 595 set_mem_alias_set (mem1, 0);
596 emit_move_insn (dst1, mem1);
597 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
598 emit_move_insn (mem1, dst1);
599 }
9d98604b 600 emit_move_insn (mem, dst);
644459d0 601 }
602 else
71cd778d 603 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 604}
605
606
607int
608spu_expand_block_move (rtx ops[])
609{
610 HOST_WIDE_INT bytes, align, offset;
611 rtx src, dst, sreg, dreg, target;
612 int i;
613 if (GET_CODE (ops[2]) != CONST_INT
614 || GET_CODE (ops[3]) != CONST_INT
48eb4342 615 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 616 return 0;
617
618 bytes = INTVAL (ops[2]);
619 align = INTVAL (ops[3]);
620
621 if (bytes <= 0)
622 return 1;
623
624 dst = ops[0];
625 src = ops[1];
626
627 if (align == 16)
628 {
629 for (offset = 0; offset + 16 <= bytes; offset += 16)
630 {
631 dst = adjust_address (ops[0], V16QImode, offset);
632 src = adjust_address (ops[1], V16QImode, offset);
633 emit_move_insn (dst, src);
634 }
635 if (offset < bytes)
636 {
637 rtx mask;
638 unsigned char arr[16] = { 0 };
639 for (i = 0; i < bytes - offset; i++)
640 arr[i] = 0xff;
641 dst = adjust_address (ops[0], V16QImode, offset);
642 src = adjust_address (ops[1], V16QImode, offset);
643 mask = gen_reg_rtx (V16QImode);
644 sreg = gen_reg_rtx (V16QImode);
645 dreg = gen_reg_rtx (V16QImode);
646 target = gen_reg_rtx (V16QImode);
647 emit_move_insn (mask, array_to_constant (V16QImode, arr));
648 emit_move_insn (dreg, dst);
649 emit_move_insn (sreg, src);
650 emit_insn (gen_selb (target, dreg, sreg, mask));
651 emit_move_insn (dst, target);
652 }
653 return 1;
654 }
655 return 0;
656}
657
658enum spu_comp_code
659{ SPU_EQ, SPU_GT, SPU_GTU };
660
5474166e 661int spu_comp_icode[12][3] = {
662 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
663 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
664 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
665 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
666 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
667 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
668 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
669 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
670 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
671 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
672 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
673 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 674};
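/* Illustrative note (added): rows of spu_comp_icode are indexed by mode (see
   the switch in spu_emit_branch_or_set below) and columns by spu_comp_code,
   so e.g. spu_comp_icode[2][SPU_GT] is CODE_FOR_cgt_si, the SImode signed
   greater-than pattern. The 0 entries mark modes with no unsigned compare. */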
675
 676/* Generate a compare for CODE. Return a brand-new rtx that represents
 677 the result of the compare. GCC can figure this out too if we don't
 678 provide all variations of compares, but since GCC always wants to use
 679 WORD_MODE, we can generate better code in most cases if we do it
 680 ourselves. */
681void
74f4459c 682spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 683{
684 int reverse_compare = 0;
685 int reverse_test = 0;
5d70b918 686 rtx compare_result, eq_result;
687 rtx comp_rtx, eq_rtx;
3754d046 688 machine_mode comp_mode;
689 machine_mode op_mode;
b9c74b4d 690 enum spu_comp_code scode, eq_code;
691 enum insn_code ior_code;
74f4459c 692 enum rtx_code code = GET_CODE (cmp);
693 rtx op0 = XEXP (cmp, 0);
694 rtx op1 = XEXP (cmp, 1);
644459d0 695 int index;
5d70b918 696 int eq_test = 0;
644459d0 697
74f4459c 698 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 699 and so on, to keep the constant in operand 1. */
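 /* Purely illustrative example (added): (x >= 16) becomes (x > 15), which
    maps directly onto the cgt/clgt patterns listed in spu_comp_icode.  */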
74f4459c 700 if (GET_CODE (op1) == CONST_INT)
644459d0 701 {
74f4459c 702 HOST_WIDE_INT val = INTVAL (op1) - 1;
703 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 704 switch (code)
705 {
706 case GE:
74f4459c 707 op1 = GEN_INT (val);
644459d0 708 code = GT;
709 break;
710 case LT:
74f4459c 711 op1 = GEN_INT (val);
644459d0 712 code = LE;
713 break;
714 case GEU:
74f4459c 715 op1 = GEN_INT (val);
644459d0 716 code = GTU;
717 break;
718 case LTU:
74f4459c 719 op1 = GEN_INT (val);
644459d0 720 code = LEU;
721 break;
722 default:
723 break;
724 }
725 }
726
686195ea 727 /* However, if we generate an integer result, performing a reverse test
728 would require an extra negation, so avoid that where possible. */
729 if (GET_CODE (op1) == CONST_INT && is_set == 1)
730 {
731 HOST_WIDE_INT val = INTVAL (op1) + 1;
732 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
733 switch (code)
734 {
735 case LE:
736 op1 = GEN_INT (val);
737 code = LT;
738 break;
739 case LEU:
740 op1 = GEN_INT (val);
741 code = LTU;
742 break;
743 default:
744 break;
745 }
746 }
747
5d70b918 748 comp_mode = SImode;
74f4459c 749 op_mode = GET_MODE (op0);
5d70b918 750
644459d0 751 switch (code)
752 {
753 case GE:
644459d0 754 scode = SPU_GT;
07027691 755 if (HONOR_NANS (op_mode))
5d70b918 756 {
757 reverse_compare = 0;
758 reverse_test = 0;
759 eq_test = 1;
760 eq_code = SPU_EQ;
761 }
762 else
763 {
764 reverse_compare = 1;
765 reverse_test = 1;
766 }
644459d0 767 break;
768 case LE:
644459d0 769 scode = SPU_GT;
07027691 770 if (HONOR_NANS (op_mode))
5d70b918 771 {
772 reverse_compare = 1;
773 reverse_test = 0;
774 eq_test = 1;
775 eq_code = SPU_EQ;
776 }
777 else
778 {
779 reverse_compare = 0;
780 reverse_test = 1;
781 }
644459d0 782 break;
783 case LT:
784 reverse_compare = 1;
785 reverse_test = 0;
786 scode = SPU_GT;
787 break;
788 case GEU:
789 reverse_compare = 1;
790 reverse_test = 1;
791 scode = SPU_GTU;
792 break;
793 case LEU:
794 reverse_compare = 0;
795 reverse_test = 1;
796 scode = SPU_GTU;
797 break;
798 case LTU:
799 reverse_compare = 1;
800 reverse_test = 0;
801 scode = SPU_GTU;
802 break;
803 case NE:
804 reverse_compare = 0;
805 reverse_test = 1;
806 scode = SPU_EQ;
807 break;
808
809 case EQ:
810 scode = SPU_EQ;
811 break;
812 case GT:
813 scode = SPU_GT;
814 break;
815 case GTU:
816 scode = SPU_GTU;
817 break;
818 default:
819 scode = SPU_EQ;
820 break;
821 }
822
644459d0 823 switch (op_mode)
824 {
825 case QImode:
826 index = 0;
827 comp_mode = QImode;
828 break;
829 case HImode:
830 index = 1;
831 comp_mode = HImode;
832 break;
833 case SImode:
834 index = 2;
835 break;
836 case DImode:
837 index = 3;
838 break;
839 case TImode:
840 index = 4;
841 break;
842 case SFmode:
843 index = 5;
844 break;
845 case DFmode:
846 index = 6;
847 break;
848 case V16QImode:
5474166e 849 index = 7;
850 comp_mode = op_mode;
851 break;
644459d0 852 case V8HImode:
5474166e 853 index = 8;
854 comp_mode = op_mode;
855 break;
644459d0 856 case V4SImode:
5474166e 857 index = 9;
858 comp_mode = op_mode;
859 break;
644459d0 860 case V4SFmode:
5474166e 861 index = 10;
862 comp_mode = V4SImode;
863 break;
644459d0 864 case V2DFmode:
5474166e 865 index = 11;
866 comp_mode = V2DImode;
644459d0 867 break;
5474166e 868 case V2DImode:
644459d0 869 default:
870 abort ();
871 }
872
74f4459c 873 if (GET_MODE (op1) == DFmode
07027691 874 && (scode != SPU_GT && scode != SPU_EQ))
875 abort ();
644459d0 876
74f4459c 877 if (is_set == 0 && op1 == const0_rtx
878 && (GET_MODE (op0) == SImode
686195ea 879 || GET_MODE (op0) == HImode
880 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
644459d0 881 {
882 /* Don't need to set a register with the result when we are
883 comparing against zero and branching. */
884 reverse_test = !reverse_test;
74f4459c 885 compare_result = op0;
644459d0 886 }
887 else
888 {
889 compare_result = gen_reg_rtx (comp_mode);
890
891 if (reverse_compare)
892 {
74f4459c 893 rtx t = op1;
894 op1 = op0;
895 op0 = t;
644459d0 896 }
897
898 if (spu_comp_icode[index][scode] == 0)
899 abort ();
900
901 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 902 (op0, op_mode))
903 op0 = force_reg (op_mode, op0);
644459d0 904 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 905 (op1, op_mode))
906 op1 = force_reg (op_mode, op1);
644459d0 907 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 908 op0, op1);
644459d0 909 if (comp_rtx == 0)
910 abort ();
911 emit_insn (comp_rtx);
912
5d70b918 913 if (eq_test)
914 {
915 eq_result = gen_reg_rtx (comp_mode);
916 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 917 op0, op1);
5d70b918 918 if (eq_rtx == 0)
919 abort ();
920 emit_insn (eq_rtx);
d6bf3b14 921 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 922 gcc_assert (ior_code != CODE_FOR_nothing);
923 emit_insn (GEN_FCN (ior_code)
924 (compare_result, compare_result, eq_result));
925 }
644459d0 926 }
927
928 if (is_set == 0)
929 {
930 rtx bcomp;
931 rtx loc_ref;
932
933 /* We don't have branch on QI compare insns, so we convert the
934 QI compare result to a HI result. */
935 if (comp_mode == QImode)
936 {
937 rtx old_res = compare_result;
938 compare_result = gen_reg_rtx (HImode);
939 comp_mode = HImode;
940 emit_insn (gen_extendqihi2 (compare_result, old_res));
941 }
942
943 if (reverse_test)
944 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
945 else
946 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
947
74f4459c 948 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 949 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
950 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
951 loc_ref, pc_rtx)));
952 }
953 else if (is_set == 2)
954 {
74f4459c 955 rtx target = operands[0];
644459d0 956 int compare_size = GET_MODE_BITSIZE (comp_mode);
957 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
3754d046 958 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
644459d0 959 rtx select_mask;
960 rtx op_t = operands[2];
961 rtx op_f = operands[3];
962
963 /* The result of the comparison can be SI, HI or QI mode. Create a
964 mask based on that result. */
965 if (target_size > compare_size)
966 {
967 select_mask = gen_reg_rtx (mode);
968 emit_insn (gen_extend_compare (select_mask, compare_result));
969 }
970 else if (target_size < compare_size)
971 select_mask =
972 gen_rtx_SUBREG (mode, compare_result,
973 (compare_size - target_size) / BITS_PER_UNIT);
974 else if (comp_mode != mode)
975 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
976 else
977 select_mask = compare_result;
978
979 if (GET_MODE (target) != GET_MODE (op_t)
980 || GET_MODE (target) != GET_MODE (op_f))
981 abort ();
982
983 if (reverse_test)
984 emit_insn (gen_selb (target, op_t, op_f, select_mask));
985 else
986 emit_insn (gen_selb (target, op_f, op_t, select_mask));
987 }
988 else
989 {
74f4459c 990 rtx target = operands[0];
644459d0 991 if (reverse_test)
992 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
993 gen_rtx_NOT (comp_mode, compare_result)));
994 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
995 emit_insn (gen_extendhisi2 (target, compare_result));
996 else if (GET_MODE (target) == SImode
997 && GET_MODE (compare_result) == QImode)
998 emit_insn (gen_extend_compare (target, compare_result));
999 else
1000 emit_move_insn (target, compare_result);
1001 }
1002}
1003
1004HOST_WIDE_INT
1005const_double_to_hwint (rtx x)
1006{
1007 HOST_WIDE_INT val;
1008 REAL_VALUE_TYPE rv;
1009 if (GET_MODE (x) == SFmode)
1010 {
1011 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1012 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1013 }
1014 else if (GET_MODE (x) == DFmode)
1015 {
1016 long l[2];
1017 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1018 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1019 val = l[0];
1020 val = (val << 32) | (l[1] & 0xffffffff);
1021 }
1022 else
1023 abort ();
1024 return val;
1025}
1026
1027rtx
3754d046 1028hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
644459d0 1029{
1030 long tv[2];
1031 REAL_VALUE_TYPE rv;
1032 gcc_assert (mode == SFmode || mode == DFmode);
1033
1034 if (mode == SFmode)
1035 tv[0] = (v << 32) >> 32;
1036 else if (mode == DFmode)
1037 {
1038 tv[1] = (v << 32) >> 32;
1039 tv[0] = v >> 32;
1040 }
1041 real_from_target (&rv, tv, mode);
1042 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1043}
1044
1045void
1046print_operand_address (FILE * file, register rtx addr)
1047{
1048 rtx reg;
1049 rtx offset;
1050
e04cf423 1051 if (GET_CODE (addr) == AND
1052 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1053 && INTVAL (XEXP (addr, 1)) == -16)
1054 addr = XEXP (addr, 0);
1055
644459d0 1056 switch (GET_CODE (addr))
1057 {
1058 case REG:
1059 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1060 break;
1061
1062 case PLUS:
1063 reg = XEXP (addr, 0);
1064 offset = XEXP (addr, 1);
1065 if (GET_CODE (offset) == REG)
1066 {
1067 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1068 reg_names[REGNO (offset)]);
1069 }
1070 else if (GET_CODE (offset) == CONST_INT)
1071 {
1072 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1073 INTVAL (offset), reg_names[REGNO (reg)]);
1074 }
1075 else
1076 abort ();
1077 break;
1078
1079 case CONST:
1080 case LABEL_REF:
1081 case SYMBOL_REF:
1082 case CONST_INT:
1083 output_addr_const (file, addr);
1084 break;
1085
1086 default:
1087 debug_rtx (addr);
1088 abort ();
1089 }
1090}
1091
1092void
1093print_operand (FILE * file, rtx x, int code)
1094{
3754d046 1095 machine_mode mode = GET_MODE (x);
644459d0 1096 HOST_WIDE_INT val;
1097 unsigned char arr[16];
1098 int xcode = GET_CODE (x);
dea01258 1099 int i, info;
644459d0 1100 if (GET_MODE (x) == VOIDmode)
1101 switch (code)
1102 {
644459d0 1103 case 'L': /* 128 bits, signed */
1104 case 'm': /* 128 bits, signed */
1105 case 'T': /* 128 bits, signed */
1106 case 't': /* 128 bits, signed */
1107 mode = TImode;
1108 break;
644459d0 1109 case 'K': /* 64 bits, signed */
1110 case 'k': /* 64 bits, signed */
1111 case 'D': /* 64 bits, signed */
1112 case 'd': /* 64 bits, signed */
1113 mode = DImode;
1114 break;
644459d0 1115 case 'J': /* 32 bits, signed */
1116 case 'j': /* 32 bits, signed */
1117 case 's': /* 32 bits, signed */
1118 case 'S': /* 32 bits, signed */
1119 mode = SImode;
1120 break;
1121 }
1122 switch (code)
1123 {
1124
1125 case 'j': /* 32 bits, signed */
1126 case 'k': /* 64 bits, signed */
1127 case 'm': /* 128 bits, signed */
1128 if (xcode == CONST_INT
1129 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1130 {
1131 gcc_assert (logical_immediate_p (x, mode));
1132 constant_to_array (mode, x, arr);
1133 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1134 val = trunc_int_for_mode (val, SImode);
1135 switch (which_logical_immediate (val))
1136 {
1137 case SPU_ORI:
1138 break;
1139 case SPU_ORHI:
1140 fprintf (file, "h");
1141 break;
1142 case SPU_ORBI:
1143 fprintf (file, "b");
1144 break;
1145 default:
1146 gcc_unreachable();
1147 }
1148 }
1149 else
1150 gcc_unreachable();
1151 return;
1152
1153 case 'J': /* 32 bits, signed */
1154 case 'K': /* 64 bits, signed */
1155 case 'L': /* 128 bits, signed */
1156 if (xcode == CONST_INT
1157 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1158 {
1159 gcc_assert (logical_immediate_p (x, mode)
1160 || iohl_immediate_p (x, mode));
1161 constant_to_array (mode, x, arr);
1162 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1163 val = trunc_int_for_mode (val, SImode);
1164 switch (which_logical_immediate (val))
1165 {
1166 case SPU_ORI:
1167 case SPU_IOHL:
1168 break;
1169 case SPU_ORHI:
1170 val = trunc_int_for_mode (val, HImode);
1171 break;
1172 case SPU_ORBI:
1173 val = trunc_int_for_mode (val, QImode);
1174 break;
1175 default:
1176 gcc_unreachable();
1177 }
1178 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1179 }
1180 else
1181 gcc_unreachable();
1182 return;
1183
1184 case 't': /* 128 bits, signed */
1185 case 'd': /* 64 bits, signed */
1186 case 's': /* 32 bits, signed */
dea01258 1187 if (CONSTANT_P (x))
644459d0 1188 {
dea01258 1189 enum immediate_class c = classify_immediate (x, mode);
1190 switch (c)
1191 {
1192 case IC_IL1:
1193 constant_to_array (mode, x, arr);
1194 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1195 val = trunc_int_for_mode (val, SImode);
1196 switch (which_immediate_load (val))
1197 {
1198 case SPU_IL:
1199 break;
1200 case SPU_ILA:
1201 fprintf (file, "a");
1202 break;
1203 case SPU_ILH:
1204 fprintf (file, "h");
1205 break;
1206 case SPU_ILHU:
1207 fprintf (file, "hu");
1208 break;
1209 default:
1210 gcc_unreachable ();
1211 }
1212 break;
1213 case IC_CPAT:
1214 constant_to_array (mode, x, arr);
1215 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1216 if (info == 1)
1217 fprintf (file, "b");
1218 else if (info == 2)
1219 fprintf (file, "h");
1220 else if (info == 4)
1221 fprintf (file, "w");
1222 else if (info == 8)
1223 fprintf (file, "d");
1224 break;
1225 case IC_IL1s:
1226 if (xcode == CONST_VECTOR)
1227 {
1228 x = CONST_VECTOR_ELT (x, 0);
1229 xcode = GET_CODE (x);
1230 }
1231 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1232 fprintf (file, "a");
1233 else if (xcode == HIGH)
1234 fprintf (file, "hu");
1235 break;
1236 case IC_FSMBI:
5df189be 1237 case IC_FSMBI2:
dea01258 1238 case IC_IL2:
1239 case IC_IL2s:
1240 case IC_POOL:
1241 abort ();
1242 }
644459d0 1243 }
644459d0 1244 else
1245 gcc_unreachable ();
1246 return;
1247
1248 case 'T': /* 128 bits, signed */
1249 case 'D': /* 64 bits, signed */
1250 case 'S': /* 32 bits, signed */
dea01258 1251 if (CONSTANT_P (x))
644459d0 1252 {
dea01258 1253 enum immediate_class c = classify_immediate (x, mode);
1254 switch (c)
644459d0 1255 {
dea01258 1256 case IC_IL1:
1257 constant_to_array (mode, x, arr);
1258 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1259 val = trunc_int_for_mode (val, SImode);
1260 switch (which_immediate_load (val))
1261 {
1262 case SPU_IL:
1263 case SPU_ILA:
1264 break;
1265 case SPU_ILH:
1266 case SPU_ILHU:
1267 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1268 break;
1269 default:
1270 gcc_unreachable ();
1271 }
1272 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1273 break;
1274 case IC_FSMBI:
1275 constant_to_array (mode, x, arr);
1276 val = 0;
1277 for (i = 0; i < 16; i++)
1278 {
1279 val <<= 1;
1280 val |= arr[i] & 1;
1281 }
1282 print_operand (file, GEN_INT (val), 0);
1283 break;
1284 case IC_CPAT:
1285 constant_to_array (mode, x, arr);
1286 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1288 break;
dea01258 1289 case IC_IL1s:
dea01258 1290 if (xcode == HIGH)
5df189be 1291 x = XEXP (x, 0);
1292 if (GET_CODE (x) == CONST_VECTOR)
1293 x = CONST_VECTOR_ELT (x, 0);
1294 output_addr_const (file, x);
1295 if (xcode == HIGH)
1296 fprintf (file, "@h");
644459d0 1297 break;
dea01258 1298 case IC_IL2:
1299 case IC_IL2s:
5df189be 1300 case IC_FSMBI2:
dea01258 1301 case IC_POOL:
1302 abort ();
644459d0 1303 }
c8befdb9 1304 }
644459d0 1305 else
1306 gcc_unreachable ();
1307 return;
1308
644459d0 1309 case 'C':
1310 if (xcode == CONST_INT)
1311 {
 1312 /* Only the 4 least significant bits are relevant for generating
 1313 control word instructions. */
1314 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1315 return;
1316 }
1317 break;
1318
1319 case 'M': /* print code for c*d */
1320 if (GET_CODE (x) == CONST_INT)
1321 switch (INTVAL (x))
1322 {
1323 case 1:
1324 fprintf (file, "b");
1325 break;
1326 case 2:
1327 fprintf (file, "h");
1328 break;
1329 case 4:
1330 fprintf (file, "w");
1331 break;
1332 case 8:
1333 fprintf (file, "d");
1334 break;
1335 default:
1336 gcc_unreachable();
1337 }
1338 else
1339 gcc_unreachable();
1340 return;
1341
1342 case 'N': /* Negate the operand */
1343 if (xcode == CONST_INT)
1344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1345 else if (xcode == CONST_VECTOR)
1346 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1347 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1348 return;
1349
1350 case 'I': /* enable/disable interrupts */
1351 if (xcode == CONST_INT)
1352 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1353 return;
1354
1355 case 'b': /* branch modifiers */
1356 if (xcode == REG)
1357 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1358 else if (COMPARISON_P (x))
1359 fprintf (file, "%s", xcode == NE ? "n" : "");
1360 return;
1361
1362 case 'i': /* indirect call */
1363 if (xcode == MEM)
1364 {
1365 if (GET_CODE (XEXP (x, 0)) == REG)
1366 /* Used in indirect function calls. */
1367 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1368 else
1369 output_address (XEXP (x, 0));
1370 }
1371 return;
1372
1373 case 'p': /* load/store */
1374 if (xcode == MEM)
1375 {
1376 x = XEXP (x, 0);
1377 xcode = GET_CODE (x);
1378 }
e04cf423 1379 if (xcode == AND)
1380 {
1381 x = XEXP (x, 0);
1382 xcode = GET_CODE (x);
1383 }
644459d0 1384 if (xcode == REG)
1385 fprintf (file, "d");
1386 else if (xcode == CONST_INT)
1387 fprintf (file, "a");
1388 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1389 fprintf (file, "r");
1390 else if (xcode == PLUS || xcode == LO_SUM)
1391 {
1392 if (GET_CODE (XEXP (x, 1)) == REG)
1393 fprintf (file, "x");
1394 else
1395 fprintf (file, "d");
1396 }
1397 return;
1398
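 /* Descriptive note (added, based only on the code below): cases 'e'-'h'
    print a constant element masked down to a small field (3, 5, or 6 bits,
    or divided by 8 and masked to 5 bits for 'h'); cases 'E'-'H' print the
    corresponding fields of the negated value.  */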
5df189be 1399 case 'e':
1400 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1401 val &= 0x7;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1404
1405 case 'f':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val &= 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'g':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val &= 0x3f;
1414 output_addr_const (file, GEN_INT (val));
1415 return;
1416
1417 case 'h':
1418 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1419 val = (val >> 3) & 0x1f;
1420 output_addr_const (file, GEN_INT (val));
1421 return;
1422
1423 case 'E':
1424 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1425 val = -val;
1426 val &= 0x7;
1427 output_addr_const (file, GEN_INT (val));
1428 return;
1429
1430 case 'F':
1431 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1432 val = -val;
1433 val &= 0x1f;
1434 output_addr_const (file, GEN_INT (val));
1435 return;
1436
1437 case 'G':
1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439 val = -val;
1440 val &= 0x3f;
1441 output_addr_const (file, GEN_INT (val));
1442 return;
1443
1444 case 'H':
1445 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1446 val = -(val & -8ll);
1447 val = (val >> 3) & 0x1f;
1448 output_addr_const (file, GEN_INT (val));
1449 return;
1450
56c7bfc2 1451 case 'v':
1452 case 'w':
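 /* Descriptive note (added): the constant is expected to be in a target
    single-precision float format here; arr[0] and arr[1] hold its sign and
    exponent bits, so the expression below recovers the 8-bit biased
    exponent and subtracts the IEEE-754 bias of 127. 'w' prints the
    negated result.  */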
1453 constant_to_array (mode, x, arr);
1454 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1455 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1456 return;
1457
644459d0 1458 case 0:
1459 if (xcode == REG)
1460 fprintf (file, "%s", reg_names[REGNO (x)]);
1461 else if (xcode == MEM)
1462 output_address (XEXP (x, 0));
1463 else if (xcode == CONST_VECTOR)
dea01258 1464 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1465 else
1466 output_addr_const (file, x);
1467 return;
1468
f6a0d06f 1469 /* unused letters
56c7bfc2 1470 o qr u yz
5df189be 1471 AB OPQR UVWXYZ */
644459d0 1472 default:
1473 output_operand_lossage ("invalid %%xn code");
1474 }
1475 gcc_unreachable ();
1476}
1477
644459d0 1478/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
 1479 caller-saved register. For leaf functions it is more efficient to
1480 use a volatile register because we won't need to save and restore the
1481 pic register. This routine is only valid after register allocation
1482 is completed, so we can pick an unused register. */
1483static rtx
1484get_pic_reg (void)
1485{
644459d0 1486 if (!reload_completed && !reload_in_progress)
1487 abort ();
5eb28709 1488
1489 /* If we've already made the decision, we need to keep with it. Once we've
1490 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1491 return true since the register is now live; this should not cause us to
1492 "switch back" to using pic_offset_table_rtx. */
1493 if (!cfun->machine->pic_reg)
1494 {
d5bf7b64 1495 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
5eb28709 1496 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1497 else
1498 cfun->machine->pic_reg = pic_offset_table_rtx;
1499 }
1500
1501 return cfun->machine->pic_reg;
644459d0 1502}
1503
5df189be 1504/* Split constant addresses to handle cases that are too large.
1505 Add in the pic register when in PIC mode.
1506 Split immediates that require more than 1 instruction. */
dea01258 1507int
1508spu_split_immediate (rtx * ops)
c8befdb9 1509{
3754d046 1510 machine_mode mode = GET_MODE (ops[0]);
dea01258 1511 enum immediate_class c = classify_immediate (ops[1], mode);
1512
1513 switch (c)
c8befdb9 1514 {
dea01258 1515 case IC_IL2:
1516 {
1517 unsigned char arrhi[16];
1518 unsigned char arrlo[16];
98bbec1e 1519 rtx to, temp, hi, lo;
dea01258 1520 int i;
3754d046 1521 machine_mode imode = mode;
98bbec1e 1522 /* We need to do reals as ints because the constant used in the
1523 IOR might not be a legitimate real constant. */
1524 imode = int_mode_for_mode (mode);
dea01258 1525 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1526 if (imode != mode)
1527 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1528 else
1529 to = ops[0];
1530 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1531 for (i = 0; i < 16; i += 4)
1532 {
1533 arrlo[i + 2] = arrhi[i + 2];
1534 arrlo[i + 3] = arrhi[i + 3];
1535 arrlo[i + 0] = arrlo[i + 1] = 0;
1536 arrhi[i + 2] = arrhi[i + 3] = 0;
1537 }
98bbec1e 1538 hi = array_to_constant (imode, arrhi);
1539 lo = array_to_constant (imode, arrlo);
1540 emit_move_insn (temp, hi);
dea01258 1541 emit_insn (gen_rtx_SET
98bbec1e 1542 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1543 return 1;
1544 }
5df189be 1545 case IC_FSMBI2:
1546 {
1547 unsigned char arr_fsmbi[16];
1548 unsigned char arr_andbi[16];
1549 rtx to, reg_fsmbi, reg_and;
1550 int i;
3754d046 1551 machine_mode imode = mode;
5df189be 1552 /* We need to do reals as ints because the constant used in the
1553 * AND might not be a legitimate real constant. */
1554 imode = int_mode_for_mode (mode);
1555 constant_to_array (mode, ops[1], arr_fsmbi);
1556 if (imode != mode)
1557 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1558 else
1559 to = ops[0];
1560 for (i = 0; i < 16; i++)
1561 if (arr_fsmbi[i] != 0)
1562 {
1563 arr_andbi[0] = arr_fsmbi[i];
1564 arr_fsmbi[i] = 0xff;
1565 }
1566 for (i = 1; i < 16; i++)
1567 arr_andbi[i] = arr_andbi[0];
1568 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1569 reg_and = array_to_constant (imode, arr_andbi);
1570 emit_move_insn (to, reg_fsmbi);
1571 emit_insn (gen_rtx_SET
1572 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1573 return 1;
1574 }
dea01258 1575 case IC_POOL:
1576 if (reload_in_progress || reload_completed)
1577 {
1578 rtx mem = force_const_mem (mode, ops[1]);
1579 if (TARGET_LARGE_MEM)
1580 {
1581 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1582 emit_move_insn (addr, XEXP (mem, 0));
1583 mem = replace_equiv_address (mem, addr);
1584 }
1585 emit_move_insn (ops[0], mem);
1586 return 1;
1587 }
1588 break;
1589 case IC_IL1s:
1590 case IC_IL2s:
1591 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1592 {
1593 if (c == IC_IL2s)
1594 {
5df189be 1595 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1596 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1597 }
1598 else if (flag_pic)
1599 emit_insn (gen_pic (ops[0], ops[1]));
1600 if (flag_pic)
1601 {
1602 rtx pic_reg = get_pic_reg ();
1603 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
dea01258 1604 }
1605 return flag_pic || c == IC_IL2s;
1606 }
1607 break;
1608 case IC_IL1:
1609 case IC_FSMBI:
1610 case IC_CPAT:
1611 break;
c8befdb9 1612 }
dea01258 1613 return 0;
c8befdb9 1614}
1615
644459d0 1616/* SAVING is TRUE when we are generating the actual load and store
 1617 instructions for REGNO. When determining the size of the stack
 1618 needed for saving registers we must allocate enough space for the
 1619 worst case, because we don't always have the information early enough
 1620 to avoid allocating it. But we can at least eliminate the actual loads
 1621 and stores during the prologue/epilogue. */
1622static int
1623need_to_save_reg (int regno, int saving)
1624{
3072d30e 1625 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1626 return 1;
1627 if (flag_pic
1628 && regno == PIC_OFFSET_TABLE_REGNUM
5eb28709 1629 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
644459d0 1630 return 1;
1631 return 0;
1632}
1633
1634/* This function is only correct starting with local register
 1635 allocation. */
1636int
1637spu_saved_regs_size (void)
1638{
1639 int reg_save_size = 0;
1640 int regno;
1641
1642 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1643 if (need_to_save_reg (regno, 0))
1644 reg_save_size += 0x10;
1645 return reg_save_size;
1646}
1647
0af56f80 1648static rtx_insn *
644459d0 1649frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1650{
1651 rtx reg = gen_rtx_REG (V4SImode, regno);
1652 rtx mem =
1653 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1654 return emit_insn (gen_movv4si (mem, reg));
1655}
1656
0af56f80 1657static rtx_insn *
644459d0 1658frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1659{
1660 rtx reg = gen_rtx_REG (V4SImode, regno);
1661 rtx mem =
1662 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1663 return emit_insn (gen_movv4si (reg, mem));
1664}
1665
1666/* This happens after reload, so we need to expand it. */
0af56f80 1667static rtx_insn *
644459d0 1668frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1669{
0af56f80 1670 rtx_insn *insn;
644459d0 1671 if (satisfies_constraint_K (GEN_INT (imm)))
1672 {
1673 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1674 }
1675 else
1676 {
3072d30e 1677 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1678 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1679 if (REGNO (src) == REGNO (scratch))
1680 abort ();
1681 }
644459d0 1682 return insn;
1683}
1684
1685/* Return nonzero if this function is known to have a null epilogue. */
1686
1687int
1688direct_return (void)
1689{
1690 if (reload_completed)
1691 {
1692 if (cfun->static_chain_decl == 0
1693 && (spu_saved_regs_size ()
1694 + get_frame_size ()
abe32cce 1695 + crtl->outgoing_args_size
1696 + crtl->args.pretend_args_size == 0)
d5bf7b64 1697 && crtl->is_leaf)
644459d0 1698 return 1;
1699 }
1700 return 0;
1701}
1702
1703/*
1704 The stack frame looks like this:
1705 +-------------+
1706 | incoming |
a8e019fa 1707 | args |
1708 AP -> +-------------+
644459d0 1709 | $lr save |
1710 +-------------+
1711 prev SP | back chain |
1712 +-------------+
1713 | var args |
abe32cce 1714 | reg save | crtl->args.pretend_args_size bytes
644459d0 1715 +-------------+
1716 | ... |
1717 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1718 FP -> +-------------+
644459d0 1719 | ... |
a8e019fa 1720 | vars | get_frame_size() bytes
1721 HFP -> +-------------+
644459d0 1722 | ... |
1723 | outgoing |
abe32cce 1724 | args | crtl->outgoing_args_size bytes
644459d0 1725 +-------------+
1726 | $lr of next |
1727 | frame |
1728 +-------------+
a8e019fa 1729 | back chain |
1730 SP -> +-------------+
644459d0 1731
1732*/
1733void
1734spu_expand_prologue (void)
1735{
1736 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1737 HOST_WIDE_INT total_size;
1738 HOST_WIDE_INT saved_regs_size;
1739 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1740 rtx scratch_reg_0, scratch_reg_1;
0af56f80 1741 rtx_insn *insn;
1742 rtx real;
644459d0 1743
5eb28709 1744 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1745 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 1746
1747 if (spu_naked_function_p (current_function_decl))
1748 return;
1749
1750 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1751 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1752
1753 saved_regs_size = spu_saved_regs_size ();
1754 total_size = size + saved_regs_size
abe32cce 1755 + crtl->outgoing_args_size
1756 + crtl->args.pretend_args_size;
644459d0 1757
d5bf7b64 1758 if (!crtl->is_leaf
18d50ae6 1759 || cfun->calls_alloca || total_size > 0)
644459d0 1760 total_size += STACK_POINTER_OFFSET;
1761
1762 /* Save this first because code after this might use the link
1763 register as a scratch register. */
d5bf7b64 1764 if (!crtl->is_leaf)
644459d0 1765 {
1766 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1767 RTX_FRAME_RELATED_P (insn) = 1;
1768 }
1769
1770 if (total_size > 0)
1771 {
abe32cce 1772 offset = -crtl->args.pretend_args_size;
644459d0 1773 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1774 if (need_to_save_reg (regno, 1))
1775 {
1776 offset -= 16;
1777 insn = frame_emit_store (regno, sp_reg, offset);
1778 RTX_FRAME_RELATED_P (insn) = 1;
1779 }
1780 }
1781
5eb28709 1782 if (flag_pic && cfun->machine->pic_reg)
644459d0 1783 {
5eb28709 1784 rtx pic_reg = cfun->machine->pic_reg;
644459d0 1785 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1786 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1787 }
1788
1789 if (total_size > 0)
1790 {
1791 if (flag_stack_check)
1792 {
d819917f 1793 /* We compare against total_size-1 because
644459d0 1794 ($sp >= total_size) <=> ($sp > total_size-1) */
1795 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1796 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1797 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1798 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1799 {
1800 emit_move_insn (scratch_v4si, size_v4si);
1801 size_v4si = scratch_v4si;
1802 }
1803 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1804 emit_insn (gen_vec_extractv4si
1805 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1806 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1807 }
1808
1809 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1810 the value of the previous $sp because we save it as the back
1811 chain. */
1812 if (total_size <= 2000)
1813 {
1814 /* In this case we save the back chain first. */
1815 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1816 insn =
1817 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1818 }
644459d0 1819 else
1820 {
1821 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1822 insn =
1823 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1824 }
1825 RTX_FRAME_RELATED_P (insn) = 1;
1826 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1828
1829 if (total_size > 2000)
1830 {
1831 /* Save the back chain ptr */
1832 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1833 }
1834
1835 if (frame_pointer_needed)
1836 {
1837 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1838 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1839 + crtl->outgoing_args_size;
644459d0 1840 /* Set the new frame_pointer */
d8dfeb55 1841 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1842 RTX_FRAME_RELATED_P (insn) = 1;
1843 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1844 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1845 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1846 }
1847 }
1848
8c0dd614 1849 if (flag_stack_usage_info)
a512540d 1850 current_function_static_stack_size = total_size;
644459d0 1851}
1852
1853void
1854spu_expand_epilogue (bool sibcall_p)
1855{
1856 int size = get_frame_size (), offset, regno;
1857 HOST_WIDE_INT saved_regs_size, total_size;
1858 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 1859 rtx scratch_reg_0;
644459d0 1860
644459d0 1861 if (spu_naked_function_p (current_function_decl))
1862 return;
1863
1864 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1865
1866 saved_regs_size = spu_saved_regs_size ();
1867 total_size = size + saved_regs_size
abe32cce 1868 + crtl->outgoing_args_size
1869 + crtl->args.pretend_args_size;
644459d0 1870
d5bf7b64 1871 if (!crtl->is_leaf
18d50ae6 1872 || cfun->calls_alloca || total_size > 0)
644459d0 1873 total_size += STACK_POINTER_OFFSET;
1874
1875 if (total_size > 0)
1876 {
18d50ae6 1877 if (cfun->calls_alloca)
644459d0 1878 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1879 else
1880 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1881
1882
1883 if (saved_regs_size > 0)
1884 {
abe32cce 1885 offset = -crtl->args.pretend_args_size;
644459d0 1886 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1887 if (need_to_save_reg (regno, 1))
1888 {
1889 offset -= 0x10;
1890 frame_emit_load (regno, sp_reg, offset);
1891 }
1892 }
1893 }
1894
d5bf7b64 1895 if (!crtl->is_leaf)
644459d0 1896 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1897
1898 if (!sibcall_p)
1899 {
18b42941 1900 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 1901 emit_jump_insn (gen__return ());
644459d0 1902 }
644459d0 1903}
1904
1905rtx
1906spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1907{
1908 if (count != 0)
1909 return 0;
1910 /* This is inefficient because it ends up copying to a save-register
1911 which then gets saved even though $lr has already been saved. But
1912 it does generate better code for leaf functions and we don't need
1913 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1914 used for __builtin_return_address anyway, so maybe we don't care if
1915 it's inefficient. */
1916 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1917}
1918\f
1919
1920/* Given VAL, generate a constant appropriate for MODE.
1921 If MODE is a vector mode, every element will be VAL.
1922 For TImode, VAL will be zero extended to 128 bits. */
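/* Illustrative example (added): spu_const (V4SImode, -1) yields the vector
   constant { -1, -1, -1, -1 }; for float modes VAL is interpreted as the bit
   pattern of the value, per the comment in the function body.  */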
1923rtx
3754d046 1924spu_const (machine_mode mode, HOST_WIDE_INT val)
644459d0 1925{
1926 rtx inner;
1927 rtvec v;
1928 int units, i;
1929
1930 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1931 || GET_MODE_CLASS (mode) == MODE_FLOAT
1932 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1933 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1934
1935 if (GET_MODE_CLASS (mode) == MODE_INT)
1936 return immed_double_const (val, 0, mode);
1937
1938 /* val is the bit representation of the float */
1939 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1940 return hwint_to_const_double (mode, val);
1941
1942 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1943 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1944 else
1945 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1946
1947 units = GET_MODE_NUNITS (mode);
1948
1949 v = rtvec_alloc (units);
1950
1951 for (i = 0; i < units; ++i)
1952 RTVEC_ELT (v, i) = inner;
1953
1954 return gen_rtx_CONST_VECTOR (mode, v);
1955}
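/* Illustrative usage note (not from the original sources): a guarded
   sketch of what spu_const above returns.  spu_const_example is a
   hypothetical name used only for this illustration.  */
#if 0
static void
spu_const_example (void)
{
  /* A vector mode splats VAL into every element:
     (const_vector:V4SI [(const_int 5) (const_int 5)
                         (const_int 5) (const_int 5)]).  */
  rtx splat = spu_const (V4SImode, 5);

  /* A scalar integer mode simply yields (const_int 5).  */
  rtx scalar = spu_const (SImode, 5);

  gcc_assert (GET_CODE (splat) == CONST_VECTOR
	      && GET_CODE (scalar) == CONST_INT);
}
#endif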
644459d0 1956
5474166e 1957/* Create a MODE vector constant from 4 ints. */
1958rtx
3754d046 1959spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
5474166e 1960{
1961 unsigned char arr[16];
1962 arr[0] = (a >> 24) & 0xff;
1963 arr[1] = (a >> 16) & 0xff;
1964 arr[2] = (a >> 8) & 0xff;
1965 arr[3] = (a >> 0) & 0xff;
1966 arr[4] = (b >> 24) & 0xff;
1967 arr[5] = (b >> 16) & 0xff;
1968 arr[6] = (b >> 8) & 0xff;
1969 arr[7] = (b >> 0) & 0xff;
1970 arr[8] = (c >> 24) & 0xff;
1971 arr[9] = (c >> 16) & 0xff;
1972 arr[10] = (c >> 8) & 0xff;
1973 arr[11] = (c >> 0) & 0xff;
1974 arr[12] = (d >> 24) & 0xff;
1975 arr[13] = (d >> 16) & 0xff;
1976 arr[14] = (d >> 8) & 0xff;
1977 arr[15] = (d >> 0) & 0xff;
1978 return array_to_constant(mode, arr);
1979}
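/* Illustrative note (not from the original sources): the packing above
   is big-endian, so A supplies bytes 0-3, B bytes 4-7, C bytes 8-11 and
   D bytes 12-15.  The function name below is hypothetical and the code
   is guarded out of the build.  */
#if 0
static void
spu_const_from_ints_example (void)
{
  /* With a = 0x01020304 the array handed to array_to_constant begins
     0x01 0x02 0x03 0x04, i.e. the most significant byte of each int
     comes first.  */
  rtx v = spu_const_from_ints (V4SImode, 0x01020304, 0x05060708,
			       0x090a0b0c, 0x0d0e0f10);
  gcc_assert (GET_CODE (v) == CONST_VECTOR);
}
#endif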
5a976006 1980\f
1981/* branch hint stuff */
5474166e 1982
644459d0 1983/* An array of these is used to propagate hints to predecessor blocks. */
1984struct spu_bb_info
1985{
0af56f80 1986 rtx_insn *prop_jump; /* propagated from another block */
5a976006 1987 int bb_index; /* the original block. */
644459d0 1988};
5a976006 1989static struct spu_bb_info *spu_bb_info;
644459d0 1990
5a976006 1991#define STOP_HINT_P(INSN) \
aa90bb35 1992 (CALL_P(INSN) \
5a976006 1993 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1994 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1995
1996/* 1 when RTX is a hinted branch or its target. We keep track of
1997 what has been hinted so the safe-hint code can test it easily. */
1998#define HINTED_P(RTX) \
1999 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2000
2001/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2002#define SCHED_ON_EVEN_P(RTX) \
2003 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2004
 2005/* Emit a nop for INSN so that INSN and the nop will dual issue. This assumes
2006 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2007 We check for TImode to handle a MULTI1 insn which has dual issued its
b1135d9a 2008 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
5a976006 2009static void
0af56f80 2010emit_nop_for_insn (rtx_insn *insn)
644459d0 2011{
5a976006 2012 int p;
0af56f80 2013 rtx_insn *new_insn;
b1135d9a 2014
2015 /* We need to handle JUMP_TABLE_DATA separately. */
2016 if (JUMP_TABLE_DATA_P (insn))
2017 {
2018 new_insn = emit_insn_after (gen_lnop(), insn);
2019 recog_memoized (new_insn);
2020 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2021 return;
2022 }
2023
5a976006 2024 p = get_pipe (insn);
2025 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2026 new_insn = emit_insn_after (gen_lnop (), insn);
2027 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2028 {
5a976006 2029 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2030 PUT_MODE (new_insn, TImode);
2031 PUT_MODE (insn, VOIDmode);
2032 }
2033 else
2034 new_insn = emit_insn_after (gen_lnop (), insn);
2035 recog_memoized (new_insn);
d53c050c 2036 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
5a976006 2037}
2038
2039/* Insert nops in basic blocks to meet dual issue alignment
2040 requirements. Also make sure hbrp and hint instructions are at least
2041 one cycle apart, possibly inserting a nop. */
2042static void
2043pad_bb(void)
2044{
0af56f80 2045 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
5a976006 2046 int length;
2047 int addr;
2048
2049 /* This sets up INSN_ADDRESSES. */
2050 shorten_branches (get_insns ());
2051
2052 /* Keep track of length added by nops. */
2053 length = 0;
2054
2055 prev_insn = 0;
2056 insn = get_insns ();
2057 if (!active_insn_p (insn))
2058 insn = next_active_insn (insn);
2059 for (; insn; insn = next_insn)
2060 {
2061 next_insn = next_active_insn (insn);
2062 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2063 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2064 {
5a976006 2065 if (hbr_insn)
2066 {
2067 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2068 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2069 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2070 || (a1 - a0 == 4))
2071 {
2072 prev_insn = emit_insn_before (gen_lnop (), insn);
2073 PUT_MODE (prev_insn, GET_MODE (insn));
2074 PUT_MODE (insn, TImode);
d53c050c 2075 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
5a976006 2076 length += 4;
2077 }
2078 }
2079 hbr_insn = insn;
2080 }
4f8e39e2 2081 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
5a976006 2082 {
2083 if (GET_MODE (insn) == TImode)
2084 PUT_MODE (next_insn, TImode);
2085 insn = next_insn;
2086 next_insn = next_active_insn (insn);
2087 }
2088 addr = INSN_ADDRESSES (INSN_UID (insn));
2089 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2090 {
2091 if (((addr + length) & 7) != 0)
2092 {
2093 emit_nop_for_insn (prev_insn);
2094 length += 4;
2095 }
644459d0 2096 }
5a976006 2097 else if (GET_MODE (insn) == TImode
2098 && ((next_insn && GET_MODE (next_insn) != TImode)
2099 || get_attr_type (insn) == TYPE_MULTI0)
2100 && ((addr + length) & 7) != 0)
2101 {
2102 /* prev_insn will always be set because the first insn is
2103 always 8-byte aligned. */
2104 emit_nop_for_insn (prev_insn);
2105 length += 4;
2106 }
2107 prev_insn = insn;
644459d0 2108 }
644459d0 2109}
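/* A minimal sketch (not from the original sources) of the alignment
   arithmetic pad_bb relies on, assuming 4-byte instructions and 8-byte
   dual-issue fetch pairs.  The helper name is hypothetical and the code
   is guarded out of the build.  */
#if 0
static int
spu_pad_bytes_needed_example (int insn_addr, int nop_bytes_so_far)
{
  /* An insn that must start an even fetch pair has to sit on an 8-byte
     boundary.  The effective address is the INSN_ADDRESSES value plus
     the nops already inserted, and a single 4-byte nop fixes it up.  */
  int effective_addr = insn_addr + nop_bytes_so_far;
  return (effective_addr & 7) != 0 ? 4 : 0;
}
#endif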
2110
5a976006 2111\f
2112/* Routines for branch hints. */
2113
644459d0 2114static void
0af56f80 2115spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
5a976006 2116 int distance, sbitmap blocks)
644459d0 2117{
5a976006 2118 rtx branch_label = 0;
0af56f80 2119 rtx_insn *hint;
2120 rtx_insn *insn;
c86d86ff 2121 rtx_jump_table_data *table;
644459d0 2122
2123 if (before == 0 || branch == 0 || target == 0)
2124 return;
2125
5a976006 2126	  /* While scheduling we require hints to be no further than 600 bytes
 2127	     away from the branch, so we need to enforce that here too. */
644459d0 2128 if (distance > 600)
2129 return;
2130
5a976006 2131	  /* If BEFORE is a basic block note, emit the hint after the note. */
37534923 2132 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2133 before = NEXT_INSN (before);
644459d0 2134
2135 branch_label = gen_label_rtx ();
2136 LABEL_NUSES (branch_label)++;
2137 LABEL_PRESERVE_P (branch_label) = 1;
2138 insn = emit_label_before (branch_label, branch);
2139 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
08b7917c 2140 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
5a976006 2141
2142 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2143 recog_memoized (hint);
d53c050c 2144 INSN_LOCATION (hint) = INSN_LOCATION (branch);
5a976006 2145 HINTED_P (branch) = 1;
644459d0 2146
5a976006 2147 if (GET_CODE (target) == LABEL_REF)
2148 HINTED_P (XEXP (target, 0)) = 1;
2149 else if (tablejump_p (branch, 0, &table))
644459d0 2150 {
5a976006 2151 rtvec vec;
2152 int j;
2153 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2154 vec = XVEC (PATTERN (table), 0);
2155 else
2156 vec = XVEC (PATTERN (table), 1);
2157 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2158 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2159 }
5a976006 2160
2161 if (distance >= 588)
644459d0 2162 {
5a976006 2163 /* Make sure the hint isn't scheduled any earlier than this point,
 2164	         which could make it too far for the branch offset to fit. */
2fbdf9ef 2165 insn = emit_insn_before (gen_blockage (), hint);
2166 recog_memoized (insn);
d53c050c 2167 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2168 }
2169 else if (distance <= 8 * 4)
2170 {
2171 /* To guarantee at least 8 insns between the hint and branch we
2172 insert nops. */
2173 int d;
2174 for (d = distance; d < 8 * 4; d += 4)
2175 {
2176 insn =
2177 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2178 recog_memoized (insn);
d53c050c 2179 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2180 }
2181
2182 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2183 insn = emit_insn_after (gen_blockage (), hint);
2184 recog_memoized (insn);
d53c050c 2185 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2186
2187 /* Make sure any nops inserted aren't scheduled after the call. */
2188 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2189 {
2190 insn = emit_insn_before (gen_blockage (), branch);
2191 recog_memoized (insn);
d53c050c 2192 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2fbdf9ef 2193 }
644459d0 2194 }
644459d0 2195}
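/* A rough summary (not from the original sources) of the placement
   window enforced above, assuming distances are byte differences of
   INSN_ADDRESSES values.  The helper is hypothetical and guarded out of
   the build; it returns -1 when no hint is emitted, otherwise the
   number of nops needed between the hint and the branch.  */
#if 0
static int
spu_hint_nops_needed_example (int distance)
{
  if (distance > 600)	   /* Too far away: no hint is emitted at all.  */
    return -1;
  if (distance <= 8 * 4)   /* Too close: pad out to 8 insns (32 bytes).  */
    return (8 * 4 - distance) / 4;
  return 0;		   /* Otherwise the hint fits as-is.  */
}
#endif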
2196
2197/* Returns 0 if we don't want a hint for this branch. Otherwise return
2198 the rtx for the branch target. */
2199static rtx
0af56f80 2200get_branch_target (rtx_insn *branch)
644459d0 2201{
aa90bb35 2202 if (JUMP_P (branch))
644459d0 2203 {
2204 rtx set, src;
2205
2206 /* Return statements */
2207 if (GET_CODE (PATTERN (branch)) == RETURN)
2208 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2209
fcc31b99 2210 /* ASM GOTOs. */
604157f6 2211 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2212 return NULL;
2213
644459d0 2214 set = single_set (branch);
2215 src = SET_SRC (set);
2216 if (GET_CODE (SET_DEST (set)) != PC)
2217 abort ();
2218
2219 if (GET_CODE (src) == IF_THEN_ELSE)
2220 {
2221 rtx lab = 0;
2222 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2223 if (note)
2224 {
2225 /* If the more probable case is not a fall through, then
2226 try a branch hint. */
9eb946de 2227 int prob = XINT (note, 0);
644459d0 2228 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2229 && GET_CODE (XEXP (src, 1)) != PC)
2230 lab = XEXP (src, 1);
2231 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2232 && GET_CODE (XEXP (src, 2)) != PC)
2233 lab = XEXP (src, 2);
2234 }
2235 if (lab)
2236 {
2237 if (GET_CODE (lab) == RETURN)
2238 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2239 return lab;
2240 }
2241 return 0;
2242 }
2243
2244 return src;
2245 }
aa90bb35 2246 else if (CALL_P (branch))
644459d0 2247 {
2248 rtx call;
2249 /* All of our call patterns are in a PARALLEL and the CALL is
2250 the first pattern in the PARALLEL. */
2251 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2252 abort ();
2253 call = XVECEXP (PATTERN (branch), 0, 0);
2254 if (GET_CODE (call) == SET)
2255 call = SET_SRC (call);
2256 if (GET_CODE (call) != CALL)
2257 abort ();
2258 return XEXP (XEXP (call, 0), 0);
2259 }
2260 return 0;
2261}
2262
5a976006 2263/* The special $hbr register is used to prevent the insn scheduler from
2264 moving hbr insns across instructions which invalidate them. It
2265 should only be used in a clobber, and this function searches for
2266 insns which clobber it. */
2267static bool
0af56f80 2268insn_clobbers_hbr (rtx_insn *insn)
5a976006 2269{
2270 if (INSN_P (insn)
2271 && GET_CODE (PATTERN (insn)) == PARALLEL)
2272 {
2273 rtx parallel = PATTERN (insn);
2274 rtx clobber;
2275 int j;
2276 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2277 {
2278 clobber = XVECEXP (parallel, 0, j);
2279 if (GET_CODE (clobber) == CLOBBER
2280 && GET_CODE (XEXP (clobber, 0)) == REG
2281 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2282 return 1;
2283 }
2284 }
2285 return 0;
2286}
2287
2288/* Search up to 32 insns starting at FIRST:
2289 - at any kind of hinted branch, just return
2290 - at any unconditional branch in the first 15 insns, just return
2291 - at a call or indirect branch, after the first 15 insns, force it to
2292 an even address and return
2293 - at any unconditional branch, after the first 15 insns, force it to
2294 an even address.
 2295	   At the end of the search, insert an hbrp within 4 insns of FIRST,
2296 and an hbrp within 16 instructions of FIRST.
2297 */
644459d0 2298static void
0af56f80 2299insert_hbrp_for_ilb_runout (rtx_insn *first)
644459d0 2300{
0af56f80 2301 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
5a976006 2302 int addr = 0, length, first_addr = -1;
2303 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2304 int insert_lnop_after = 0;
2305 for (insn = first; insn; insn = NEXT_INSN (insn))
2306 if (INSN_P (insn))
2307 {
2308 if (first_addr == -1)
2309 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2310 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2311 length = get_attr_length (insn);
2312
2313 if (before_4 == 0 && addr + length >= 4 * 4)
2314 before_4 = insn;
2315 /* We test for 14 instructions because the first hbrp will add
2316 up to 2 instructions. */
2317 if (before_16 == 0 && addr + length >= 14 * 4)
2318 before_16 = insn;
2319
2320 if (INSN_CODE (insn) == CODE_FOR_hbr)
2321 {
2322 /* Make sure an hbrp is at least 2 cycles away from a hint.
2323 Insert an lnop after the hbrp when necessary. */
2324 if (before_4 == 0 && addr > 0)
2325 {
2326 before_4 = insn;
2327 insert_lnop_after |= 1;
2328 }
2329 else if (before_4 && addr <= 4 * 4)
2330 insert_lnop_after |= 1;
2331 if (before_16 == 0 && addr > 10 * 4)
2332 {
2333 before_16 = insn;
2334 insert_lnop_after |= 2;
2335 }
2336 else if (before_16 && addr <= 14 * 4)
2337 insert_lnop_after |= 2;
2338 }
644459d0 2339
5a976006 2340 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2341 {
2342 if (addr < hbrp_addr0)
2343 hbrp_addr0 = addr;
2344 else if (addr < hbrp_addr1)
2345 hbrp_addr1 = addr;
2346 }
644459d0 2347
5a976006 2348 if (CALL_P (insn) || JUMP_P (insn))
2349 {
2350 if (HINTED_P (insn))
2351 return;
2352
2353 /* Any branch after the first 15 insns should be on an even
2354 address to avoid a special case branch. There might be
2355 some nops and/or hbrps inserted, so we test after 10
2356 insns. */
2357 if (addr > 10 * 4)
2358 SCHED_ON_EVEN_P (insn) = 1;
2359 }
644459d0 2360
5a976006 2361 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2362 return;
2363
2364
2365 if (addr + length >= 32 * 4)
644459d0 2366 {
5a976006 2367 gcc_assert (before_4 && before_16);
2368 if (hbrp_addr0 > 4 * 4)
644459d0 2369 {
5a976006 2370 insn =
2371 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2372 recog_memoized (insn);
d53c050c 2373 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2374 INSN_ADDRESSES_NEW (insn,
2375 INSN_ADDRESSES (INSN_UID (before_4)));
2376 PUT_MODE (insn, GET_MODE (before_4));
2377 PUT_MODE (before_4, TImode);
2378 if (insert_lnop_after & 1)
644459d0 2379 {
5a976006 2380 insn = emit_insn_before (gen_lnop (), before_4);
2381 recog_memoized (insn);
d53c050c 2382 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2383 INSN_ADDRESSES_NEW (insn,
2384 INSN_ADDRESSES (INSN_UID (before_4)));
2385 PUT_MODE (insn, TImode);
644459d0 2386 }
644459d0 2387 }
5a976006 2388 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2389 && hbrp_addr1 > 16 * 4)
644459d0 2390 {
5a976006 2391 insn =
2392 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2393 recog_memoized (insn);
d53c050c 2394 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2395 INSN_ADDRESSES_NEW (insn,
2396 INSN_ADDRESSES (INSN_UID (before_16)));
2397 PUT_MODE (insn, GET_MODE (before_16));
2398 PUT_MODE (before_16, TImode);
2399 if (insert_lnop_after & 2)
644459d0 2400 {
5a976006 2401 insn = emit_insn_before (gen_lnop (), before_16);
2402 recog_memoized (insn);
d53c050c 2403 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2404 INSN_ADDRESSES_NEW (insn,
2405 INSN_ADDRESSES (INSN_UID
2406 (before_16)));
2407 PUT_MODE (insn, TImode);
644459d0 2408 }
2409 }
5a976006 2410 return;
644459d0 2411 }
644459d0 2412 }
5a976006 2413 else if (BARRIER_P (insn))
2414 return;
644459d0 2415
644459d0 2416}
5a976006 2417
2418/* The SPU might hang when it executes 48 inline instructions after a
2419 hinted branch jumps to its hinted target. The beginning of a
851d9296 2420 function and the return from a call might have been hinted, and
2421 must be handled as well. To prevent a hang we insert 2 hbrps. The
2422 first should be within 6 insns of the branch target. The second
2423 should be within 22 insns of the branch target. When determining
2424 if hbrps are necessary, we look for only 32 inline instructions,
 2425	   because up to 12 nops and 4 hbrps could be inserted. Similarly,
2426 when inserting new hbrps, we insert them within 4 and 16 insns of
2427 the target. */
644459d0 2428static void
5a976006 2429insert_hbrp (void)
644459d0 2430{
0af56f80 2431 rtx_insn *insn;
5a976006 2432 if (TARGET_SAFE_HINTS)
644459d0 2433 {
5a976006 2434 shorten_branches (get_insns ());
2435 /* Insert hbrp at beginning of function */
2436 insn = next_active_insn (get_insns ());
2437 if (insn)
2438 insert_hbrp_for_ilb_runout (insn);
2439 /* Insert hbrp after hinted targets. */
2440 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2441 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2442 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2443 }
644459d0 2444}
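/* A condensed restatement (not from the original sources) of the hbrp
   windows used above, as a guarded, hypothetical sketch: after a hinted
   target we want one hbrp within 4 insns (16 bytes) and a second within
   16 insns (64 bytes), scanning at most 32 insns because the fix-up
   itself may add nops and hbrps.  */
#if 0
static void
spu_hbrp_windows_example (int hbrp_addr0, int hbrp_addr1)
{
  /* Mirrors the tests in insert_hbrp_for_ilb_runout: the first
     iprefetch is only needed when no hbrp already sits in the 4-insn
     window, the second only when neither existing hbrp covers the
     16-insn window.  */
  int need_first = hbrp_addr0 > 4 * 4;
  int need_second = (hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		    && hbrp_addr1 > 16 * 4;
  (void) need_first;
  (void) need_second;
}
#endif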
2445
5a976006 2446static int in_spu_reorg;
2447
8a42230a 2448static void
2449spu_var_tracking (void)
2450{
2451 if (flag_var_tracking)
2452 {
2453 df_analyze ();
2454 timevar_push (TV_VAR_TRACKING);
2455 variable_tracking_main ();
2456 timevar_pop (TV_VAR_TRACKING);
2457 df_finish_pass (false);
2458 }
2459}
2460
5a976006 2461/* Insert branch hints. There are no branch optimizations after this
2462 pass, so it's safe to set our branch hints now. */
644459d0 2463static void
5a976006 2464spu_machine_dependent_reorg (void)
644459d0 2465{
5a976006 2466 sbitmap blocks;
2467 basic_block bb;
0af56f80 2468 rtx_insn *branch, *insn;
5a976006 2469 rtx branch_target = 0;
2470 int branch_addr = 0, insn_addr, required_dist = 0;
2471 int i;
2472 unsigned int j;
644459d0 2473
5a976006 2474 if (!TARGET_BRANCH_HINTS || optimize == 0)
2475 {
2476 /* We still do it for unoptimized code because an external
2477 function might have hinted a call or return. */
a54ca889 2478 compute_bb_for_insn ();
5a976006 2479 insert_hbrp ();
2480 pad_bb ();
8a42230a 2481 spu_var_tracking ();
a54ca889 2482 free_bb_for_insn ();
5a976006 2483 return;
2484 }
644459d0 2485
fe672ac0 2486 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
53c5d9d4 2487 bitmap_clear (blocks);
644459d0 2488
5a976006 2489 in_spu_reorg = 1;
2490 compute_bb_for_insn ();
2491
a7a0184d 2492 /* (Re-)discover loops so that bb->loop_father can be used
2493 in the analysis below. */
2494 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2495
5a976006 2496 compact_blocks ();
2497
2498 spu_bb_info =
a28770e1 2499 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
5a976006 2500 sizeof (struct spu_bb_info));
2501
2502 /* We need exact insn addresses and lengths. */
2503 shorten_branches (get_insns ());
2504
a28770e1 2505 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
644459d0 2506 {
f5a6b05f 2507 bb = BASIC_BLOCK_FOR_FN (cfun, i);
5a976006 2508 branch = 0;
2509 if (spu_bb_info[i].prop_jump)
644459d0 2510 {
5a976006 2511 branch = spu_bb_info[i].prop_jump;
2512 branch_target = get_branch_target (branch);
2513 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2514 required_dist = spu_hint_dist;
2515 }
2516 /* Search from end of a block to beginning. In this loop, find
 2517	 jumps which need a branch hint and emit the hints only when:
2518 - it's an indirect branch and we're at the insn which sets
2519 the register
2520 - we're at an insn that will invalidate the hint. e.g., a
2521 call, another hint insn, inline asm that clobbers $hbr, and
2522 some inlined operations (divmodsi4). Don't consider jumps
2523 because they are only at the end of a block and are
2524 considered when we are deciding whether to propagate
2525 - we're getting too far away from the branch. The hbr insns
 2526	 only have a signed 10-bit offset.
2527 We go back as far as possible so the branch will be considered
2528 for propagation when we get to the beginning of the block. */
2529 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2530 {
2531 if (INSN_P (insn))
2532 {
2533 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2534 if (branch
2535 && ((GET_CODE (branch_target) == REG
2536 && set_of (branch_target, insn) != NULL_RTX)
2537 || insn_clobbers_hbr (insn)
2538 || branch_addr - insn_addr > 600))
2539 {
0af56f80 2540 rtx_insn *next = NEXT_INSN (insn);
5a976006 2541 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2542 if (insn != BB_END (bb)
2543 && branch_addr - next_addr >= required_dist)
2544 {
2545 if (dump_file)
2546 fprintf (dump_file,
2547 "hint for %i in block %i before %i\n",
2548 INSN_UID (branch), bb->index,
2549 INSN_UID (next));
2550 spu_emit_branch_hint (next, branch, branch_target,
2551 branch_addr - next_addr, blocks);
2552 }
2553 branch = 0;
2554 }
2555
2556 /* JUMP_P will only be true at the end of a block. When
2557 branch is already set it means we've previously decided
2558 to propagate a hint for that branch into this block. */
2559 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2560 {
2561 branch = 0;
2562 if ((branch_target = get_branch_target (insn)))
2563 {
2564 branch = insn;
2565 branch_addr = insn_addr;
2566 required_dist = spu_hint_dist;
2567 }
2568 }
2569 }
2570 if (insn == BB_HEAD (bb))
2571 break;
2572 }
2573
2574 if (branch)
2575 {
2576 /* If we haven't emitted a hint for this branch yet, it might
2577 be profitable to emit it in one of the predecessor blocks,
2578 especially for loops. */
0af56f80 2579 rtx_insn *bbend;
5a976006 2580 basic_block prev = 0, prop = 0, prev2 = 0;
2581 int loop_exit = 0, simple_loop = 0;
2582 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2583
2584 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2585 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2586 prev = EDGE_PRED (bb, j)->src;
2587 else
2588 prev2 = EDGE_PRED (bb, j)->src;
2589
2590 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2591 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2592 loop_exit = 1;
2593 else if (EDGE_SUCC (bb, j)->dest == bb)
2594 simple_loop = 1;
2595
2596 /* If this branch is a loop exit then propagate to previous
2597 fallthru block. This catches the cases when it is a simple
2598 loop or when there is an initial branch into the loop. */
2599 if (prev && (loop_exit || simple_loop)
a7a0184d 2600 && bb_loop_depth (prev) <= bb_loop_depth (bb))
5a976006 2601 prop = prev;
2602
 2603	  /* If there is only one adjacent predecessor, don't propagate
a7a0184d 2604 outside this loop. */
5a976006 2605 else if (prev && single_pred_p (bb)
a7a0184d 2606 && prev->loop_father == bb->loop_father)
5a976006 2607 prop = prev;
2608
2609 /* If this is the JOIN block of a simple IF-THEN then
9d75589a 2610 propagate the hint to the HEADER block. */
5a976006 2611 else if (prev && prev2
2612 && EDGE_COUNT (bb->preds) == 2
2613 && EDGE_COUNT (prev->preds) == 1
2614 && EDGE_PRED (prev, 0)->src == prev2
a7a0184d 2615 && prev2->loop_father == bb->loop_father
5a976006 2616 && GET_CODE (branch_target) != REG)
2617 prop = prev;
2618
2619 /* Don't propagate when:
2620 - this is a simple loop and the hint would be too far
2621 - this is not a simple loop and there are 16 insns in
2622 this block already
2623 - the predecessor block ends in a branch that will be
2624 hinted
2625 - the predecessor block ends in an insn that invalidates
2626 the hint */
2627 if (prop
2628 && prop->index >= 0
2629 && (bbend = BB_END (prop))
2630 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2631 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2632 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2633 {
2634 if (dump_file)
2635 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2636 "for %i (loop_exit %i simple_loop %i dist %i)\n",
a7a0184d 2637 bb->index, prop->index, bb_loop_depth (bb),
5a976006 2638 INSN_UID (branch), loop_exit, simple_loop,
2639 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2640
2641 spu_bb_info[prop->index].prop_jump = branch;
2642 spu_bb_info[prop->index].bb_index = i;
2643 }
2644 else if (branch_addr - next_addr >= required_dist)
2645 {
2646 if (dump_file)
2647 fprintf (dump_file, "hint for %i in block %i before %i\n",
2648 INSN_UID (branch), bb->index,
2649 INSN_UID (NEXT_INSN (insn)));
2650 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2651 branch_addr - next_addr, blocks);
2652 }
2653 branch = 0;
644459d0 2654 }
644459d0 2655 }
5a976006 2656 free (spu_bb_info);
644459d0 2657
53c5d9d4 2658 if (!bitmap_empty_p (blocks))
5a976006 2659 find_many_sub_basic_blocks (blocks);
2660
2661 /* We have to schedule to make sure alignment is ok. */
fc00614f 2662 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
5a976006 2663
2664 /* The hints need to be scheduled, so call it again. */
2665 schedule_insns ();
2fbdf9ef 2666 df_finish_pass (true);
5a976006 2667
2668 insert_hbrp ();
2669
2670 pad_bb ();
2671
8f1d58ad 2672 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2673 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2674 {
2675 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2676	 between its branch label and the branch. We don't move the
2677 label because GCC expects it at the beginning of the block. */
2678 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2679 rtx label_ref = XVECEXP (unspec, 0, 0);
4cd001d5 2680 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2681 rtx_insn *branch;
8f1d58ad 2682 int offset = 0;
2683 for (branch = NEXT_INSN (label);
2684 !JUMP_P (branch) && !CALL_P (branch);
2685 branch = NEXT_INSN (branch))
2686 if (NONJUMP_INSN_P (branch))
2687 offset += get_attr_length (branch);
2688 if (offset > 0)
29c05e22 2689 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
8f1d58ad 2690 }
5a976006 2691
8a42230a 2692 spu_var_tracking ();
5a976006 2693
a7a0184d 2694 loop_optimizer_finalize ();
2695
5a976006 2696 free_bb_for_insn ();
2697
2698 in_spu_reorg = 0;
644459d0 2699}
2700\f
2701
2702/* Insn scheduling routines, primarily for dual issue. */
2703static int
2704spu_sched_issue_rate (void)
2705{
2706 return 2;
2707}
2708
2709static int
0af56f80 2710uses_ls_unit(rtx_insn *insn)
644459d0 2711{
5a976006 2712 rtx set = single_set (insn);
2713 if (set != 0
2714 && (GET_CODE (SET_DEST (set)) == MEM
2715 || GET_CODE (SET_SRC (set)) == MEM))
2716 return 1;
2717 return 0;
644459d0 2718}
2719
2720static int
0af56f80 2721get_pipe (rtx_insn *insn)
644459d0 2722{
2723 enum attr_type t;
2724 /* Handle inline asm */
2725 if (INSN_CODE (insn) == -1)
2726 return -1;
2727 t = get_attr_type (insn);
2728 switch (t)
2729 {
2730 case TYPE_CONVERT:
2731 return -2;
2732 case TYPE_MULTI0:
2733 return -1;
2734
2735 case TYPE_FX2:
2736 case TYPE_FX3:
2737 case TYPE_SPR:
2738 case TYPE_NOP:
2739 case TYPE_FXB:
2740 case TYPE_FPD:
2741 case TYPE_FP6:
2742 case TYPE_FP7:
644459d0 2743 return 0;
2744
2745 case TYPE_LNOP:
2746 case TYPE_SHUF:
2747 case TYPE_LOAD:
2748 case TYPE_STORE:
2749 case TYPE_BR:
2750 case TYPE_MULTI1:
2751 case TYPE_HBR:
5a976006 2752 case TYPE_IPREFETCH:
644459d0 2753 return 1;
2754 default:
2755 abort ();
2756 }
2757}
2758
5a976006 2759
2760/* haifa-sched.c has a static variable that keeps track of the current
2761 cycle. It is passed to spu_sched_reorder, and we record it here for
2762 use by spu_sched_variable_issue. It won't be accurate if the
 2763	 scheduler updates its clock_var between the two calls. */
2764static int clock_var;
2765
2766/* This is used to keep track of insn alignment. Set to 0 at the
2767 beginning of each block and increased by the "length" attr of each
2768 insn scheduled. */
2769static int spu_sched_length;
2770
2771/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2772 ready list appropriately in spu_sched_reorder(). */
2773static int pipe0_clock;
2774static int pipe1_clock;
2775
2776static int prev_clock_var;
2777
2778static int prev_priority;
2779
2780/* The SPU needs to load the next ilb sometime during the execution of
2781 the previous ilb. There is a potential conflict if every cycle has a
2782 load or store. To avoid the conflict we make sure the load/store
2783 unit is free for at least one cycle during the execution of insns in
2784 the previous ilb. */
2785static int spu_ls_first;
2786static int prev_ls_clock;
2787
2788static void
2789spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2790 int max_ready ATTRIBUTE_UNUSED)
2791{
2792 spu_sched_length = 0;
2793}
2794
2795static void
2796spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2797 int max_ready ATTRIBUTE_UNUSED)
2798{
2799 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2800 {
2801 /* When any block might be at least 8-byte aligned, assume they
2802 will all be at least 8-byte aligned to make sure dual issue
2803 works out correctly. */
2804 spu_sched_length = 0;
2805 }
2806 spu_ls_first = INT_MAX;
2807 clock_var = -1;
2808 prev_ls_clock = -1;
2809 pipe0_clock = -1;
2810 pipe1_clock = -1;
2811 prev_clock_var = -1;
2812 prev_priority = -1;
2813}
2814
644459d0 2815static int
5a976006 2816spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
0af56f80 2817 int verbose ATTRIBUTE_UNUSED,
18282db0 2818 rtx_insn *insn, int more)
644459d0 2819{
5a976006 2820 int len;
2821 int p;
644459d0 2822 if (GET_CODE (PATTERN (insn)) == USE
2823 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2824 || (len = get_attr_length (insn)) == 0)
2825 return more;
2826
2827 spu_sched_length += len;
2828
2829 /* Reset on inline asm */
2830 if (INSN_CODE (insn) == -1)
2831 {
2832 spu_ls_first = INT_MAX;
2833 pipe0_clock = -1;
2834 pipe1_clock = -1;
2835 return 0;
2836 }
2837 p = get_pipe (insn);
2838 if (p == 0)
2839 pipe0_clock = clock_var;
2840 else
2841 pipe1_clock = clock_var;
2842
2843 if (in_spu_reorg)
2844 {
2845 if (clock_var - prev_ls_clock > 1
2846 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2847 spu_ls_first = INT_MAX;
2848 if (uses_ls_unit (insn))
2849 {
2850 if (spu_ls_first == INT_MAX)
2851 spu_ls_first = spu_sched_length;
2852 prev_ls_clock = clock_var;
2853 }
2854
2855 /* The scheduler hasn't inserted the nop, but we will later on.
2856 Include those nops in spu_sched_length. */
2857 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2858 spu_sched_length += 4;
2859 prev_clock_var = clock_var;
2860
2861 /* more is -1 when called from spu_sched_reorder for new insns
2862 that don't have INSN_PRIORITY */
2863 if (more >= 0)
2864 prev_priority = INSN_PRIORITY (insn);
2865 }
2866
9d75589a 2867 /* Always try issuing more insns. spu_sched_reorder will decide
5a976006 2868 when the cycle should be advanced. */
2869 return 1;
2870}
2871
2872/* This function is called for both TARGET_SCHED_REORDER and
2873 TARGET_SCHED_REORDER2. */
2874static int
2875spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
b24ef467 2876 rtx_insn **ready, int *nreadyp, int clock)
5a976006 2877{
2878 int i, nready = *nreadyp;
2879 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
b24ef467 2880 rtx_insn *insn;
5a976006 2881
2882 clock_var = clock;
2883
2884 if (nready <= 0 || pipe1_clock >= clock)
2885 return 0;
2886
2887 /* Find any rtl insns that don't generate assembly insns and schedule
2888 them first. */
2889 for (i = nready - 1; i >= 0; i--)
2890 {
2891 insn = ready[i];
2892 if (INSN_CODE (insn) == -1
2893 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 2894 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 2895 {
2896 ready[i] = ready[nready - 1];
2897 ready[nready - 1] = insn;
2898 return 1;
2899 }
2900 }
2901
2902 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2903 for (i = 0; i < nready; i++)
2904 if (INSN_CODE (ready[i]) != -1)
2905 {
2906 insn = ready[i];
2907 switch (get_attr_type (insn))
2908 {
2909 default:
2910 case TYPE_MULTI0:
2911 case TYPE_CONVERT:
2912 case TYPE_FX2:
2913 case TYPE_FX3:
2914 case TYPE_SPR:
2915 case TYPE_NOP:
2916 case TYPE_FXB:
2917 case TYPE_FPD:
2918 case TYPE_FP6:
2919 case TYPE_FP7:
2920 pipe_0 = i;
2921 break;
2922 case TYPE_LOAD:
2923 case TYPE_STORE:
2924 pipe_ls = i;
2925 case TYPE_LNOP:
2926 case TYPE_SHUF:
2927 case TYPE_BR:
2928 case TYPE_MULTI1:
2929 case TYPE_HBR:
2930 pipe_1 = i;
2931 break;
2932 case TYPE_IPREFETCH:
2933 pipe_hbrp = i;
2934 break;
2935 }
2936 }
2937
2938 /* In the first scheduling phase, schedule loads and stores together
2939 to increase the chance they will get merged during postreload CSE. */
2940 if (!reload_completed && pipe_ls >= 0)
2941 {
2942 insn = ready[pipe_ls];
2943 ready[pipe_ls] = ready[nready - 1];
2944 ready[nready - 1] = insn;
2945 return 1;
2946 }
2947
2948 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2949 if (pipe_hbrp >= 0)
2950 pipe_1 = pipe_hbrp;
2951
2952 /* When we have loads/stores in every cycle of the last 15 insns and
2953 we are about to schedule another load/store, emit an hbrp insn
2954 instead. */
2955 if (in_spu_reorg
2956 && spu_sched_length - spu_ls_first >= 4 * 15
2957 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2958 {
2959 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2960 recog_memoized (insn);
2961 if (pipe0_clock < clock)
2962 PUT_MODE (insn, TImode);
2963 spu_sched_variable_issue (file, verbose, insn, -1);
2964 return 0;
2965 }
2966
2967 /* In general, we want to emit nops to increase dual issue, but dual
2968 issue isn't faster when one of the insns could be scheduled later
 2969	 without affecting the critical path. We look at INSN_PRIORITY to
2970 make a good guess, but it isn't perfect so -mdual-nops=n can be
 2971	 used to affect it. */
2972 if (in_spu_reorg && spu_dual_nops < 10)
2973 {
9d75589a 2974 /* When we are at an even address and we are not issuing nops to
5a976006 2975 improve scheduling then we need to advance the cycle. */
2976 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2977 && (spu_dual_nops == 0
2978 || (pipe_1 != -1
2979 && prev_priority >
2980 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2981 return 0;
2982
2983 /* When at an odd address, schedule the highest priority insn
2984 without considering pipeline. */
2985 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2986 && (spu_dual_nops == 0
2987 || (prev_priority >
2988 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2989 return 1;
2990 }
2991
2992
2993 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2994 pipe0 insn in the ready list, schedule it. */
2995 if (pipe0_clock < clock && pipe_0 >= 0)
2996 schedule_i = pipe_0;
2997
2998 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2999 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3000 else
3001 schedule_i = pipe_1;
3002
3003 if (schedule_i > -1)
3004 {
3005 insn = ready[schedule_i];
3006 ready[schedule_i] = ready[nready - 1];
3007 ready[nready - 1] = insn;
3008 return 1;
3009 }
3010 return 0;
644459d0 3011}
3012
3013/* INSN is dependent on DEP_INSN. */
3014static int
18282db0 3015spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
644459d0 3016{
5a976006 3017 rtx set;
3018
3019 /* The blockage pattern is used to prevent instructions from being
3020 moved across it and has no cost. */
3021 if (INSN_CODE (insn) == CODE_FOR_blockage
3022 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3023 return 0;
3024
9d98604b 3025 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3026 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3027 return 0;
3028
3029 /* Make sure hbrps are spread out. */
3030 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3031 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3032 return 8;
3033
3034 /* Make sure hints and hbrps are 2 cycles apart. */
3035 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3036 || INSN_CODE (insn) == CODE_FOR_hbr)
3037 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3038 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3039 return 2;
3040
3041 /* An hbrp has no real dependency on other insns. */
3042 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3043 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3044 return 0;
3045
3046 /* Assuming that it is unlikely an argument register will be used in
3047 the first cycle of the called function, we reduce the cost for
3048 slightly better scheduling of dep_insn. When not hinted, the
3049 mispredicted branch would hide the cost as well. */
3050 if (CALL_P (insn))
3051 {
3052 rtx target = get_branch_target (insn);
3053 if (GET_CODE (target) != REG || !set_of (target, insn))
3054 return cost - 2;
3055 return cost;
3056 }
3057
3058 /* And when returning from a function, let's assume the return values
3059 are completed sooner too. */
3060 if (CALL_P (dep_insn))
644459d0 3061 return cost - 2;
5a976006 3062
 3063	 /* Make sure an instruction that loads from the back chain is scheduled
3064 away from the return instruction so a hint is more likely to get
3065 issued. */
3066 if (INSN_CODE (insn) == CODE_FOR__return
3067 && (set = single_set (dep_insn))
3068 && GET_CODE (SET_DEST (set)) == REG
3069 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3070 return 20;
3071
644459d0 3072 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3073 scheduler makes every insn in a block anti-dependent on the final
3074 jump_insn. We adjust here so higher cost insns will get scheduled
3075 earlier. */
5a976006 3076 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3077 return insn_cost (dep_insn) - 3;
5a976006 3078
644459d0 3079 return cost;
3080}
3081\f
3082/* Create a CONST_DOUBLE from a string. */
842ae815 3083rtx
3754d046 3084spu_float_const (const char *string, machine_mode mode)
644459d0 3085{
3086 REAL_VALUE_TYPE value;
3087 value = REAL_VALUE_ATOF (string, mode);
3088 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3089}
3090
644459d0 3091int
3092spu_constant_address_p (rtx x)
3093{
3094 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3095 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3096 || GET_CODE (x) == HIGH);
3097}
3098
3099static enum spu_immediate
3100which_immediate_load (HOST_WIDE_INT val)
3101{
3102 gcc_assert (val == trunc_int_for_mode (val, SImode));
3103
3104 if (val >= -0x8000 && val <= 0x7fff)
3105 return SPU_IL;
3106 if (val >= 0 && val <= 0x3ffff)
3107 return SPU_ILA;
3108 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3109 return SPU_ILH;
3110 if ((val & 0xffff) == 0)
3111 return SPU_ILHU;
3112
3113 return SPU_NONE;
3114}
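/* Worked examples (not from the original sources) for the
   classification above, guarded out of the build: each value is paired
   with the single instruction that can load it in one shot.  */
#if 0
static void
which_immediate_load_example (void)
{
  gcc_assert (which_immediate_load (-1) == SPU_IL);	      /* il: signed 16 bits.  */
  gcc_assert (which_immediate_load (0x20000) == SPU_ILA);     /* ila: unsigned 18 bits.  */
  gcc_assert (which_immediate_load (0x12341234) == SPU_ILH);  /* ilh: equal halfwords.  */
  gcc_assert (which_immediate_load (0x12340000) == SPU_ILHU); /* ilhu: low half zero.  */
  gcc_assert (which_immediate_load (0x12345678) == SPU_NONE); /* needs ilhu + iohl.  */
}
#endif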
3115
dea01258 3116/* Return true when OP can be loaded by one of the il instructions, or
3117 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3118int
3754d046 3119immediate_load_p (rtx op, machine_mode mode)
dea01258 3120{
3121 if (CONSTANT_P (op))
3122 {
3123 enum immediate_class c = classify_immediate (op, mode);
5df189be 3124 return c == IC_IL1 || c == IC_IL1s
3072d30e 3125 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3126 }
3127 return 0;
3128}
3129
 3130/* Return true if the first SIZE bytes of ARR form a constant that can be
3131 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3132 represent the size and offset of the instruction to use. */
3133static int
3134cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3135{
3136 int cpat, run, i, start;
3137 cpat = 1;
3138 run = 0;
3139 start = -1;
3140 for (i = 0; i < size && cpat; i++)
3141 if (arr[i] != i+16)
3142 {
3143 if (!run)
3144 {
3145 start = i;
3146 if (arr[i] == 3)
3147 run = 1;
3148 else if (arr[i] == 2 && arr[i+1] == 3)
3149 run = 2;
3150 else if (arr[i] == 0)
3151 {
3152 while (arr[i+run] == run && i+run < 16)
3153 run++;
3154 if (run != 4 && run != 8)
3155 cpat = 0;
3156 }
3157 else
3158 cpat = 0;
3159 if ((i & (run-1)) != 0)
3160 cpat = 0;
3161 i += run;
3162 }
3163 else
3164 cpat = 0;
3165 }
b01a6dc3 3166 if (cpat && (run || size < 16))
dea01258 3167 {
3168 if (run == 0)
3169 run = 1;
3170 if (prun)
3171 *prun = run;
3172 if (pstart)
3173 *pstart = start == -1 ? 16-run : start;
3174 return 1;
3175 }
3176 return 0;
3177}
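/* A guarded, hypothetical example (not from the original sources) of
   the pattern cpat_info accepts: outside the run, byte i holds i+16
   (the pass-through shuffle pattern); the run itself holds 0..run-1,
   which is the control word cbd, chd, cwd or cdd would generate.  */
#if 0
static void
cpat_info_example (void)
{
  /* The pattern a cwd with byte offset 4 would produce: a 4-byte run
     of { 0, 1, 2, 3 } at byte 4, pass-through bytes everywhere else.  */
  unsigned char arr[16] = { 16, 17, 18, 19, 0, 1, 2, 3,
			    24, 25, 26, 27, 28, 29, 30, 31 };
  int run, start;
  gcc_assert (cpat_info (arr, 16, &run, &start)
	      && run == 4 && start == 4);
}
#endif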
3178
3179/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3180 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3181static enum immediate_class
3754d046 3182classify_immediate (rtx op, machine_mode mode)
644459d0 3183{
3184 HOST_WIDE_INT val;
3185 unsigned char arr[16];
5df189be 3186 int i, j, repeated, fsmbi, repeat;
dea01258 3187
3188 gcc_assert (CONSTANT_P (op));
3189
644459d0 3190 if (GET_MODE (op) != VOIDmode)
3191 mode = GET_MODE (op);
3192
dea01258 3193 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3194 if (!flag_pic
3195 && mode == V4SImode
dea01258 3196 && GET_CODE (op) == CONST_VECTOR
3197 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3198 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3199 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3200 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3201 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3202 op = CONST_VECTOR_ELT (op, 0);
644459d0 3203
dea01258 3204 switch (GET_CODE (op))
3205 {
3206 case SYMBOL_REF:
3207 case LABEL_REF:
3208 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3209
dea01258 3210 case CONST:
0cfc65d4 3211 /* We can never know if the resulting address fits in 18 bits and can be
3212 loaded with ila. For now, assume the address will not overflow if
3213 the displacement is "small" (fits 'K' constraint). */
3214 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3215 {
3216 rtx sym = XEXP (XEXP (op, 0), 0);
3217 rtx cst = XEXP (XEXP (op, 0), 1);
3218
3219 if (GET_CODE (sym) == SYMBOL_REF
3220 && GET_CODE (cst) == CONST_INT
3221 && satisfies_constraint_K (cst))
3222 return IC_IL1s;
3223 }
3224 return IC_IL2s;
644459d0 3225
dea01258 3226 case HIGH:
3227 return IC_IL1s;
3228
3229 case CONST_VECTOR:
3230 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3231 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3232 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3233 return IC_POOL;
3234 /* Fall through. */
3235
3236 case CONST_INT:
3237 case CONST_DOUBLE:
3238 constant_to_array (mode, op, arr);
644459d0 3239
dea01258 3240 /* Check that each 4-byte slot is identical. */
3241 repeated = 1;
3242 for (i = 4; i < 16; i += 4)
3243 for (j = 0; j < 4; j++)
3244 if (arr[j] != arr[i + j])
3245 repeated = 0;
3246
3247 if (repeated)
3248 {
3249 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3250 val = trunc_int_for_mode (val, SImode);
3251
3252 if (which_immediate_load (val) != SPU_NONE)
3253 return IC_IL1;
3254 }
3255
3256 /* Any mode of 2 bytes or smaller can be loaded with an il
3257 instruction. */
3258 gcc_assert (GET_MODE_SIZE (mode) > 2);
3259
3260 fsmbi = 1;
5df189be 3261 repeat = 0;
dea01258 3262 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3263 if (arr[i] != 0 && repeat == 0)
3264 repeat = arr[i];
3265 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3266 fsmbi = 0;
3267 if (fsmbi)
5df189be 3268 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3269
3270 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3271 return IC_CPAT;
3272
3273 if (repeated)
3274 return IC_IL2;
3275
3276 return IC_POOL;
3277 default:
3278 break;
3279 }
3280 gcc_unreachable ();
644459d0 3281}
3282
3283static enum spu_immediate
3284which_logical_immediate (HOST_WIDE_INT val)
3285{
3286 gcc_assert (val == trunc_int_for_mode (val, SImode));
3287
3288 if (val >= -0x200 && val <= 0x1ff)
3289 return SPU_ORI;
3290 if (val >= 0 && val <= 0xffff)
3291 return SPU_IOHL;
3292 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3293 {
3294 val = trunc_int_for_mode (val, HImode);
3295 if (val >= -0x200 && val <= 0x1ff)
3296 return SPU_ORHI;
3297 if ((val & 0xff) == ((val >> 8) & 0xff))
3298 {
3299 val = trunc_int_for_mode (val, QImode);
3300 if (val >= -0x200 && val <= 0x1ff)
3301 return SPU_ORBI;
3302 }
3303 }
3304 return SPU_NONE;
3305}
3306
5df189be 3307/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3308 CONST_DOUBLEs. */
3309static int
3310const_vector_immediate_p (rtx x)
3311{
3312 int i;
3313 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3314 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3315 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3316 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3317 return 0;
3318 return 1;
3319}
3320
644459d0 3321int
3754d046 3322logical_immediate_p (rtx op, machine_mode mode)
644459d0 3323{
3324 HOST_WIDE_INT val;
3325 unsigned char arr[16];
3326 int i, j;
3327
3328 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3329 || GET_CODE (op) == CONST_VECTOR);
3330
5df189be 3331 if (GET_CODE (op) == CONST_VECTOR
3332 && !const_vector_immediate_p (op))
3333 return 0;
3334
644459d0 3335 if (GET_MODE (op) != VOIDmode)
3336 mode = GET_MODE (op);
3337
3338 constant_to_array (mode, op, arr);
3339
3340 /* Check that bytes are repeated. */
3341 for (i = 4; i < 16; i += 4)
3342 for (j = 0; j < 4; j++)
3343 if (arr[j] != arr[i + j])
3344 return 0;
3345
3346 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3347 val = trunc_int_for_mode (val, SImode);
3348
3349 i = which_logical_immediate (val);
3350 return i != SPU_NONE && i != SPU_IOHL;
3351}
3352
3353int
3754d046 3354iohl_immediate_p (rtx op, machine_mode mode)
644459d0 3355{
3356 HOST_WIDE_INT val;
3357 unsigned char arr[16];
3358 int i, j;
3359
3360 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3361 || GET_CODE (op) == CONST_VECTOR);
3362
5df189be 3363 if (GET_CODE (op) == CONST_VECTOR
3364 && !const_vector_immediate_p (op))
3365 return 0;
3366
644459d0 3367 if (GET_MODE (op) != VOIDmode)
3368 mode = GET_MODE (op);
3369
3370 constant_to_array (mode, op, arr);
3371
3372 /* Check that bytes are repeated. */
3373 for (i = 4; i < 16; i += 4)
3374 for (j = 0; j < 4; j++)
3375 if (arr[j] != arr[i + j])
3376 return 0;
3377
3378 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3379 val = trunc_int_for_mode (val, SImode);
3380
3381 return val >= 0 && val <= 0xffff;
3382}
3383
3384int
3754d046 3385arith_immediate_p (rtx op, machine_mode mode,
644459d0 3386 HOST_WIDE_INT low, HOST_WIDE_INT high)
3387{
3388 HOST_WIDE_INT val;
3389 unsigned char arr[16];
3390 int bytes, i, j;
3391
3392 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3393 || GET_CODE (op) == CONST_VECTOR);
3394
5df189be 3395 if (GET_CODE (op) == CONST_VECTOR
3396 && !const_vector_immediate_p (op))
3397 return 0;
3398
644459d0 3399 if (GET_MODE (op) != VOIDmode)
3400 mode = GET_MODE (op);
3401
3402 constant_to_array (mode, op, arr);
3403
3404 if (VECTOR_MODE_P (mode))
3405 mode = GET_MODE_INNER (mode);
3406
3407 bytes = GET_MODE_SIZE (mode);
3408 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3409
3410 /* Check that bytes are repeated. */
3411 for (i = bytes; i < 16; i += bytes)
3412 for (j = 0; j < bytes; j++)
3413 if (arr[j] != arr[i + j])
3414 return 0;
3415
3416 val = arr[0];
3417 for (j = 1; j < bytes; j++)
3418 val = (val << 8) | arr[j];
3419
3420 val = trunc_int_for_mode (val, mode);
3421
3422 return val >= low && val <= high;
3423}
3424
56c7bfc2 3425/* Return TRUE when OP is an immediate and an exact power of 2, i.e.
 3426   OP is 2^scale with LOW <= scale <= HIGH. When OP is a vector,
3427 all entries must be the same. */
3428bool
3754d046 3429exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
56c7bfc2 3430{
3754d046 3431 machine_mode int_mode;
56c7bfc2 3432 HOST_WIDE_INT val;
3433 unsigned char arr[16];
3434 int bytes, i, j;
3435
3436 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3437 || GET_CODE (op) == CONST_VECTOR);
3438
3439 if (GET_CODE (op) == CONST_VECTOR
3440 && !const_vector_immediate_p (op))
3441 return 0;
3442
3443 if (GET_MODE (op) != VOIDmode)
3444 mode = GET_MODE (op);
3445
3446 constant_to_array (mode, op, arr);
3447
3448 if (VECTOR_MODE_P (mode))
3449 mode = GET_MODE_INNER (mode);
3450
3451 bytes = GET_MODE_SIZE (mode);
3452 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3453
3454 /* Check that bytes are repeated. */
3455 for (i = bytes; i < 16; i += bytes)
3456 for (j = 0; j < bytes; j++)
3457 if (arr[j] != arr[i + j])
3458 return 0;
3459
3460 val = arr[0];
3461 for (j = 1; j < bytes; j++)
3462 val = (val << 8) | arr[j];
3463
3464 val = trunc_int_for_mode (val, int_mode);
3465
3466 /* Currently, we only handle SFmode */
3467 gcc_assert (mode == SFmode);
3468 if (mode == SFmode)
3469 {
3470 int exp = (val >> 23) - 127;
3471 return val > 0 && (val & 0x007fffff) == 0
3472 && exp >= low && exp <= high;
3473 }
3474 return FALSE;
3475}
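/* Worked numbers (not from the original sources) for the SFmode test
   above, guarded out of the build: VAL is the raw IEEE single-precision
   bit pattern, so a power of two has a zero mantissa field and its
   exponent is the biased exponent field minus 127.  */
#if 0
static void
exp2_immediate_example (void)
{
  /* 2.0f = 0x40000000 -> exponent (0x80 - 127) = 1
     0.5f = 0x3f000000 -> exponent (0x7e - 127) = -1
     3.0f = 0x40400000 -> mantissa bits set, so not a power of two.  */
  int exp_two = (0x40000000 >> 23) - 127;
  int exp_half = (0x3f000000 >> 23) - 127;
  gcc_assert (exp_two == 1 && exp_half == -1
	      && (0x40400000 & 0x007fffff) != 0);
}
#endif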
3476
6cf5579e 3477/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3478
6f4e40cd 3479static bool
3480ea_symbol_ref_p (const_rtx x)
6cf5579e 3481{
6cf5579e 3482 tree decl;
3483
3484 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3485 {
3486 rtx plus = XEXP (x, 0);
3487 rtx op0 = XEXP (plus, 0);
3488 rtx op1 = XEXP (plus, 1);
3489 if (GET_CODE (op1) == CONST_INT)
3490 x = op0;
3491 }
3492
3493 return (GET_CODE (x) == SYMBOL_REF
3494 && (decl = SYMBOL_REF_DECL (x)) != 0
3495 && TREE_CODE (decl) == VAR_DECL
3496 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3497}
3498
644459d0 3499/* We accept:
5b865faf 3500 - any 32-bit constant (SImode, SFmode)
644459d0 3501 - any constant that can be generated with fsmbi (any mode)
5b865faf 3502 - a 64-bit constant where the high and low bits are identical
644459d0 3503 (DImode, DFmode)
5b865faf 3504 - a 128-bit constant where the four 32-bit words match. */
ca316360 3505bool
3754d046 3506spu_legitimate_constant_p (machine_mode mode, rtx x)
644459d0 3507{
6f4e40cd 3508 subrtx_iterator::array_type array;
5df189be 3509 if (GET_CODE (x) == HIGH)
3510 x = XEXP (x, 0);
6cf5579e 3511
3512 /* Reject any __ea qualified reference. These can't appear in
3513 instructions but must be forced to the constant pool. */
6f4e40cd 3514 FOR_EACH_SUBRTX (iter, array, x, ALL)
3515 if (ea_symbol_ref_p (*iter))
3516 return 0;
6cf5579e 3517
644459d0 3518 /* V4SI with all identical symbols is valid. */
5df189be 3519 if (!flag_pic
ca316360 3520 && mode == V4SImode
644459d0 3521 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3522 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3523 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3524 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3525 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3526 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3527
5df189be 3528 if (GET_CODE (x) == CONST_VECTOR
3529 && !const_vector_immediate_p (x))
3530 return 0;
644459d0 3531 return 1;
3532}
3533
 3534/* Valid addresses are:
3535 - symbol_ref, label_ref, const
3536 - reg
9d98604b 3537 - reg + const_int, where const_int is 16 byte aligned
644459d0 3538 - reg + reg, alignment doesn't matter
3539 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3540 ignore the 4 least significant bits of the const. We only care about
3541 16 byte modes because the expand phase will change all smaller MEM
3542 references to TImode. */
3543static bool
3754d046 3544spu_legitimate_address_p (machine_mode mode,
fd50b071 3545 rtx x, bool reg_ok_strict)
644459d0 3546{
9d98604b 3547 int aligned = GET_MODE_SIZE (mode) >= 16;
3548 if (aligned
3549 && GET_CODE (x) == AND
644459d0 3550 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3551 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3552 x = XEXP (x, 0);
3553 switch (GET_CODE (x))
3554 {
644459d0 3555 case LABEL_REF:
6cf5579e 3556 return !TARGET_LARGE_MEM;
3557
9d98604b 3558 case SYMBOL_REF:
644459d0 3559 case CONST:
6cf5579e 3560 /* Keep __ea references until reload so that spu_expand_mov can see them
3561 in MEMs. */
6f4e40cd 3562 if (ea_symbol_ref_p (x))
6cf5579e 3563 return !reload_in_progress && !reload_completed;
9d98604b 3564 return !TARGET_LARGE_MEM;
644459d0 3565
3566 case CONST_INT:
3567 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3568
3569 case SUBREG:
3570 x = XEXP (x, 0);
9d98604b 3571 if (REG_P (x))
3572 return 0;
644459d0 3573
3574 case REG:
3575 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3576
3577 case PLUS:
3578 case LO_SUM:
3579 {
3580 rtx op0 = XEXP (x, 0);
3581 rtx op1 = XEXP (x, 1);
3582 if (GET_CODE (op0) == SUBREG)
3583 op0 = XEXP (op0, 0);
3584 if (GET_CODE (op1) == SUBREG)
3585 op1 = XEXP (op1, 0);
644459d0 3586 if (GET_CODE (op0) == REG
3587 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3588 && GET_CODE (op1) == CONST_INT
fa695424 3589 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3590 /* If virtual registers are involved, the displacement will
3591 change later on anyway, so checking would be premature.
3592 Reload will make sure the final displacement after
3593 register elimination is OK. */
3594 || op0 == arg_pointer_rtx
3595 || op0 == frame_pointer_rtx
3596 || op0 == virtual_stack_vars_rtx)
9d98604b 3597 && (!aligned || (INTVAL (op1) & 15) == 0))
3598 return TRUE;
644459d0 3599 if (GET_CODE (op0) == REG
3600 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3601 && GET_CODE (op1) == REG
3602 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3603 return TRUE;
644459d0 3604 }
3605 break;
3606
3607 default:
3608 break;
3609 }
9d98604b 3610 return FALSE;
644459d0 3611}
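/* A minimal sketch (not from the original sources) of the reg+const
   displacement rule checked above.  The helper name is hypothetical and
   the code is guarded out of the build; ALIGNED corresponds to accesses
   of 16 bytes or more, where lqd/stqd ignore the low four address bits.  */
#if 0
static int
spu_dform_offset_ok_example (HOST_WIDE_INT offset, int aligned)
{
  /* The displacement must fit the signed 14-bit byte range, and for
     quadword accesses it must also be a multiple of 16 so that nothing
     is lost when the low four bits are ignored.  */
  if (offset < -0x2000 || offset > 0x1fff)
    return 0;
  return !aligned || (offset & 15) == 0;
}
#endif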
3612
6cf5579e 3613/* Like spu_legitimate_address_p, except with named addresses. */
3614static bool
3754d046 3615spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
6cf5579e 3616 bool reg_ok_strict, addr_space_t as)
3617{
3618 if (as == ADDR_SPACE_EA)
3619 return (REG_P (x) && (GET_MODE (x) == EAmode));
3620
3621 else if (as != ADDR_SPACE_GENERIC)
3622 gcc_unreachable ();
3623
3624 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3625}
3626
644459d0 3627/* When the address is reg + const_int, force the const_int into a
fa7637bd 3628 register. */
3defb88e 3629static rtx
644459d0 3630spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3754d046 3631 machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3632{
3633 rtx op0, op1;
3634 /* Make sure both operands are registers. */
3635 if (GET_CODE (x) == PLUS)
3636 {
3637 op0 = XEXP (x, 0);
3638 op1 = XEXP (x, 1);
3639 if (ALIGNED_SYMBOL_REF_P (op0))
3640 {
3641 op0 = force_reg (Pmode, op0);
3642 mark_reg_pointer (op0, 128);
3643 }
3644 else if (GET_CODE (op0) != REG)
3645 op0 = force_reg (Pmode, op0);
3646 if (ALIGNED_SYMBOL_REF_P (op1))
3647 {
3648 op1 = force_reg (Pmode, op1);
3649 mark_reg_pointer (op1, 128);
3650 }
3651 else if (GET_CODE (op1) != REG)
3652 op1 = force_reg (Pmode, op1);
3653 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3654 }
41e3a0c7 3655 return x;
644459d0 3656}
3657
6cf5579e 3658/* Like spu_legitimate_address, except with named address support. */
3659static rtx
3754d046 3660spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
6cf5579e 3661 addr_space_t as)
3662{
3663 if (as != ADDR_SPACE_GENERIC)
3664 return x;
3665
3666 return spu_legitimize_address (x, oldx, mode);
3667}
3668
fa695424 3669/* Reload reg + const_int for out-of-range displacements. */
3670rtx
3754d046 3671spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
fa695424 3672 int opnum, int type)
3673{
3674 bool removed_and = false;
3675
3676 if (GET_CODE (ad) == AND
3677 && CONST_INT_P (XEXP (ad, 1))
3678 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3679 {
3680 ad = XEXP (ad, 0);
3681 removed_and = true;
3682 }
3683
3684 if (GET_CODE (ad) == PLUS
3685 && REG_P (XEXP (ad, 0))
3686 && CONST_INT_P (XEXP (ad, 1))
3687 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3688 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3689 {
3690 /* Unshare the sum. */
3691 ad = copy_rtx (ad);
3692
3693 /* Reload the displacement. */
3694 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3695 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3696 opnum, (enum reload_type) type);
3697
3698 /* Add back AND for alignment if we stripped it. */
3699 if (removed_and)
3700 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3701
3702 return ad;
3703 }
3704
3705 return NULL_RTX;
3706}
3707
644459d0 3708/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3709 struct attribute_spec.handler. */
3710static tree
3711spu_handle_fndecl_attribute (tree * node,
3712 tree name,
3713 tree args ATTRIBUTE_UNUSED,
3714 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3715{
3716 if (TREE_CODE (*node) != FUNCTION_DECL)
3717 {
67a779df 3718 warning (0, "%qE attribute only applies to functions",
3719 name);
644459d0 3720 *no_add_attrs = true;
3721 }
3722
3723 return NULL_TREE;
3724}
3725
3726/* Handle the "vector" attribute. */
3727static tree
3728spu_handle_vector_attribute (tree * node, tree name,
3729 tree args ATTRIBUTE_UNUSED,
3730 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3731{
3732 tree type = *node, result = NULL_TREE;
3754d046 3733 machine_mode mode;
644459d0 3734 int unsigned_p;
3735
3736 while (POINTER_TYPE_P (type)
3737 || TREE_CODE (type) == FUNCTION_TYPE
3738 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3739 type = TREE_TYPE (type);
3740
3741 mode = TYPE_MODE (type);
3742
3743 unsigned_p = TYPE_UNSIGNED (type);
3744 switch (mode)
3745 {
3746 case DImode:
3747 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3748 break;
3749 case SImode:
3750 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3751 break;
3752 case HImode:
3753 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3754 break;
3755 case QImode:
3756 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3757 break;
3758 case SFmode:
3759 result = V4SF_type_node;
3760 break;
3761 case DFmode:
3762 result = V2DF_type_node;
3763 break;
3764 default:
3765 break;
3766 }
3767
3768 /* Propagate qualifiers attached to the element type
3769 onto the vector type. */
3770 if (result && result != type && TYPE_QUALS (type))
3771 result = build_qualified_type (result, TYPE_QUALS (type));
3772
3773 *no_add_attrs = true; /* No need to hang on to the attribute. */
3774
3775 if (!result)
67a779df 3776 warning (0, "%qE attribute ignored", name);
644459d0 3777 else
d991e6e8 3778 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3779
3780 return NULL_TREE;
3781}
3782
f2b32076 3783/* Return nonzero if FUNC is a naked function. */
644459d0 3784static int
3785spu_naked_function_p (tree func)
3786{
3787 tree a;
3788
3789 if (TREE_CODE (func) != FUNCTION_DECL)
3790 abort ();
3791
3792 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3793 return a != NULL_TREE;
3794}
3795
3796int
3797spu_initial_elimination_offset (int from, int to)
3798{
3799 int saved_regs_size = spu_saved_regs_size ();
3800 int sp_offset = 0;
d5bf7b64 3801 if (!crtl->is_leaf || crtl->outgoing_args_size
644459d0 3802 || get_frame_size () || saved_regs_size)
3803 sp_offset = STACK_POINTER_OFFSET;
3804 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3805 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3806 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3807 return get_frame_size ();
644459d0 3808 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3809 return sp_offset + crtl->outgoing_args_size
644459d0 3810 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3811 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3812 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3813 else
3814 gcc_unreachable ();
644459d0 3815}
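/* Worked example (illustrative; assumes STACK_POINTER_OFFSET is 32 on
   this target): a non-leaf function with 48 bytes of locals, 32 bytes
   of outgoing arguments and no saved registers gets sp_offset = 32, so
   eliminating FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields
   48 + 32 + 32 = 112, while eliminating it to HARD_FRAME_POINTER_REGNUM
   yields just the 48 bytes of frame.  */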
3816
3817rtx
fb80456a 3818spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3819{
3754d046 3820 machine_mode mode = TYPE_MODE (type);
644459d0 3821 int byte_size = ((mode == BLKmode)
3822 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3823
3824 /* Make sure small structs are left justified in a register. */
3825 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3826 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3827 {
3754d046 3828 machine_mode smode;
644459d0 3829 rtvec v;
3830 int i;
3831 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3832 int n = byte_size / UNITS_PER_WORD;
3833 v = rtvec_alloc (nregs);
3834 for (i = 0; i < n; i++)
3835 {
3836 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3837 gen_rtx_REG (TImode,
3838 FIRST_RETURN_REGNUM
3839 + i),
3840 GEN_INT (UNITS_PER_WORD * i));
3841 byte_size -= UNITS_PER_WORD;
3842 }
3843
3844 if (n < nregs)
3845 {
3846 if (byte_size < 4)
3847 byte_size = 4;
3848 smode =
3849 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3850 RTVEC_ELT (v, n) =
3851 gen_rtx_EXPR_LIST (VOIDmode,
3852 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3853 GEN_INT (UNITS_PER_WORD * n));
3854 }
3855 return gen_rtx_PARALLEL (mode, v);
3856 }
3857 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3858}
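/* Worked example (illustrative; assumes UNITS_PER_WORD is 16 and the
   struct fits in MAX_REGISTER_RETURN registers): returning a 20-byte
   struct gives nregs = 2 and n = 1, so the PARALLEL holds a TImode
   piece in FIRST_RETURN_REGNUM at offset 0 and an SImode piece for the
   remaining 4 bytes in FIRST_RETURN_REGNUM + 1 at offset 16.  */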
3859
ee9034d4 3860static rtx
39cba157 3861spu_function_arg (cumulative_args_t cum_v,
3754d046 3862 machine_mode mode,
ee9034d4 3863 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3864{
39cba157 3865 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 3866 int byte_size;
3867
a08c5dd0 3868 if (*cum >= MAX_REGISTER_ARGS)
644459d0 3869 return 0;
3870
3871 byte_size = ((mode == BLKmode)
3872 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3873
 3874 /* The ABI does not allow a parameter to be passed partially in a
 3875 register and partially on the stack. */
a08c5dd0 3876 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 3877 return 0;
3878
3879 /* Make sure small structs are left justified in a register. */
3880 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3881 && byte_size < UNITS_PER_WORD && byte_size > 0)
3882 {
3754d046 3883 machine_mode smode;
644459d0 3884 rtx gr_reg;
3885 if (byte_size < 4)
3886 byte_size = 4;
3887 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3888 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 3889 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 3890 const0_rtx);
3891 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3892 }
3893 else
a08c5dd0 3894 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 3895}
3896
ee9034d4 3897static void
3754d046 3898spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
ee9034d4 3899 const_tree type, bool named ATTRIBUTE_UNUSED)
3900{
39cba157 3901 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3902
ee9034d4 3903 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3904 ? 1
3905 : mode == BLKmode
3906 ? ((int_size_in_bytes (type) + 15) / 16)
3907 : mode == VOIDmode
3908 ? 1
3909 : HARD_REGNO_NREGS (cum, mode));
3910}
3911
644459d0 3912/* Variable sized types are passed by reference. */
3913static bool
39cba157 3914spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3754d046 3915 machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3916 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3917{
3918 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3919}
3920\f
3921
3922/* Var args. */
3923
3924/* Create and return the va_list datatype.
3925
3926 On SPU, va_list is an array type equivalent to
3927
3928 typedef struct __va_list_tag
3929 {
3930 void *__args __attribute__((__aligned(16)));
3931 void *__skip __attribute__((__aligned(16)));
3932
3933 } va_list[1];
3934
fa7637bd 3935 where __args points to the arg that will be returned by the next
644459d0 3936 va_arg(), and __skip points to the previous stack frame such that
3937 when __args == __skip we should advance __args by 32 bytes. */
3938static tree
3939spu_build_builtin_va_list (void)
3940{
3941 tree f_args, f_skip, record, type_decl;
3942 bool owp;
3943
3944 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3945
3946 type_decl =
54e46243 3947 build_decl (BUILTINS_LOCATION,
3948 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3949
54e46243 3950 f_args = build_decl (BUILTINS_LOCATION,
3951 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3952 f_skip = build_decl (BUILTINS_LOCATION,
3953 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3954
3955 DECL_FIELD_CONTEXT (f_args) = record;
3956 DECL_ALIGN (f_args) = 128;
3957 DECL_USER_ALIGN (f_args) = 1;
3958
3959 DECL_FIELD_CONTEXT (f_skip) = record;
3960 DECL_ALIGN (f_skip) = 128;
3961 DECL_USER_ALIGN (f_skip) = 1;
3962
bc907808 3963 TYPE_STUB_DECL (record) = type_decl;
644459d0 3964 TYPE_NAME (record) = type_decl;
3965 TYPE_FIELDS (record) = f_args;
1767a056 3966 DECL_CHAIN (f_args) = f_skip;
644459d0 3967
 3968 /* We know this type is padded, and that is intended. It is an internal
3969 type so hide the warnings from the user. */
3970 owp = warn_padded;
3971 warn_padded = false;
3972
3973 layout_type (record);
3974
3975 warn_padded = owp;
3976
3977 /* The correct type is an array type of one element. */
3978 return build_array_type (record, build_index_type (size_zero_node));
3979}
3980
3981/* Implement va_start by filling the va_list structure VALIST.
3982 NEXTARG points to the first anonymous stack argument.
3983
3984 The following global variables are used to initialize
3985 the va_list structure:
3986
abe32cce 3987 crtl->args.info;
644459d0 3988 the CUMULATIVE_ARGS for this function
3989
abe32cce 3990 crtl->args.arg_offset_rtx:
644459d0 3991 holds the offset of the first anonymous stack argument
3992 (relative to the virtual arg pointer). */
3993
8a58ed0a 3994static void
644459d0 3995spu_va_start (tree valist, rtx nextarg)
3996{
3997 tree f_args, f_skip;
3998 tree args, skip, t;
3999
4000 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4001 f_skip = DECL_CHAIN (f_args);
644459d0 4002
170efcd4 4003 valist = build_simple_mem_ref (valist);
644459d0 4004 args =
4005 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4006 skip =
4007 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4008
4009 /* Find the __args area. */
4010 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4011 if (crtl->args.pretend_args_size > 0)
2cc66f2a 4012 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 4013 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4014 TREE_SIDE_EFFECTS (t) = 1;
4015 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4016
4017 /* Find the __skip area. */
4018 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 4019 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4020 - STACK_POINTER_OFFSET));
75a70cf9 4021 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4022 TREE_SIDE_EFFECTS (t) = 1;
4023 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4024}
4025
4026/* Gimplify va_arg by updating the va_list structure
4027 VALIST as required to retrieve an argument of type
4028 TYPE, and returning that argument.
4029
4030 ret = va_arg(VALIST, TYPE);
4031
4032 generates code equivalent to:
4033
4034 paddedsize = (sizeof(TYPE) + 15) & -16;
4035 if (VALIST.__args + paddedsize > VALIST.__skip
4036 && VALIST.__args <= VALIST.__skip)
4037 addr = VALIST.__skip + 32;
4038 else
4039 addr = VALIST.__args;
4040 VALIST.__args = addr + paddedsize;
4041 ret = *(TYPE *)addr;
4042 */
4043static tree
75a70cf9 4044spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4045 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4046{
4047 tree f_args, f_skip;
4048 tree args, skip;
4049 HOST_WIDE_INT size, rsize;
2cc66f2a 4050 tree addr, tmp;
644459d0 4051 bool pass_by_reference_p;
4052
4053 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4054 f_skip = DECL_CHAIN (f_args);
644459d0 4055
182cf5a9 4056 valist = build_simple_mem_ref (valist);
644459d0 4057 args =
4058 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4059 skip =
4060 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4061
4062 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4063
 4064 /* If an object is dynamically sized, a pointer to it is passed
4065 instead of the object itself. */
27a82950 4066 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4067 false);
644459d0 4068 if (pass_by_reference_p)
4069 type = build_pointer_type (type);
4070 size = int_size_in_bytes (type);
4071 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4072
4073 /* build conditional expression to calculate addr. The expression
4074 will be gimplified later. */
2cc66f2a 4075 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4076 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4077 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4078 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4079 unshare_expr (skip)));
644459d0 4080
4081 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4082 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4083 unshare_expr (args));
644459d0 4084
75a70cf9 4085 gimplify_assign (addr, tmp, pre_p);
644459d0 4086
4087 /* update VALIST.__args */
2cc66f2a 4088 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4089 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4090
8115f0af 4091 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4092 addr);
644459d0 4093
4094 if (pass_by_reference_p)
4095 addr = build_va_arg_indirect_ref (addr);
4096
4097 return build_va_arg_indirect_ref (addr);
4098}
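/* Worked example (illustrative; assumes UNITS_PER_WORD is 16): for
   va_arg (ap, int), size = 4 and rsize = 16, so the expansion normally
   reads the int through __args and advances __args by 16; only when
   __args + 16 > __skip and __args <= __skip does it instead read from
   __skip + 32 and set __args just past that slot.  */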
4099
4100/* Save parameter registers starting with the register that corresponds
 4101 to the first unnamed parameter. If the first unnamed parameter is
4102 in the stack then save no registers. Set pretend_args_size to the
4103 amount of space needed to save the registers. */
39cba157 4104static void
3754d046 4105spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
644459d0 4106 tree type, int *pretend_size, int no_rtl)
4107{
4108 if (!no_rtl)
4109 {
4110 rtx tmp;
4111 int regno;
4112 int offset;
39cba157 4113 int ncum = *get_cumulative_args (cum);
644459d0 4114
 4115 /* cum currently points to the last named argument; we want to
4116 start at the next argument. */
39cba157 4117 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4118
4119 offset = -STACK_POINTER_OFFSET;
4120 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4121 {
4122 tmp = gen_frame_mem (V4SImode,
29c05e22 4123 plus_constant (Pmode, virtual_incoming_args_rtx,
644459d0 4124 offset));
4125 emit_move_insn (tmp,
4126 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4127 offset += 16;
4128 }
4129 *pretend_size = offset + STACK_POINTER_OFFSET;
4130 }
4131}
4132\f
b2d7ede1 4133static void
644459d0 4134spu_conditional_register_usage (void)
4135{
4136 if (flag_pic)
4137 {
4138 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4139 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4140 }
644459d0 4141}
4142
9d98604b 4143/* This is called any time we inspect the alignment of a register for
4144 addresses. */
644459d0 4145static int
9d98604b 4146reg_aligned_for_addr (rtx x)
644459d0 4147{
9d98604b 4148 int regno =
4149 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4150 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4151}
4152
69ced2d6 4153/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4154 into its SYMBOL_REF_FLAGS. */
4155static void
4156spu_encode_section_info (tree decl, rtx rtl, int first)
4157{
4158 default_encode_section_info (decl, rtl, first);
4159
4160 /* If a variable has a forced alignment to < 16 bytes, mark it with
4161 SYMBOL_FLAG_ALIGN1. */
4162 if (TREE_CODE (decl) == VAR_DECL
4163 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4164 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4165}
4166
644459d0 4167/* Return TRUE if we are certain the mem refers to a complete object
4168 which is both 16-byte aligned and padded to a 16-byte boundary. This
4169 would make it safe to store with a single instruction.
4170 We guarantee the alignment and padding for static objects by aligning
 4171 all of them to 16 bytes (via DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4172 FIXME: We currently cannot guarantee this for objects on the stack
4173 because assign_parm_setup_stack calls assign_stack_local with the
4174 alignment of the parameter mode and in that case the alignment never
4175 gets adjusted by LOCAL_ALIGNMENT. */
4176static int
4177store_with_one_insn_p (rtx mem)
4178{
3754d046 4179 machine_mode mode = GET_MODE (mem);
644459d0 4180 rtx addr = XEXP (mem, 0);
9d98604b 4181 if (mode == BLKmode)
644459d0 4182 return 0;
9d98604b 4183 if (GET_MODE_SIZE (mode) >= 16)
4184 return 1;
644459d0 4185 /* Only static objects. */
4186 if (GET_CODE (addr) == SYMBOL_REF)
4187 {
4188 /* We use the associated declaration to make sure the access is
fa7637bd 4189 referring to the whole object.
851d9296 4190 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4191 if it is necessary. Will there be cases where one exists, and
4192 the other does not? Will there be cases where both exist, but
4193 have different types? */
4194 tree decl = MEM_EXPR (mem);
4195 if (decl
4196 && TREE_CODE (decl) == VAR_DECL
4197 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4198 return 1;
4199 decl = SYMBOL_REF_DECL (addr);
4200 if (decl
4201 && TREE_CODE (decl) == VAR_DECL
4202 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4203 return 1;
4204 }
4205 return 0;
4206}
4207
9d98604b 4208/* Return 1 when the address is not valid for a simple load and store as
4209 required by the '_mov*' patterns. We could make this less strict
 4210 for loads, but we prefer MEMs to look the same so they are more
4211 likely to be merged. */
4212static int
4213address_needs_split (rtx mem)
4214{
4215 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4216 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4217 || !(store_with_one_insn_p (mem)
4218 || mem_is_padded_component_ref (mem))))
4219 return 1;
4220
4221 return 0;
4222}
4223
6cf5579e 4224static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4225static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4226static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4227
4228/* MEM is known to be an __ea qualified memory access. Emit a call to
4229 fetch the ppu memory to local store, and return its address in local
4230 store. */
4231
4232static void
4233ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4234{
4235 if (is_store)
4236 {
4237 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4238 if (!cache_fetch_dirty)
4239 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4240 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4241 2, ea_addr, EAmode, ndirty, SImode);
4242 }
4243 else
4244 {
4245 if (!cache_fetch)
4246 cache_fetch = init_one_libfunc ("__cache_fetch");
4247 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4248 1, ea_addr, EAmode);
4249 }
4250}
4251
4252/* Like ea_load_store, but do the cache tag comparison and, for stores,
4253 dirty bit marking, inline.
4254
4255 The cache control data structure is an array of
4256
4257 struct __cache_tag_array
4258 {
4259 unsigned int tag_lo[4];
4260 unsigned int tag_hi[4];
4261 void *data_pointer[4];
4262 int reserved[4];
4263 vector unsigned short dirty_bits[4];
4264 } */
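/* Worked example of the lookup below (illustrative; the address and a
   4K __cache_tag_array_size are made up): for ea_addr = 0x12345,
   block_off = ea_addr & 127 = 0x45, tag = ea_addr & -128 = 0x12300, and
   tag_index = ea_addr & (0x1000 - 128) = 0x300, the byte offset of the
   matching 128-byte struct __cache_tag_array entry within the array.  */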
4265
4266static void
4267ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4268{
4269 rtx ea_addr_si;
4270 HOST_WIDE_INT v;
4271 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4272 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4273 rtx index_mask = gen_reg_rtx (SImode);
4274 rtx tag_arr = gen_reg_rtx (Pmode);
4275 rtx splat_mask = gen_reg_rtx (TImode);
4276 rtx splat = gen_reg_rtx (V4SImode);
4277 rtx splat_hi = NULL_RTX;
4278 rtx tag_index = gen_reg_rtx (Pmode);
4279 rtx block_off = gen_reg_rtx (SImode);
4280 rtx tag_addr = gen_reg_rtx (Pmode);
4281 rtx tag = gen_reg_rtx (V4SImode);
4282 rtx cache_tag = gen_reg_rtx (V4SImode);
4283 rtx cache_tag_hi = NULL_RTX;
4284 rtx cache_ptrs = gen_reg_rtx (TImode);
4285 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4286 rtx tag_equal = gen_reg_rtx (V4SImode);
4287 rtx tag_equal_hi = NULL_RTX;
4288 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4289 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4290 rtx eq_index = gen_reg_rtx (SImode);
0af56f80 4291 rtx bcomp, hit_label, hit_ref, cont_label;
4292 rtx_insn *insn;
6cf5579e 4293
4294 if (spu_ea_model != 32)
4295 {
4296 splat_hi = gen_reg_rtx (V4SImode);
4297 cache_tag_hi = gen_reg_rtx (V4SImode);
4298 tag_equal_hi = gen_reg_rtx (V4SImode);
4299 }
4300
29c05e22 4301 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
6cf5579e 4302 emit_move_insn (tag_arr, tag_arr_sym);
4303 v = 0x0001020300010203LL;
4304 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4305 ea_addr_si = ea_addr;
4306 if (spu_ea_model != 32)
4307 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4308
4309 /* tag_index = ea_addr & (tag_array_size - 128) */
4310 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4311
4312 /* splat ea_addr to all 4 slots. */
4313 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4314 /* Similarly for high 32 bits of ea_addr. */
4315 if (spu_ea_model != 32)
4316 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4317
4318 /* block_off = ea_addr & 127 */
4319 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4320
4321 /* tag_addr = tag_arr + tag_index */
4322 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4323
4324 /* Read cache tags. */
4325 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4326 if (spu_ea_model != 32)
4327 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
29c05e22 4328 plus_constant (Pmode,
4329 tag_addr, 16)));
6cf5579e 4330
4331 /* tag = ea_addr & -128 */
4332 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4333
4334 /* Read all four cache data pointers. */
4335 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
29c05e22 4336 plus_constant (Pmode,
4337 tag_addr, 32)));
6cf5579e 4338
4339 /* Compare tags. */
4340 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4341 if (spu_ea_model != 32)
4342 {
4343 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4344 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4345 }
4346
4347 /* At most one of the tags compare equal, so tag_equal has one
4348 32-bit slot set to all 1's, with the other slots all zero.
4349 gbb picks off low bit from each byte in the 128-bit registers,
4350 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4351 we have a hit. */
4352 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4353 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4354
4355 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4356 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4357
 4358 /* This allows us to rotate the corresponding cache data pointer into
 4359 slot 0 (a rotate of eq_index mod 16 bytes). */
4360 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4361 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4362
4363 /* Add block offset to form final data address. */
4364 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4365
4366 /* Check that we did hit. */
4367 hit_label = gen_label_rtx ();
4368 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4369 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4370 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4371 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4372 hit_ref, pc_rtx)));
4373 /* Say that this branch is very likely to happen. */
4374 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
9eb946de 4375 add_int_reg_note (insn, REG_BR_PROB, v);
6cf5579e 4376
4377 ea_load_store (mem, is_store, ea_addr, data_addr);
4378 cont_label = gen_label_rtx ();
4379 emit_jump_insn (gen_jump (cont_label));
4380 emit_barrier ();
4381
4382 emit_label (hit_label);
4383
4384 if (is_store)
4385 {
4386 HOST_WIDE_INT v_hi;
4387 rtx dirty_bits = gen_reg_rtx (TImode);
4388 rtx dirty_off = gen_reg_rtx (SImode);
4389 rtx dirty_128 = gen_reg_rtx (TImode);
4390 rtx neg_block_off = gen_reg_rtx (SImode);
4391
4392 /* Set up mask with one dirty bit per byte of the mem we are
4393 writing, starting from top bit. */
4394 v_hi = v = -1;
4395 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4396 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4397 {
4398 v_hi = v;
4399 v = 0;
4400 }
4401 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4402
4403 /* Form index into cache dirty_bits. eq_index is one of
4404 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4405 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4406 offset to each of the four dirty_bits elements. */
4407 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4408
4409 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4410
4411 /* Rotate bit mask to proper bit. */
4412 emit_insn (gen_negsi2 (neg_block_off, block_off));
4413 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4414 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4415
4416 /* Or in the new dirty bits. */
4417 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4418
4419 /* Store. */
4420 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4421 }
4422
4423 emit_label (cont_label);
4424}
4425
4426static rtx
4427expand_ea_mem (rtx mem, bool is_store)
4428{
4429 rtx ea_addr;
4430 rtx data_addr = gen_reg_rtx (Pmode);
4431 rtx new_mem;
4432
4433 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4434 if (optimize_size || optimize == 0)
4435 ea_load_store (mem, is_store, ea_addr, data_addr);
4436 else
4437 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4438
4439 if (ea_alias_set == -1)
4440 ea_alias_set = new_alias_set ();
4441
4442 /* We generate a new MEM RTX to refer to the copy of the data
4443 in the cache. We do not copy memory attributes (except the
4444 alignment) from the original MEM, as they may no longer apply
4445 to the cache copy. */
4446 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4447 set_mem_alias_set (new_mem, ea_alias_set);
4448 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4449
4450 return new_mem;
4451}
4452
644459d0 4453int
3754d046 4454spu_expand_mov (rtx * ops, machine_mode mode)
644459d0 4455{
4456 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4457 {
4458 /* Perform the move in the destination SUBREG's inner mode. */
4459 ops[0] = SUBREG_REG (ops[0]);
4460 mode = GET_MODE (ops[0]);
4461 ops[1] = gen_lowpart_common (mode, ops[1]);
4462 gcc_assert (ops[1]);
4463 }
644459d0 4464
4465 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4466 {
4467 rtx from = SUBREG_REG (ops[1]);
3754d046 4468 machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4469
4470 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4471 && GET_MODE_CLASS (imode) == MODE_INT
4472 && subreg_lowpart_p (ops[1]));
4473
4474 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4475 imode = SImode;
4476 if (imode != GET_MODE (from))
4477 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4478
4479 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4480 {
d6bf3b14 4481 enum insn_code icode = convert_optab_handler (trunc_optab,
4482 mode, imode);
644459d0 4483 emit_insn (GEN_FCN (icode) (ops[0], from));
4484 }
4485 else
4486 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4487 return 1;
4488 }
4489
4490 /* At least one of the operands needs to be a register. */
4491 if ((reload_in_progress | reload_completed) == 0
4492 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4493 {
4494 rtx temp = force_reg (mode, ops[1]);
4495 emit_move_insn (ops[0], temp);
4496 return 1;
4497 }
4498 if (reload_in_progress || reload_completed)
4499 {
dea01258 4500 if (CONSTANT_P (ops[1]))
4501 return spu_split_immediate (ops);
644459d0 4502 return 0;
4503 }
9d98604b 4504
4505 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4506 extend them. */
4507 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4508 {
9d98604b 4509 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4510 if (val != INTVAL (ops[1]))
644459d0 4511 {
9d98604b 4512 emit_move_insn (ops[0], GEN_INT (val));
4513 return 1;
644459d0 4514 }
4515 }
9d98604b 4516 if (MEM_P (ops[0]))
6cf5579e 4517 {
4518 if (MEM_ADDR_SPACE (ops[0]))
4519 ops[0] = expand_ea_mem (ops[0], true);
4520 return spu_split_store (ops);
4521 }
9d98604b 4522 if (MEM_P (ops[1]))
6cf5579e 4523 {
4524 if (MEM_ADDR_SPACE (ops[1]))
4525 ops[1] = expand_ea_mem (ops[1], false);
4526 return spu_split_load (ops);
4527 }
9d98604b 4528
644459d0 4529 return 0;
4530}
4531
9d98604b 4532static void
4533spu_convert_move (rtx dst, rtx src)
644459d0 4534{
3754d046 4535 machine_mode mode = GET_MODE (dst);
4536 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
9d98604b 4537 rtx reg;
4538 gcc_assert (GET_MODE (src) == TImode);
4539 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4540 emit_insn (gen_rtx_SET (VOIDmode, reg,
4541 gen_rtx_TRUNCATE (int_mode,
4542 gen_rtx_LSHIFTRT (TImode, src,
4543 GEN_INT (int_mode == DImode ? 64 : 96)))));
4544 if (int_mode != mode)
4545 {
4546 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4547 emit_move_insn (dst, reg);
4548 }
4549}
644459d0 4550
9d98604b 4551/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4552 the address from SRC and SRC+16. Return a REG or CONST_INT that
4553 specifies how many bytes to rotate the loaded registers, plus any
4554 extra from EXTRA_ROTQBY. The address and rotate amounts are
4555 normalized to improve merging of loads and rotate computations. */
4556static rtx
4557spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4558{
4559 rtx addr = XEXP (src, 0);
4560 rtx p0, p1, rot, addr0, addr1;
4561 int rot_amt;
644459d0 4562
4563 rot = 0;
4564 rot_amt = 0;
9d98604b 4565
4566 if (MEM_ALIGN (src) >= 128)
4567 /* Address is already aligned; simply perform a TImode load. */ ;
4568 else if (GET_CODE (addr) == PLUS)
644459d0 4569 {
4570 /* 8 cases:
4571 aligned reg + aligned reg => lqx
4572 aligned reg + unaligned reg => lqx, rotqby
4573 aligned reg + aligned const => lqd
4574 aligned reg + unaligned const => lqd, rotqbyi
4575 unaligned reg + aligned reg => lqx, rotqby
4576 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4577 unaligned reg + aligned const => lqd, rotqby
 4578 unaligned reg + unaligned const => not allowed as a legitimate address
4579 */
4580 p0 = XEXP (addr, 0);
4581 p1 = XEXP (addr, 1);
9d98604b 4582 if (!reg_aligned_for_addr (p0))
644459d0 4583 {
9d98604b 4584 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4585 {
9d98604b 4586 rot = gen_reg_rtx (SImode);
4587 emit_insn (gen_addsi3 (rot, p0, p1));
4588 }
4589 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4590 {
4591 if (INTVAL (p1) > 0
4592 && REG_POINTER (p0)
4593 && INTVAL (p1) * BITS_PER_UNIT
4594 < REGNO_POINTER_ALIGN (REGNO (p0)))
4595 {
4596 rot = gen_reg_rtx (SImode);
4597 emit_insn (gen_addsi3 (rot, p0, p1));
4598 addr = p0;
4599 }
4600 else
4601 {
4602 rtx x = gen_reg_rtx (SImode);
4603 emit_move_insn (x, p1);
4604 if (!spu_arith_operand (p1, SImode))
4605 p1 = x;
4606 rot = gen_reg_rtx (SImode);
4607 emit_insn (gen_addsi3 (rot, p0, p1));
4608 addr = gen_rtx_PLUS (Pmode, p0, x);
4609 }
644459d0 4610 }
4611 else
4612 rot = p0;
4613 }
4614 else
4615 {
4616 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4617 {
4618 rot_amt = INTVAL (p1) & 15;
9d98604b 4619 if (INTVAL (p1) & -16)
4620 {
4621 p1 = GEN_INT (INTVAL (p1) & -16);
4622 addr = gen_rtx_PLUS (SImode, p0, p1);
4623 }
4624 else
4625 addr = p0;
644459d0 4626 }
9d98604b 4627 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4628 rot = p1;
4629 }
4630 }
9d98604b 4631 else if (REG_P (addr))
644459d0 4632 {
9d98604b 4633 if (!reg_aligned_for_addr (addr))
644459d0 4634 rot = addr;
4635 }
4636 else if (GET_CODE (addr) == CONST)
4637 {
4638 if (GET_CODE (XEXP (addr, 0)) == PLUS
4639 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4640 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4641 {
4642 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4643 if (rot_amt & -16)
4644 addr = gen_rtx_CONST (Pmode,
4645 gen_rtx_PLUS (Pmode,
4646 XEXP (XEXP (addr, 0), 0),
4647 GEN_INT (rot_amt & -16)));
4648 else
4649 addr = XEXP (XEXP (addr, 0), 0);
4650 }
4651 else
9d98604b 4652 {
4653 rot = gen_reg_rtx (Pmode);
4654 emit_move_insn (rot, addr);
4655 }
644459d0 4656 }
4657 else if (GET_CODE (addr) == CONST_INT)
4658 {
4659 rot_amt = INTVAL (addr);
4660 addr = GEN_INT (rot_amt & -16);
4661 }
4662 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4663 {
4664 rot = gen_reg_rtx (Pmode);
4665 emit_move_insn (rot, addr);
4666 }
644459d0 4667
9d98604b 4668 rot_amt += extra_rotby;
644459d0 4669
4670 rot_amt &= 15;
4671
4672 if (rot && rot_amt)
4673 {
9d98604b 4674 rtx x = gen_reg_rtx (SImode);
4675 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4676 rot = x;
644459d0 4677 rot_amt = 0;
4678 }
9d98604b 4679 if (!rot && rot_amt)
4680 rot = GEN_INT (rot_amt);
4681
4682 addr0 = copy_rtx (addr);
4683 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4684 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4685
4686 if (dst1)
4687 {
29c05e22 4688 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
9d98604b 4689 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4690 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4691 }
644459d0 4692
9d98604b 4693 return rot;
4694}
4695
4696int
4697spu_split_load (rtx * ops)
4698{
3754d046 4699 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4700 rtx addr, load, rot;
4701 int rot_amt;
644459d0 4702
9d98604b 4703 if (GET_MODE_SIZE (mode) >= 16)
4704 return 0;
644459d0 4705
9d98604b 4706 addr = XEXP (ops[1], 0);
4707 gcc_assert (GET_CODE (addr) != AND);
4708
4709 if (!address_needs_split (ops[1]))
4710 {
4711 ops[1] = change_address (ops[1], TImode, addr);
4712 load = gen_reg_rtx (TImode);
4713 emit_insn (gen__movti (load, ops[1]));
4714 spu_convert_move (ops[0], load);
4715 return 1;
4716 }
4717
4718 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4719
4720 load = gen_reg_rtx (TImode);
4721 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4722
4723 if (rot)
4724 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4725
9d98604b 4726 spu_convert_move (ops[0], load);
4727 return 1;
644459d0 4728}
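/* Worked example (illustrative): a QImode load from (reg + 7) with an
   aligned base register uses rot_amt = 1 - 4 = -3, so spu_expand_load
   computes a total rotate of (7 - 3) & 15 = 4 bytes; rotating the
   loaded quadword left by 4 moves the byte that sat at offset 7 into
   byte 3, the position spu_convert_move extracts the scalar from.  */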
4729
9d98604b 4730int
644459d0 4731spu_split_store (rtx * ops)
4732{
3754d046 4733 machine_mode mode = GET_MODE (ops[0]);
9d98604b 4734 rtx reg;
644459d0 4735 rtx addr, p0, p1, p1_lo, smem;
4736 int aform;
4737 int scalar;
4738
9d98604b 4739 if (GET_MODE_SIZE (mode) >= 16)
4740 return 0;
4741
644459d0 4742 addr = XEXP (ops[0], 0);
9d98604b 4743 gcc_assert (GET_CODE (addr) != AND);
4744
4745 if (!address_needs_split (ops[0]))
4746 {
4747 reg = gen_reg_rtx (TImode);
4748 emit_insn (gen_spu_convert (reg, ops[1]));
4749 ops[0] = change_address (ops[0], TImode, addr);
4750 emit_move_insn (ops[0], reg);
4751 return 1;
4752 }
644459d0 4753
4754 if (GET_CODE (addr) == PLUS)
4755 {
4756 /* 8 cases:
4757 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4758 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4759 aligned reg + aligned const => lqd, c?d, shuf, stqx
4760 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4761 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4762 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4763 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4764 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4765 */
4766 aform = 0;
4767 p0 = XEXP (addr, 0);
4768 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4769 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4770 {
4771 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4772 if (reg_aligned_for_addr (p0))
4773 {
4774 p1 = GEN_INT (INTVAL (p1) & -16);
4775 if (p1 == const0_rtx)
4776 addr = p0;
4777 else
4778 addr = gen_rtx_PLUS (SImode, p0, p1);
4779 }
4780 else
4781 {
4782 rtx x = gen_reg_rtx (SImode);
4783 emit_move_insn (x, p1);
4784 addr = gen_rtx_PLUS (SImode, p0, x);
4785 }
644459d0 4786 }
4787 }
9d98604b 4788 else if (REG_P (addr))
644459d0 4789 {
4790 aform = 0;
4791 p0 = addr;
4792 p1 = p1_lo = const0_rtx;
4793 }
4794 else
4795 {
4796 aform = 1;
4797 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4798 p1 = 0; /* aform doesn't use p1 */
4799 p1_lo = addr;
4800 if (ALIGNED_SYMBOL_REF_P (addr))
4801 p1_lo = const0_rtx;
9d98604b 4802 else if (GET_CODE (addr) == CONST
4803 && GET_CODE (XEXP (addr, 0)) == PLUS
4804 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4805 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4806 {
9d98604b 4807 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4808 if ((v & -16) != 0)
4809 addr = gen_rtx_CONST (Pmode,
4810 gen_rtx_PLUS (Pmode,
4811 XEXP (XEXP (addr, 0), 0),
4812 GEN_INT (v & -16)));
4813 else
4814 addr = XEXP (XEXP (addr, 0), 0);
4815 p1_lo = GEN_INT (v & 15);
644459d0 4816 }
4817 else if (GET_CODE (addr) == CONST_INT)
4818 {
4819 p1_lo = GEN_INT (INTVAL (addr) & 15);
4820 addr = GEN_INT (INTVAL (addr) & -16);
4821 }
9d98604b 4822 else
4823 {
4824 p1_lo = gen_reg_rtx (SImode);
4825 emit_move_insn (p1_lo, addr);
4826 }
644459d0 4827 }
4828
4cbad5bb 4829 gcc_assert (aform == 0 || aform == 1);
9d98604b 4830 reg = gen_reg_rtx (TImode);
e04cf423 4831
644459d0 4832 scalar = store_with_one_insn_p (ops[0]);
4833 if (!scalar)
4834 {
 4835 /* We could copy the flags from the ops[0] MEM to mem here, but
 4836 we don't because we want this load to be optimized away if
4837 possible, and copying the flags will prevent that in certain
4838 cases, e.g. consider the volatile flag. */
4839
9d98604b 4840 rtx pat = gen_reg_rtx (TImode);
e04cf423 4841 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4842 set_mem_alias_set (lmem, 0);
4843 emit_insn (gen_movti (reg, lmem));
644459d0 4844
9d98604b 4845 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4846 p0 = stack_pointer_rtx;
4847 if (!p1_lo)
4848 p1_lo = const0_rtx;
4849
4850 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4851 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4852 }
644459d0 4853 else
4854 {
4855 if (GET_CODE (ops[1]) == REG)
4856 emit_insn (gen_spu_convert (reg, ops[1]));
4857 else if (GET_CODE (ops[1]) == SUBREG)
4858 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4859 else
4860 abort ();
4861 }
4862
4863 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4864 emit_insn (gen_ashlti3
4865 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4866
9d98604b 4867 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4868 /* We can't use the previous alias set because the memory has changed
4869 size and can potentially overlap objects of other types. */
4870 set_mem_alias_set (smem, 0);
4871
e04cf423 4872 emit_insn (gen_movti (smem, reg));
9d98604b 4873 return 1;
644459d0 4874}
4875
4876/* Return TRUE if X is MEM which is a struct member reference
4877 and the member can safely be loaded and stored with a single
4878 instruction because it is padded. */
4879static int
4880mem_is_padded_component_ref (rtx x)
4881{
4882 tree t = MEM_EXPR (x);
4883 tree r;
4884 if (!t || TREE_CODE (t) != COMPONENT_REF)
4885 return 0;
4886 t = TREE_OPERAND (t, 1);
4887 if (!t || TREE_CODE (t) != FIELD_DECL
4888 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4889 return 0;
4890 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4891 r = DECL_FIELD_CONTEXT (t);
4892 if (!r || TREE_CODE (r) != RECORD_TYPE)
4893 return 0;
 4894 /* Make sure they are the same mode. */
4895 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4896 return 0;
 4897 /* If there are no following fields, then the field alignment ensures
fa7637bd 4898 the structure is padded to that alignment, which means this field is
 4899 padded too. */
644459d0 4900 if (TREE_CHAIN (t) == 0)
4901 return 1;
4902 /* If the following field is also aligned then this field will be
4903 padded. */
4904 t = TREE_CHAIN (t);
4905 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4906 return 1;
4907 return 0;
4908}
4909
c7b91b14 4910/* Parse the -mfixed-range= option string. */
4911static void
4912fix_range (const char *const_str)
4913{
4914 int i, first, last;
4915 char *str, *dash, *comma;
4916
 4917 /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
4918 REG2 are either register names or register numbers. The effect
4919 of this option is to mark the registers in the range from REG1 to
4920 REG2 as ``fixed'' so they won't be used by the compiler. */
4921
4922 i = strlen (const_str);
4923 str = (char *) alloca (i + 1);
4924 memcpy (str, const_str, i + 1);
4925
4926 while (1)
4927 {
4928 dash = strchr (str, '-');
4929 if (!dash)
4930 {
4931 warning (0, "value of -mfixed-range must have form REG1-REG2");
4932 return;
4933 }
4934 *dash = '\0';
4935 comma = strchr (dash + 1, ',');
4936 if (comma)
4937 *comma = '\0';
4938
4939 first = decode_reg_name (str);
4940 if (first < 0)
4941 {
4942 warning (0, "unknown register name: %s", str);
4943 return;
4944 }
4945
4946 last = decode_reg_name (dash + 1);
4947 if (last < 0)
4948 {
4949 warning (0, "unknown register name: %s", dash + 1);
4950 return;
4951 }
4952
4953 *dash = '-';
4954
4955 if (first > last)
4956 {
4957 warning (0, "%s-%s is an empty range", str, dash + 1);
4958 return;
4959 }
4960
4961 for (i = first; i <= last; ++i)
4962 fixed_regs[i] = call_used_regs[i] = 1;
4963
4964 if (!comma)
4965 break;
4966
4967 *comma = ',';
4968 str = comma + 1;
4969 }
4970}
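/* Usage example (illustrative register numbers): -mfixed-range=80-127
   marks registers 80 through 127 as fixed and call-used so the
   allocator will not touch them; several ranges may be given, e.g.
   -mfixed-range=80-95,112-127.  */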
4971
644459d0 4972/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4973 can be generated using the fsmbi instruction. */
4974int
4975fsmbi_const_p (rtx x)
4976{
dea01258 4977 if (CONSTANT_P (x))
4978 {
5df189be 4979 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4980 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4981 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4982 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4983 }
4984 return 0;
4985}
4986
4987/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4988 can be generated using the cbd, chd, cwd or cdd instruction. */
4989int
3754d046 4990cpat_const_p (rtx x, machine_mode mode)
dea01258 4991{
4992 if (CONSTANT_P (x))
4993 {
4994 enum immediate_class c = classify_immediate (x, mode);
4995 return c == IC_CPAT;
4996 }
4997 return 0;
4998}
644459d0 4999
dea01258 5000rtx
5001gen_cpat_const (rtx * ops)
5002{
5003 unsigned char dst[16];
5004 int i, offset, shift, isize;
5005 if (GET_CODE (ops[3]) != CONST_INT
5006 || GET_CODE (ops[2]) != CONST_INT
5007 || (GET_CODE (ops[1]) != CONST_INT
5008 && GET_CODE (ops[1]) != REG))
5009 return 0;
5010 if (GET_CODE (ops[1]) == REG
5011 && (!REG_POINTER (ops[1])
5012 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5013 return 0;
644459d0 5014
5015 for (i = 0; i < 16; i++)
dea01258 5016 dst[i] = i + 16;
5017 isize = INTVAL (ops[3]);
5018 if (isize == 1)
5019 shift = 3;
5020 else if (isize == 2)
5021 shift = 2;
5022 else
5023 shift = 0;
5024 offset = (INTVAL (ops[2]) +
5025 (GET_CODE (ops[1]) ==
5026 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5027 for (i = 0; i < isize; i++)
5028 dst[offset + i] = i + shift;
5029 return array_to_constant (TImode, dst);
644459d0 5030}
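/* Worked example (illustrative operands): for a 4-byte insertion with
   ops[3] == 4 and ops[1] + ops[2] == 4 (mod 16), the bytes built above
   are { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18, ... , 0x1f },
   i.e. the identity pattern 0x10..0x1f with 0x00..0x03 punched in at
   the target word, the kind of control word cwd produces.  */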
5031
5032/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5033 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5034 than 16 bytes, the value is repeated across the rest of the array. */
5035void
3754d046 5036constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
644459d0 5037{
5038 HOST_WIDE_INT val;
5039 int i, j, first;
5040
5041 memset (arr, 0, 16);
5042 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5043 if (GET_CODE (x) == CONST_INT
5044 || (GET_CODE (x) == CONST_DOUBLE
5045 && (mode == SFmode || mode == DFmode)))
5046 {
5047 gcc_assert (mode != VOIDmode && mode != BLKmode);
5048
5049 if (GET_CODE (x) == CONST_DOUBLE)
5050 val = const_double_to_hwint (x);
5051 else
5052 val = INTVAL (x);
5053 first = GET_MODE_SIZE (mode) - 1;
5054 for (i = first; i >= 0; i--)
5055 {
5056 arr[i] = val & 0xff;
5057 val >>= 8;
5058 }
5059 /* Splat the constant across the whole array. */
5060 for (j = 0, i = first + 1; i < 16; i++)
5061 {
5062 arr[i] = arr[j];
5063 j = (j == first) ? 0 : j + 1;
5064 }
5065 }
5066 else if (GET_CODE (x) == CONST_DOUBLE)
5067 {
5068 val = CONST_DOUBLE_LOW (x);
5069 for (i = 15; i >= 8; i--)
5070 {
5071 arr[i] = val & 0xff;
5072 val >>= 8;
5073 }
5074 val = CONST_DOUBLE_HIGH (x);
5075 for (i = 7; i >= 0; i--)
5076 {
5077 arr[i] = val & 0xff;
5078 val >>= 8;
5079 }
5080 }
5081 else if (GET_CODE (x) == CONST_VECTOR)
5082 {
5083 int units;
5084 rtx elt;
5085 mode = GET_MODE_INNER (mode);
5086 units = CONST_VECTOR_NUNITS (x);
5087 for (i = 0; i < units; i++)
5088 {
5089 elt = CONST_VECTOR_ELT (x, i);
5090 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5091 {
5092 if (GET_CODE (elt) == CONST_DOUBLE)
5093 val = const_double_to_hwint (elt);
5094 else
5095 val = INTVAL (elt);
5096 first = GET_MODE_SIZE (mode) - 1;
5097 if (first + i * GET_MODE_SIZE (mode) > 16)
5098 abort ();
5099 for (j = first; j >= 0; j--)
5100 {
5101 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5102 val >>= 8;
5103 }
5104 }
5105 }
5106 }
5107 else
5108 gcc_unreachable();
5109}
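/* Worked example (illustrative): constant_to_array (SImode,
   GEN_INT (0x12345678), arr) sets arr[0..3] to 0x12 0x34 0x56 0x78, and
   the splat loop then repeats those four bytes in arr[4..7], arr[8..11]
   and arr[12..15].  */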
5110
5111/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5112 smaller than 16 bytes, use the bytes that would represent that value
5113 in a register, e.g., for QImode return the value of arr[3]. */
5114rtx
3754d046 5115array_to_constant (machine_mode mode, const unsigned char arr[16])
644459d0 5116{
3754d046 5117 machine_mode inner_mode;
644459d0 5118 rtvec v;
5119 int units, size, i, j, k;
5120 HOST_WIDE_INT val;
5121
5122 if (GET_MODE_CLASS (mode) == MODE_INT
5123 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5124 {
5125 j = GET_MODE_SIZE (mode);
5126 i = j < 4 ? 4 - j : 0;
5127 for (val = 0; i < j; i++)
5128 val = (val << 8) | arr[i];
5129 val = trunc_int_for_mode (val, mode);
5130 return GEN_INT (val);
5131 }
5132
5133 if (mode == TImode)
5134 {
5135 HOST_WIDE_INT high;
5136 for (i = high = 0; i < 8; i++)
5137 high = (high << 8) | arr[i];
5138 for (i = 8, val = 0; i < 16; i++)
5139 val = (val << 8) | arr[i];
5140 return immed_double_const (val, high, TImode);
5141 }
5142 if (mode == SFmode)
5143 {
5144 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5145 val = trunc_int_for_mode (val, SImode);
171b6d22 5146 return hwint_to_const_double (SFmode, val);
644459d0 5147 }
5148 if (mode == DFmode)
5149 {
1f915911 5150 for (i = 0, val = 0; i < 8; i++)
5151 val = (val << 8) | arr[i];
171b6d22 5152 return hwint_to_const_double (DFmode, val);
644459d0 5153 }
5154
5155 if (!VECTOR_MODE_P (mode))
5156 abort ();
5157
5158 units = GET_MODE_NUNITS (mode);
5159 size = GET_MODE_UNIT_SIZE (mode);
5160 inner_mode = GET_MODE_INNER (mode);
5161 v = rtvec_alloc (units);
5162
5163 for (k = i = 0; i < units; ++i)
5164 {
5165 val = 0;
5166 for (j = 0; j < size; j++, k++)
5167 val = (val << 8) | arr[k];
5168
5169 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5170 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5171 else
5172 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5173 }
5174 if (k > 16)
5175 abort ();
5176
5177 return gen_rtx_CONST_VECTOR (mode, v);
5178}
5179
5180static void
5181reloc_diagnostic (rtx x)
5182{
712d2297 5183 tree decl = 0;
644459d0 5184 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5185 return;
5186
5187 if (GET_CODE (x) == SYMBOL_REF)
5188 decl = SYMBOL_REF_DECL (x);
5189 else if (GET_CODE (x) == CONST
5190 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5191 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5192
5193 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5194 if (decl && !DECL_P (decl))
5195 decl = 0;
5196
644459d0 5197 /* The decl could be a string constant. */
5198 if (decl && DECL_P (decl))
712d2297 5199 {
5200 location_t loc;
5201 /* We use last_assemble_variable_decl to get line information. It's
5202 not always going to be right and might not even be close, but will
5203 be right for the more common cases. */
5204 if (!last_assemble_variable_decl || in_section == ctors_section)
5205 loc = DECL_SOURCE_LOCATION (decl);
5206 else
5207 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5208
712d2297 5209 if (TARGET_WARN_RELOC)
5210 warning_at (loc, 0,
5211 "creating run-time relocation for %qD", decl);
5212 else
5213 error_at (loc,
5214 "creating run-time relocation for %qD", decl);
5215 }
5216 else
5217 {
5218 if (TARGET_WARN_RELOC)
5219 warning_at (input_location, 0, "creating run-time relocation");
5220 else
5221 error_at (input_location, "creating run-time relocation");
5222 }
644459d0 5223}
5224
5225/* Hook into assemble_integer so we can generate an error for run-time
5226 relocations. The SPU ABI disallows them. */
5227static bool
5228spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5229{
5230 /* By default run-time relocations aren't supported, but we allow them
 5231 in case users support them in their own run-time loader, and we provide
5232 a warning for those users that don't. */
5233 if ((GET_CODE (x) == SYMBOL_REF)
5234 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5235 reloc_diagnostic (x);
5236
5237 return default_assemble_integer (x, size, aligned_p);
5238}
5239
5240static void
5241spu_asm_globalize_label (FILE * file, const char *name)
5242{
5243 fputs ("\t.global\t", file);
5244 assemble_name (file, name);
5245 fputs ("\n", file);
5246}
5247
5248static bool
20d892d1 5249spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5250 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5251 bool speed ATTRIBUTE_UNUSED)
644459d0 5252{
3754d046 5253 machine_mode mode = GET_MODE (x);
644459d0 5254 int cost = COSTS_N_INSNS (2);
5255
5256 /* Folding to a CONST_VECTOR will use extra space but there might
5257 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5258 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5259 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5260 because this cost will only be compared against a single insn.
5261 if (code == CONST_VECTOR)
ca316360 5262 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5263 */
5264
5265 /* Use defaults for float operations. Not accurate but good enough. */
5266 if (mode == DFmode)
5267 {
5268 *total = COSTS_N_INSNS (13);
5269 return true;
5270 }
5271 if (mode == SFmode)
5272 {
5273 *total = COSTS_N_INSNS (6);
5274 return true;
5275 }
5276 switch (code)
5277 {
5278 case CONST_INT:
5279 if (satisfies_constraint_K (x))
5280 *total = 0;
5281 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5282 *total = COSTS_N_INSNS (1);
5283 else
5284 *total = COSTS_N_INSNS (3);
5285 return true;
5286
5287 case CONST:
5288 *total = COSTS_N_INSNS (3);
5289 return true;
5290
5291 case LABEL_REF:
5292 case SYMBOL_REF:
5293 *total = COSTS_N_INSNS (0);
5294 return true;
5295
5296 case CONST_DOUBLE:
5297 *total = COSTS_N_INSNS (5);
5298 return true;
5299
5300 case FLOAT_EXTEND:
5301 case FLOAT_TRUNCATE:
5302 case FLOAT:
5303 case UNSIGNED_FLOAT:
5304 case FIX:
5305 case UNSIGNED_FIX:
5306 *total = COSTS_N_INSNS (7);
5307 return true;
5308
5309 case PLUS:
5310 if (mode == TImode)
5311 {
5312 *total = COSTS_N_INSNS (9);
5313 return true;
5314 }
5315 break;
5316
5317 case MULT:
5318 cost =
5319 GET_CODE (XEXP (x, 0)) ==
5320 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5321 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5322 {
5323 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5324 {
5325 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5326 cost = COSTS_N_INSNS (14);
5327 if ((val & 0xffff) == 0)
5328 cost = COSTS_N_INSNS (9);
5329 else if (val > 0 && val < 0x10000)
5330 cost = COSTS_N_INSNS (11);
5331 }
5332 }
5333 *total = cost;
5334 return true;
5335 case DIV:
5336 case UDIV:
5337 case MOD:
5338 case UMOD:
5339 *total = COSTS_N_INSNS (20);
5340 return true;
5341 case ROTATE:
5342 case ROTATERT:
5343 case ASHIFT:
5344 case ASHIFTRT:
5345 case LSHIFTRT:
5346 *total = COSTS_N_INSNS (4);
5347 return true;
5348 case UNSPEC:
5349 if (XINT (x, 1) == UNSPEC_CONVERT)
5350 *total = COSTS_N_INSNS (0);
5351 else
5352 *total = COSTS_N_INSNS (4);
5353 return true;
5354 }
5355 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5356 if (GET_MODE_CLASS (mode) == MODE_INT
5357 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5358 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5359 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5360 *total = cost;
5361 return true;
5362}
5363
3754d046 5364static machine_mode
1bd43494 5365spu_unwind_word_mode (void)
644459d0 5366{
1bd43494 5367 return SImode;
644459d0 5368}
5369
5370/* Decide whether we can make a sibling call to a function. DECL is the
5371 declaration of the function being targeted by the call and EXP is the
5372 CALL_EXPR representing the call. */
5373static bool
5374spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5375{
5376 return decl && !TARGET_LARGE_MEM;
5377}
5378
5379/* We need to correctly update the back chain pointer and the Available
 5380 Stack Size (which is in the second slot of the sp register). */
5381void
5382spu_allocate_stack (rtx op0, rtx op1)
5383{
5384 HOST_WIDE_INT v;
5385 rtx chain = gen_reg_rtx (V4SImode);
5386 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5387 rtx sp = gen_reg_rtx (V4SImode);
5388 rtx splatted = gen_reg_rtx (V4SImode);
5389 rtx pat = gen_reg_rtx (TImode);
5390
 5391 /* Copy the back chain so we can store it back at the new stack bottom. */
5392 emit_move_insn (chain, stack_bot);
5393
5394 op1 = force_reg (SImode, op1);
5395
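  /* The shuffle pattern below copies bytes 0-3 of op1 into every word
     of SPLATTED, so the vector subtraction decrements both the stack
     pointer in slot 0 and the Available Stack Size in slot 1 by the
     allocation size.  */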
5396 v = 0x1020300010203ll;
5397 emit_move_insn (pat, immed_double_const (v, v, TImode));
5398 emit_insn (gen_shufb (splatted, op1, op1, pat));
5399
5400 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5401 emit_insn (gen_subv4si3 (sp, sp, splatted));
5402
5403 if (flag_stack_check)
5404 {
5405 rtx avail = gen_reg_rtx(SImode);
5406 rtx result = gen_reg_rtx(SImode);
5407 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5408 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5409 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5410 }
5411
5412 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5413
5414 emit_move_insn (stack_bot, chain);
5415
5416 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5417}
5418
5419void
5420spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5421{
5422 static unsigned char arr[16] =
5423 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5424 rtx temp = gen_reg_rtx (SImode);
5425 rtx temp2 = gen_reg_rtx (SImode);
5426 rtx temp3 = gen_reg_rtx (V4SImode);
5427 rtx temp4 = gen_reg_rtx (V4SImode);
5428 rtx pat = gen_reg_rtx (TImode);
5429 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5430
5431 /* Restore the backchain from the first word, sp from the second. */
5432 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5433 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5434
5435 emit_move_insn (pat, array_to_constant (TImode, arr));
5436
5437 /* Compute Available Stack Size for sp */
5438 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5439 emit_insn (gen_shufb (temp3, temp, temp, pat));
5440
5441 /* Compute Available Stack Size for back chain */
5442 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5443 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5444 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5445
5446 emit_insn (gen_addv4si3 (sp, sp, temp3));
5447 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5448}
5449
5450static void
5451spu_init_libfuncs (void)
5452{
5453 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5454 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5455 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5456 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5457 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5458 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5459 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5460 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5461 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5462 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5463 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5464 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5465
5466 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5467 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5468
5825ec3f 5469 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5470 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5471 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5472 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5473 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5474 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5475 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5476 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5477 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5478 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5479 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5480 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5481
19a53068 5482 set_optab_libfunc (smul_optab, TImode, "__multi3");
5483 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5484 set_optab_libfunc (smod_optab, TImode, "__modti3");
5485 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5486 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5487 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5488}
5489
5490/* Make a subreg, stripping any existing subreg. We could possibly just
5491 call simplify_subreg, but in this case we know what we want. */
5492rtx
3754d046 5493spu_gen_subreg (machine_mode mode, rtx x)
644459d0 5494{
5495 if (GET_CODE (x) == SUBREG)
5496 x = SUBREG_REG (x);
5497 if (GET_MODE (x) == mode)
5498 return x;
5499 return gen_rtx_SUBREG (mode, x, 0);
5500}
5501
5502static bool
fb80456a 5503spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5504{
5505 return (TYPE_MODE (type) == BLKmode
5506 && ((type) == 0
5507 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5508 || int_size_in_bytes (type) >
5509 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5510}
5511\f
5512/* Create the built-in types and functions */
5513
c2233b46 5514enum spu_function_code
5515{
5516#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5517#include "spu-builtins.def"
5518#undef DEF_BUILTIN
5519 NUM_SPU_BUILTINS
5520};
5521
5522extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5523
644459d0 5524struct spu_builtin_description spu_builtins[] = {
5525#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5526 {fcode, icode, name, type, params},
644459d0 5527#include "spu-builtins.def"
5528#undef DEF_BUILTIN
5529};
5530
0c5c4d59 5531static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5532
5533/* Returns the spu builtin decl for CODE. */
e6925042 5534
5535static tree
5536spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5537{
5538 if (code >= NUM_SPU_BUILTINS)
5539 return error_mark_node;
5540
0c5c4d59 5541 return spu_builtin_decls[code];
e6925042 5542}
5543
5544
644459d0 5545static void
5546spu_init_builtins (void)
5547{
5548 struct spu_builtin_description *d;
5549 unsigned int i;
5550
5551 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5552 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5553 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5554 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5555 V4SF_type_node = build_vector_type (float_type_node, 4);
5556 V2DF_type_node = build_vector_type (double_type_node, 2);
5557
5558 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5559 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5560 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5561 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5562
c4ecce0c 5563 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5564
5565 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5566 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5567 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5569 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5571 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5573 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5575 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5576 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5577
5578 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5579 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5580 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5581 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5582 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5583 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5584 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5585 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5586
5587 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5588 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5589
5590 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5591
5592 spu_builtin_types[SPU_BTI_PTR] =
5593 build_pointer_type (build_qualified_type
5594 (void_type_node,
5595 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5596
5597 /* For each builtin we build a new prototype. The tree code will make
5598 sure nodes are shared. */
5599 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5600 {
5601 tree p;
5602 char name[64]; /* build_function will make a copy. */
5603 int parm;
5604
5605 if (d->name == 0)
5606 continue;
5607
5dfbd18f 5608 /* Find last parm. */
644459d0 5609 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5610 ;
644459d0 5611
5612 p = void_list_node;
5613 while (parm > 1)
5614 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5615
5616 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5617
5618 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5619 spu_builtin_decls[i] =
3726fe5e 5620 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5621 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5622 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5623
5624 /* These builtins don't throw. */
0c5c4d59 5625 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5626 }
5627}
5628
cf31d486 5629void
5630spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5631{
5632 static unsigned char arr[16] =
5633 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5634
5635 rtx temp = gen_reg_rtx (Pmode);
5636 rtx temp2 = gen_reg_rtx (V4SImode);
5637 rtx temp3 = gen_reg_rtx (V4SImode);
5638 rtx pat = gen_reg_rtx (TImode);
5639 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5640
5641 emit_move_insn (pat, array_to_constant (TImode, arr));
5642
5643 /* Restore the sp. */
5644 emit_move_insn (temp, op1);
5645 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5646
5647 /* Compute available stack size for sp. */
5648 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5649 emit_insn (gen_shufb (temp3, temp, temp, pat));
5650
5651 emit_insn (gen_addv4si3 (sp, sp, temp3));
5652 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5653}
5654
644459d0 5655int
5656spu_safe_dma (HOST_WIDE_INT channel)
5657{
006e4b96 5658 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5659}
5660
5661void
5662spu_builtin_splats (rtx ops[])
5663{
3754d046 5664 machine_mode mode = GET_MODE (ops[0]);
644459d0 5665 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5666 {
5667 unsigned char arr[16];
5668 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5669 emit_move_insn (ops[0], array_to_constant (mode, arr));
5670 }
644459d0 5671 else
5672 {
5673 rtx reg = gen_reg_rtx (TImode);
5674 rtx shuf;
5675 if (GET_CODE (ops[1]) != REG
5676 && GET_CODE (ops[1]) != SUBREG)
5677 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
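      /* The shuffle constants below replicate the scalar from its
	 preferred-slot position across the whole vector: byte 3 for
	 V16QI, bytes 2-3 for V8HI, bytes 0-3 for V4SI/V4SF, and
	 bytes 0-7 for V2DI/V2DF.  */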
5678 switch (mode)
5679 {
5680 case V2DImode:
5681 case V2DFmode:
5682 shuf =
5683 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5684 TImode);
5685 break;
5686 case V4SImode:
5687 case V4SFmode:
5688 shuf =
5689 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5690 TImode);
5691 break;
5692 case V8HImode:
5693 shuf =
5694 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5695 TImode);
5696 break;
5697 case V16QImode:
5698 shuf =
5699 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5700 TImode);
5701 break;
5702 default:
5703 abort ();
5704 }
5705 emit_move_insn (reg, shuf);
5706 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5707 }
5708}
5709
5710void
5711spu_builtin_extract (rtx ops[])
5712{
3754d046 5713 machine_mode mode;
644459d0 5714 rtx rot, from, tmp;
5715
5716 mode = GET_MODE (ops[1]);
5717
5718 if (GET_CODE (ops[2]) == CONST_INT)
5719 {
5720 switch (mode)
5721 {
5722 case V16QImode:
5723 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5724 break;
5725 case V8HImode:
5726 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5727 break;
5728 case V4SFmode:
5729 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5730 break;
5731 case V4SImode:
5732 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5733 break;
5734 case V2DImode:
5735 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5736 break;
5737 case V2DFmode:
5738 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5739 break;
5740 default:
5741 abort ();
5742 }
5743 return;
5744 }
5745
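  /* For a variable index, rotate the whole quadword so that the selected
     element lands in the scalar (preferred slot) position; the byte offsets
     computed below are adjusted for sub-word elements.  The preferred slot
     is then converted to the scalar mode.  */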
5746 from = spu_gen_subreg (TImode, ops[1]);
5747 rot = gen_reg_rtx (TImode);
5748 tmp = gen_reg_rtx (SImode);
5749
5750 switch (mode)
5751 {
5752 case V16QImode:
5753 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5754 break;
5755 case V8HImode:
5756 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5757 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5758 break;
5759 case V4SFmode:
5760 case V4SImode:
5761 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5762 break;
5763 case V2DImode:
5764 case V2DFmode:
5765 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5766 break;
5767 default:
5768 abort ();
5769 }
5770 emit_insn (gen_rotqby_ti (rot, from, tmp));
5771
5772 emit_insn (gen_spu_convert (ops[0], rot));
5773}
5774
5775void
5776spu_builtin_insert (rtx ops[])
5777{
3754d046 5778 machine_mode mode = GET_MODE (ops[0]);
5779 machine_mode imode = GET_MODE_INNER (mode);
644459d0 5780 rtx mask = gen_reg_rtx (TImode);
5781 rtx offset;
5782
5783 if (GET_CODE (ops[3]) == CONST_INT)
5784 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5785 else
5786 {
5787 offset = gen_reg_rtx (SImode);
5788 emit_insn (gen_mulsi3
5789 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5790 }
5791 emit_insn (gen_cpat
5792 (mask, stack_pointer_rtx, offset,
5793 GEN_INT (GET_MODE_SIZE (imode))));
5794 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5795}
5796
5797void
5798spu_builtin_promote (rtx ops[])
5799{
3754d046 5800 machine_mode mode, imode;
644459d0 5801 rtx rot, from, offset;
5802 HOST_WIDE_INT pos;
5803
5804 mode = GET_MODE (ops[0]);
5805 imode = GET_MODE_INNER (mode);
5806
5807 from = gen_reg_rtx (TImode);
5808 rot = spu_gen_subreg (TImode, ops[0]);
5809
5810 emit_insn (gen_spu_convert (from, ops[1]));
5811
5812 if (GET_CODE (ops[2]) == CONST_INT)
5813 {
5814 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5815 if (GET_MODE_SIZE (imode) < 4)
5816 pos += 4 - GET_MODE_SIZE (imode);
5817 offset = GEN_INT (pos & 15);
5818 }
5819 else
5820 {
5821 offset = gen_reg_rtx (SImode);
5822 switch (mode)
5823 {
5824 case V16QImode:
5825 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5826 break;
5827 case V8HImode:
5828 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5829 emit_insn (gen_addsi3 (offset, offset, offset));
5830 break;
5831 case V4SFmode:
5832 case V4SImode:
5833 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5834 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5835 break;
5836 case V2DImode:
5837 case V2DFmode:
5838 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5839 break;
5840 default:
5841 abort ();
5842 }
5843 }
5844 emit_insn (gen_rotqby_ti (rot, from, offset));
5845}
5846
e96f2783 5847static void
5848spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5849{
e96f2783 5850 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5851 rtx shuf = gen_reg_rtx (V4SImode);
5852 rtx insn = gen_reg_rtx (V4SImode);
5853 rtx shufc;
5854 rtx insnc;
5855 rtx mem;
5856
5857 fnaddr = force_reg (SImode, fnaddr);
5858 cxt = force_reg (SImode, cxt);
5859
5860 if (TARGET_LARGE_MEM)
5861 {
5862 rtx rotl = gen_reg_rtx (V4SImode);
5863 rtx mask = gen_reg_rtx (V4SImode);
5864 rtx bi = gen_reg_rtx (SImode);
e96f2783 5865 static unsigned char const shufa[16] = {
644459d0 5866 2, 3, 0, 1, 18, 19, 16, 17,
5867 0, 1, 2, 3, 16, 17, 18, 19
5868 };
e96f2783 5869 static unsigned char const insna[16] = {
644459d0 5870 0x41, 0, 0, 79,
5871 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5872 0x60, 0x80, 0, 79,
5873 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5874 };
5875
5876 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5877 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5878
5879 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5880 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5881 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5882 emit_insn (gen_selb (insn, insnc, rotl, mask));
5883
e96f2783 5884 mem = adjust_address (m_tramp, V4SImode, 0);
5885 emit_move_insn (mem, insn);
644459d0 5886
5887 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5888 mem = adjust_address (m_tramp, Pmode, 16);
5889 emit_move_insn (mem, bi);
644459d0 5890 }
5891 else
5892 {
5893 rtx scxt = gen_reg_rtx (SImode);
5894 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5895 static unsigned char const insna[16] = {
644459d0 5896 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5897 0x30, 0, 0, 0,
5898 0, 0, 0, 0,
5899 0, 0, 0, 0
5900 };
5901
5902 shufc = gen_reg_rtx (TImode);
5903 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5904
5905 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5906 fits 18 bits and the last 4 are zeros. This will be true if
 5907	 the stack pointer is initialized to 0x3fff0 at program start;
 5908	 otherwise the ila instruction will be garbage. */
5909
5910 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5911 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5912 emit_insn (gen_cpat
5913 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5914 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5915 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5916
e96f2783 5917 mem = adjust_address (m_tramp, V4SImode, 0);
5918 emit_move_insn (mem, insn);
644459d0 5919 }
5920 emit_insn (gen_sync ());
5921}
5922
08c6cbd2 5923static bool
5924spu_warn_func_return (tree decl)
5925{
5926 /* Naked functions are implemented entirely in assembly, including the
5927 return sequence, so suppress warnings about this. */
5928 return !spu_naked_function_p (decl);
5929}
5930
644459d0 5931void
5932spu_expand_sign_extend (rtx ops[])
5933{
5934 unsigned char arr[16];
5935 rtx pat = gen_reg_rtx (TImode);
5936 rtx sign, c;
5937 int i, last;
5938 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5939 if (GET_MODE (ops[1]) == QImode)
5940 {
5941 sign = gen_reg_rtx (HImode);
5942 emit_insn (gen_extendqihi2 (sign, ops[1]));
5943 for (i = 0; i < 16; i++)
5944 arr[i] = 0x12;
5945 arr[last] = 0x13;
5946 }
5947 else
5948 {
5949 for (i = 0; i < 16; i++)
5950 arr[i] = 0x10;
5951 switch (GET_MODE (ops[1]))
5952 {
5953 case HImode:
5954 sign = gen_reg_rtx (SImode);
5955 emit_insn (gen_extendhisi2 (sign, ops[1]));
5956 arr[last] = 0x03;
5957 arr[last - 1] = 0x02;
5958 break;
5959 case SImode:
5960 sign = gen_reg_rtx (SImode);
5961 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5962 for (i = 0; i < 4; i++)
5963 arr[last - i] = 3 - i;
5964 break;
5965 case DImode:
5966 sign = gen_reg_rtx (SImode);
5967 c = gen_reg_rtx (SImode);
5968 emit_insn (gen_spu_convert (c, ops[1]));
5969 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5970 for (i = 0; i < 8; i++)
5971 arr[last - i] = 7 - i;
5972 break;
5973 default:
5974 abort ();
5975 }
5976 }
5977 emit_move_insn (pat, array_to_constant (TImode, arr));
5978 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5979}
5980
5981/* Expand vector initialization.  If there are any constant parts,
 5982   load constant parts first.  Then load any non-constant parts.  */
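/* For example, a V4SI initializer { 7, x, 7, 7 } is handled by first
   loading the constant part as { 7, 7, 7, 7 } (the variable slot is filled
   with the first constant, so the recursive call below can splat it) and
   then inserting x into element 1.  */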
5983void
5984spu_expand_vector_init (rtx target, rtx vals)
5985{
3754d046 5986 machine_mode mode = GET_MODE (target);
644459d0 5987 int n_elts = GET_MODE_NUNITS (mode);
5988 int n_var = 0;
5989 bool all_same = true;
790c536c 5990 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5991 int i;
5992
5993 first = XVECEXP (vals, 0, 0);
5994 for (i = 0; i < n_elts; ++i)
5995 {
5996 x = XVECEXP (vals, 0, i);
e442af0b 5997 if (!(CONST_INT_P (x)
5998 || GET_CODE (x) == CONST_DOUBLE
5999 || GET_CODE (x) == CONST_FIXED))
644459d0 6000 ++n_var;
6001 else
6002 {
6003 if (first_constant == NULL_RTX)
6004 first_constant = x;
6005 }
6006 if (i > 0 && !rtx_equal_p (x, first))
6007 all_same = false;
6008 }
6009
6010 /* if all elements are the same, use splats to repeat elements */
6011 if (all_same)
6012 {
6013 if (!CONSTANT_P (first)
6014 && !register_operand (first, GET_MODE (x)))
6015 first = force_reg (GET_MODE (first), first);
6016 emit_insn (gen_spu_splats (target, first));
6017 return;
6018 }
6019
6020 /* load constant parts */
6021 if (n_var != n_elts)
6022 {
6023 if (n_var == 0)
6024 {
6025 emit_move_insn (target,
6026 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6027 }
6028 else
6029 {
6030 rtx constant_parts_rtx = copy_rtx (vals);
6031
6032 gcc_assert (first_constant != NULL_RTX);
 6033	  /* Fill empty slots with the first constant; this increases
 6034	     our chance of using splats in the recursive call below. */
6035 for (i = 0; i < n_elts; ++i)
e442af0b 6036 {
6037 x = XVECEXP (constant_parts_rtx, 0, i);
6038 if (!(CONST_INT_P (x)
6039 || GET_CODE (x) == CONST_DOUBLE
6040 || GET_CODE (x) == CONST_FIXED))
6041 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6042 }
644459d0 6043
6044 spu_expand_vector_init (target, constant_parts_rtx);
6045 }
6046 }
6047
6048 /* load variable parts */
6049 if (n_var != 0)
6050 {
6051 rtx insert_operands[4];
6052
6053 insert_operands[0] = target;
6054 insert_operands[2] = target;
6055 for (i = 0; i < n_elts; ++i)
6056 {
6057 x = XVECEXP (vals, 0, i);
e442af0b 6058 if (!(CONST_INT_P (x)
6059 || GET_CODE (x) == CONST_DOUBLE
6060 || GET_CODE (x) == CONST_FIXED))
644459d0 6061 {
6062 if (!register_operand (x, GET_MODE (x)))
6063 x = force_reg (GET_MODE (x), x);
6064 insert_operands[1] = x;
6065 insert_operands[3] = GEN_INT (i);
6066 spu_builtin_insert (insert_operands);
6067 }
6068 }
6069 }
6070}
6352eedf 6071
5474166e 6072/* Return the insn code of the vector compare instruction for the given CODE,
 6073   DEST_MODE and OP_MODE; return -1 if no suitable insn is available.  */
6074
6075static int
6076get_vec_cmp_insn (enum rtx_code code,
3754d046 6077 machine_mode dest_mode,
6078 machine_mode op_mode)
5474166e 6079
6080{
6081 switch (code)
6082 {
6083 case EQ:
6084 if (dest_mode == V16QImode && op_mode == V16QImode)
6085 return CODE_FOR_ceq_v16qi;
6086 if (dest_mode == V8HImode && op_mode == V8HImode)
6087 return CODE_FOR_ceq_v8hi;
6088 if (dest_mode == V4SImode && op_mode == V4SImode)
6089 return CODE_FOR_ceq_v4si;
6090 if (dest_mode == V4SImode && op_mode == V4SFmode)
6091 return CODE_FOR_ceq_v4sf;
6092 if (dest_mode == V2DImode && op_mode == V2DFmode)
6093 return CODE_FOR_ceq_v2df;
6094 break;
6095 case GT:
6096 if (dest_mode == V16QImode && op_mode == V16QImode)
6097 return CODE_FOR_cgt_v16qi;
6098 if (dest_mode == V8HImode && op_mode == V8HImode)
6099 return CODE_FOR_cgt_v8hi;
6100 if (dest_mode == V4SImode && op_mode == V4SImode)
6101 return CODE_FOR_cgt_v4si;
6102 if (dest_mode == V4SImode && op_mode == V4SFmode)
6103 return CODE_FOR_cgt_v4sf;
6104 if (dest_mode == V2DImode && op_mode == V2DFmode)
6105 return CODE_FOR_cgt_v2df;
6106 break;
6107 case GTU:
6108 if (dest_mode == V16QImode && op_mode == V16QImode)
6109 return CODE_FOR_clgt_v16qi;
6110 if (dest_mode == V8HImode && op_mode == V8HImode)
6111 return CODE_FOR_clgt_v8hi;
6112 if (dest_mode == V4SImode && op_mode == V4SImode)
6113 return CODE_FOR_clgt_v4si;
6114 break;
6115 default:
6116 break;
6117 }
6118 return -1;
6119}
6120
6121/* Emit a vector compare of operands OP0 and OP1 using code RCODE.
 6122   DMODE is the expected destination mode.  This is a recursive function.  */
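/* Comparisons with no direct instruction are synthesized from the ones
   above; for example a V4SI 'a >= b' becomes (a > b) | (a == b), 'a < b'
   becomes (b > a) with the operands swapped, and 'a != b' becomes
   ~(a == b).  */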
6123
6124static rtx
6125spu_emit_vector_compare (enum rtx_code rcode,
6126 rtx op0, rtx op1,
3754d046 6127 machine_mode dmode)
5474166e 6128{
6129 int vec_cmp_insn;
6130 rtx mask;
3754d046 6131 machine_mode dest_mode;
6132 machine_mode op_mode = GET_MODE (op1);
5474166e 6133
6134 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6135
 6136  /* Floating point vector compare instructions use destination V4SImode.
 6137     Double-precision floating point vector compare instructions use destination V2DImode.
 6138     Move the destination to the appropriate mode later.  */
6139 if (dmode == V4SFmode)
6140 dest_mode = V4SImode;
6141 else if (dmode == V2DFmode)
6142 dest_mode = V2DImode;
6143 else
6144 dest_mode = dmode;
6145
6146 mask = gen_reg_rtx (dest_mode);
6147 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6148
6149 if (vec_cmp_insn == -1)
6150 {
6151 bool swap_operands = false;
6152 bool try_again = false;
6153 switch (rcode)
6154 {
6155 case LT:
6156 rcode = GT;
6157 swap_operands = true;
6158 try_again = true;
6159 break;
6160 case LTU:
6161 rcode = GTU;
6162 swap_operands = true;
6163 try_again = true;
6164 break;
6165 case NE:
e20943d4 6166 case UNEQ:
6167 case UNLE:
6168 case UNLT:
6169 case UNGE:
6170 case UNGT:
6171 case UNORDERED:
5474166e 6172 /* Treat A != B as ~(A==B). */
6173 {
e20943d4 6174 enum rtx_code rev_code;
5474166e 6175 enum insn_code nor_code;
e20943d4 6176 rtx rev_mask;
6177
6178 rev_code = reverse_condition_maybe_unordered (rcode);
6179 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6180
d6bf3b14 6181 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6182 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6183 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6184 if (dmode != dest_mode)
6185 {
6186 rtx temp = gen_reg_rtx (dest_mode);
6187 convert_move (temp, mask, 0);
6188 return temp;
6189 }
6190 return mask;
6191 }
6192 break;
6193 case GE:
6194 case GEU:
6195 case LE:
6196 case LEU:
6197 /* Try GT/GTU/LT/LTU OR EQ */
6198 {
6199 rtx c_rtx, eq_rtx;
6200 enum insn_code ior_code;
6201 enum rtx_code new_code;
6202
6203 switch (rcode)
6204 {
6205 case GE: new_code = GT; break;
6206 case GEU: new_code = GTU; break;
6207 case LE: new_code = LT; break;
6208 case LEU: new_code = LTU; break;
6209 default:
6210 gcc_unreachable ();
6211 }
6212
6213 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6214 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6215
d6bf3b14 6216 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6217 gcc_assert (ior_code != CODE_FOR_nothing);
6218 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6219 if (dmode != dest_mode)
6220 {
6221 rtx temp = gen_reg_rtx (dest_mode);
6222 convert_move (temp, mask, 0);
6223 return temp;
6224 }
6225 return mask;
6226 }
6227 break;
e20943d4 6228 case LTGT:
6229 /* Try LT OR GT */
6230 {
6231 rtx lt_rtx, gt_rtx;
6232 enum insn_code ior_code;
6233
6234 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6235 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6236
6237 ior_code = optab_handler (ior_optab, dest_mode);
6238 gcc_assert (ior_code != CODE_FOR_nothing);
6239 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6240 if (dmode != dest_mode)
6241 {
6242 rtx temp = gen_reg_rtx (dest_mode);
6243 convert_move (temp, mask, 0);
6244 return temp;
6245 }
6246 return mask;
6247 }
6248 break;
6249 case ORDERED:
6250 /* Implement as (A==A) & (B==B) */
6251 {
6252 rtx a_rtx, b_rtx;
6253 enum insn_code and_code;
6254
6255 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6256 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6257
6258 and_code = optab_handler (and_optab, dest_mode);
6259 gcc_assert (and_code != CODE_FOR_nothing);
6260 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6261 if (dmode != dest_mode)
6262 {
6263 rtx temp = gen_reg_rtx (dest_mode);
6264 convert_move (temp, mask, 0);
6265 return temp;
6266 }
6267 return mask;
6268 }
6269 break;
5474166e 6270 default:
6271 gcc_unreachable ();
6272 }
6273
6274 /* You only get two chances. */
6275 if (try_again)
6276 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6277
6278 gcc_assert (vec_cmp_insn != -1);
6279
6280 if (swap_operands)
6281 {
6282 rtx tmp;
6283 tmp = op0;
6284 op0 = op1;
6285 op1 = tmp;
6286 }
6287 }
6288
6289 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6290 if (dmode != dest_mode)
6291 {
6292 rtx temp = gen_reg_rtx (dest_mode);
6293 convert_move (temp, mask, 0);
6294 return temp;
6295 }
6296 return mask;
6297}
6298
6299
6300/* Emit vector conditional expression.
6301 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6302 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6303
6304int
6305spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6306 rtx cond, rtx cc_op0, rtx cc_op1)
6307{
3754d046 6308 machine_mode dest_mode = GET_MODE (dest);
5474166e 6309 enum rtx_code rcode = GET_CODE (cond);
6310 rtx mask;
6311
6312 /* Get the vector mask for the given relational operations. */
6313 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6314
6315 emit_insn(gen_selb (dest, op2, op1, mask));
6316
6317 return 1;
6318}
6319
6352eedf 6320static rtx
3754d046 6321spu_force_reg (machine_mode mode, rtx op)
6352eedf 6322{
6323 rtx x, r;
6324 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6325 {
6326 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6327 || GET_MODE (op) == BLKmode)
6328 return force_reg (mode, convert_to_mode (mode, op, 0));
6329 abort ();
6330 }
6331
6332 r = force_reg (GET_MODE (op), op);
6333 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6334 {
6335 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6336 if (x)
6337 return x;
6338 }
6339
6340 x = gen_reg_rtx (mode);
6341 emit_insn (gen_spu_convert (x, r));
6342 return x;
6343}
6344
6345static void
6346spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6347{
6348 HOST_WIDE_INT v = 0;
6349 int lsbits;
6350 /* Check the range of immediate operands. */
6351 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6352 {
6353 int range = p - SPU_BTI_7;
5df189be 6354
6355 if (!CONSTANT_P (op))
bf776685 6356 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6357 d->name,
6358 spu_builtin_range[range].low, spu_builtin_range[range].high);
6359
6360 if (GET_CODE (op) == CONST
6361 && (GET_CODE (XEXP (op, 0)) == PLUS
6362 || GET_CODE (XEXP (op, 0)) == MINUS))
6363 {
6364 v = INTVAL (XEXP (XEXP (op, 0), 1));
6365 op = XEXP (XEXP (op, 0), 0);
6366 }
6367 else if (GET_CODE (op) == CONST_INT)
6368 v = INTVAL (op);
5df189be 6369 else if (GET_CODE (op) == CONST_VECTOR
6370 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6371 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6372
6373 /* The default for v is 0 which is valid in every range. */
6374 if (v < spu_builtin_range[range].low
6375 || v > spu_builtin_range[range].high)
bf776685 6376 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6377 d->name,
6378 spu_builtin_range[range].low, spu_builtin_range[range].high,
6379 v);
6352eedf 6380
6381 switch (p)
6382 {
6383 case SPU_BTI_S10_4:
6384 lsbits = 4;
6385 break;
6386 case SPU_BTI_U16_2:
 6387	  /* This is only used in lqa and stqa.  Even though the insns
6388 encode 16 bits of the address (all but the 2 least
6389 significant), only 14 bits are used because it is masked to
6390 be 16 byte aligned. */
6391 lsbits = 4;
6392 break;
6393 case SPU_BTI_S16_2:
6394 /* This is used for lqr and stqr. */
6395 lsbits = 2;
6396 break;
6397 default:
6398 lsbits = 0;
6399 }
6400
6401 if (GET_CODE (op) == LABEL_REF
6402 || (GET_CODE (op) == SYMBOL_REF
6403 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6404 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6405 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6406 d->name);
6407 }
6408}
6409
6410
70ca06f8 6411static int
5df189be 6412expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6413 rtx target, rtx ops[])
6414{
bc620c5c 6415 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6416 int i = 0, a;
6352eedf 6417
6418 /* Expand the arguments into rtl. */
6419
6420 if (d->parm[0] != SPU_BTI_VOID)
6421 ops[i++] = target;
6422
70ca06f8 6423 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6424 {
5df189be 6425 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6426 if (arg == 0)
6427 abort ();
b9c74b4d 6428 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6429 }
70ca06f8 6430
32f79657 6431 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6432 return i;
6352eedf 6433}
6434
6435static rtx
6436spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6437 tree exp, rtx target)
6352eedf 6438{
6439 rtx pat;
6440 rtx ops[8];
bc620c5c 6441 enum insn_code icode = (enum insn_code) d->icode;
3754d046 6442 machine_mode mode, tmode;
6352eedf 6443 int i, p;
70ca06f8 6444 int n_operands;
6352eedf 6445 tree return_type;
6446
6447 /* Set up ops[] with values from arglist. */
70ca06f8 6448 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6449
6450 /* Handle the target operand which must be operand 0. */
6451 i = 0;
6452 if (d->parm[0] != SPU_BTI_VOID)
6453 {
6454
 6455      /* We prefer the mode specified for the match_operand; otherwise
6456 use the mode from the builtin function prototype. */
6457 tmode = insn_data[d->icode].operand[0].mode;
6458 if (tmode == VOIDmode)
6459 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6460
 6461      /* Try to use target, because not using it can lead to extra copies,
 6462         and when all of the registers are in use, extra copies lead
 6463         to extra spills.  */
6464 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6465 ops[0] = target;
6466 else
6467 target = ops[0] = gen_reg_rtx (tmode);
6468
6469 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6470 abort ();
6471
6472 i++;
6473 }
6474
a76866d3 6475 if (d->fcode == SPU_MASK_FOR_LOAD)
6476 {
3754d046 6477 machine_mode mode = insn_data[icode].operand[1].mode;
a76866d3 6478 tree arg;
6479 rtx addr, op, pat;
6480
6481 /* get addr */
5df189be 6482 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6483 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6484 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6485 addr = memory_address (mode, op);
6486
6487 /* negate addr */
6488 op = gen_reg_rtx (GET_MODE (addr));
6489 emit_insn (gen_rtx_SET (VOIDmode, op,
6490 gen_rtx_NEG (GET_MODE (addr), addr)));
6491 op = gen_rtx_MEM (mode, op);
6492
6493 pat = GEN_FCN (icode) (target, op);
6494 if (!pat)
6495 return 0;
6496 emit_insn (pat);
6497 return target;
6498 }
6499
6352eedf 6500  /* Ignore align_hint, but still expand its args in case they have
6501 side effects. */
6502 if (icode == CODE_FOR_spu_align_hint)
6503 return 0;
6504
6505 /* Handle the rest of the operands. */
70ca06f8 6506 for (p = 1; i < n_operands; i++, p++)
6352eedf 6507 {
6508 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6509 mode = insn_data[d->icode].operand[i].mode;
6510 else
6511 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6512
6513 /* mode can be VOIDmode here for labels */
6514
6515 /* For specific intrinsics with an immediate operand, e.g.,
6516 si_ai(), we sometimes need to convert the scalar argument to a
6517 vector argument by splatting the scalar. */
6518 if (VECTOR_MODE_P (mode)
6519 && (GET_CODE (ops[i]) == CONST_INT
6520 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6521 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6522 {
6523 if (GET_CODE (ops[i]) == CONST_INT)
6524 ops[i] = spu_const (mode, INTVAL (ops[i]));
6525 else
6526 {
6527 rtx reg = gen_reg_rtx (mode);
3754d046 6528 machine_mode imode = GET_MODE_INNER (mode);
6352eedf 6529 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6530 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6531 if (imode != GET_MODE (ops[i]))
6532 ops[i] = convert_to_mode (imode, ops[i],
6533 TYPE_UNSIGNED (spu_builtin_types
6534 [d->parm[i]]));
6535 emit_insn (gen_spu_splats (reg, ops[i]));
6536 ops[i] = reg;
6537 }
6538 }
6539
5df189be 6540 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6541
6352eedf 6542 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6543 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6544 }
6545
70ca06f8 6546 switch (n_operands)
6352eedf 6547 {
6548 case 0:
6549 pat = GEN_FCN (icode) (0);
6550 break;
6551 case 1:
6552 pat = GEN_FCN (icode) (ops[0]);
6553 break;
6554 case 2:
6555 pat = GEN_FCN (icode) (ops[0], ops[1]);
6556 break;
6557 case 3:
6558 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6559 break;
6560 case 4:
6561 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6562 break;
6563 case 5:
6564 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6565 break;
6566 case 6:
6567 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6568 break;
6569 default:
6570 abort ();
6571 }
6572
6573 if (!pat)
6574 abort ();
6575
6576 if (d->type == B_CALL || d->type == B_BISLED)
6577 emit_call_insn (pat);
6578 else if (d->type == B_JUMP)
6579 {
6580 emit_jump_insn (pat);
6581 emit_barrier ();
6582 }
6583 else
6584 emit_insn (pat);
6585
6586 return_type = spu_builtin_types[d->parm[0]];
6587 if (d->parm[0] != SPU_BTI_VOID
6588 && GET_MODE (target) != TYPE_MODE (return_type))
6589 {
 6590      /* target is the return value.  It should always have the mode of
6591 the builtin function prototype. */
6592 target = spu_force_reg (TYPE_MODE (return_type), target);
6593 }
6594
6595 return target;
6596}
6597
6598rtx
6599spu_expand_builtin (tree exp,
6600 rtx target,
6601 rtx subtarget ATTRIBUTE_UNUSED,
3754d046 6602 machine_mode mode ATTRIBUTE_UNUSED,
6352eedf 6603 int ignore ATTRIBUTE_UNUSED)
6604{
5df189be 6605 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6606 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6607 struct spu_builtin_description *d;
6608
6609 if (fcode < NUM_SPU_BUILTINS)
6610 {
6611 d = &spu_builtins[fcode];
6612
5df189be 6613 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6614 }
6615 abort ();
6616}
6617
a76866d3 6618/* Implement targetm.vectorize.builtin_mask_for_load. */
6619static tree
6620spu_builtin_mask_for_load (void)
6621{
0c5c4d59 6622 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6623}
5df189be 6624
a28df51d 6625/* Implement targetm.vectorize.builtin_vectorization_cost. */
6626static int
0822b158 6627spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
d13adc77 6628 tree vectype,
0822b158 6629 int misalign ATTRIBUTE_UNUSED)
559093aa 6630{
d13adc77 6631 unsigned elements;
6632
559093aa 6633 switch (type_of_cost)
6634 {
6635 case scalar_stmt:
6636 case vector_stmt:
6637 case vector_load:
6638 case vector_store:
6639 case vec_to_scalar:
6640 case scalar_to_vec:
6641 case cond_branch_not_taken:
6642 case vec_perm:
5df2530b 6643 case vec_promote_demote:
559093aa 6644 return 1;
6645
6646 case scalar_store:
6647 return 10;
6648
6649 case scalar_load:
6650 /* Load + rotate. */
6651 return 2;
6652
6653 case unaligned_load:
6654 return 2;
6655
6656 case cond_branch_taken:
6657 return 6;
6658
d13adc77 6659 case vec_construct:
6660 elements = TYPE_VECTOR_SUBPARTS (vectype);
6661 return elements / 2 + 1;
6662
559093aa 6663 default:
6664 gcc_unreachable ();
6665 }
a28df51d 6666}
6667
4db2b577 6668/* Implement targetm.vectorize.init_cost. */
6669
61b33788 6670static void *
4db2b577 6671spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6672{
f97dec81 6673 unsigned *cost = XNEWVEC (unsigned, 3);
6674 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
4db2b577 6675 return cost;
6676}
6677
6678/* Implement targetm.vectorize.add_stmt_cost. */
6679
61b33788 6680static unsigned
4db2b577 6681spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
f97dec81 6682 struct _stmt_vec_info *stmt_info, int misalign,
6683 enum vect_cost_model_location where)
4db2b577 6684{
6685 unsigned *cost = (unsigned *) data;
6686 unsigned retval = 0;
6687
6688 if (flag_vect_cost_model)
6689 {
f97dec81 6690 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4db2b577 6691 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6692
6693 /* Statements in an inner loop relative to the loop being
6694 vectorized are weighted more heavily. The value here is
6695 arbitrary and could potentially be improved with analysis. */
f97dec81 6696 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4db2b577 6697 count *= 50; /* FIXME. */
6698
6699 retval = (unsigned) (count * stmt_cost);
f97dec81 6700 cost[where] += retval;
4db2b577 6701 }
6702
6703 return retval;
6704}
6705
6706/* Implement targetm.vectorize.finish_cost. */
6707
f97dec81 6708static void
6709spu_finish_cost (void *data, unsigned *prologue_cost,
6710 unsigned *body_cost, unsigned *epilogue_cost)
4db2b577 6711{
f97dec81 6712 unsigned *cost = (unsigned *) data;
6713 *prologue_cost = cost[vect_prologue];
6714 *body_cost = cost[vect_body];
6715 *epilogue_cost = cost[vect_epilogue];
4db2b577 6716}
6717
6718/* Implement targetm.vectorize.destroy_cost_data. */
6719
61b33788 6720static void
4db2b577 6721spu_destroy_cost_data (void *data)
6722{
6723 free (data);
6724}
6725
0e87db76 6726/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6727   after applying N iterations.  This routine does not determine
 6728   how many iterations are required to reach the desired alignment.  */
6729
6730static bool
a9f1838b 6731spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6732{
6733 if (is_packed)
6734 return false;
6735
6736 /* All other types are naturally aligned. */
6737 return true;
6738}
6739
6cf5579e 6740/* Return the appropriate mode for a named address pointer. */
3754d046 6741static machine_mode
6cf5579e 6742spu_addr_space_pointer_mode (addr_space_t addrspace)
6743{
6744 switch (addrspace)
6745 {
6746 case ADDR_SPACE_GENERIC:
6747 return ptr_mode;
6748 case ADDR_SPACE_EA:
6749 return EAmode;
6750 default:
6751 gcc_unreachable ();
6752 }
6753}
6754
6755/* Return the appropriate mode for a named address address. */
3754d046 6756static machine_mode
6cf5579e 6757spu_addr_space_address_mode (addr_space_t addrspace)
6758{
6759 switch (addrspace)
6760 {
6761 case ADDR_SPACE_GENERIC:
6762 return Pmode;
6763 case ADDR_SPACE_EA:
6764 return EAmode;
6765 default:
6766 gcc_unreachable ();
6767 }
6768}
6769
6770/* Determine if one named address space is a subset of another. */
6771
6772static bool
6773spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6774{
6775 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6776 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6777
6778 if (subset == superset)
6779 return true;
6780
6781 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6782 being subsets but instead as disjoint address spaces. */
6783 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6784 return false;
6785
6786 else
6787 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6788}
6789
6790/* Convert from one address space to another. */
6791static rtx
6792spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6793{
6794 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6795 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6796
6797 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6798 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6799
6800 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6801 {
6802 rtx result, ls;
6803
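      /* An __ea pointer that refers to local store is converted to a
	 generic pointer by subtracting the local-store base read from
	 __ea_local_store; the conditional move below keeps a NULL __ea
	 pointer mapped to a NULL generic pointer.  */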
6804 ls = gen_const_mem (DImode,
6805 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6806 set_mem_align (ls, 128);
6807
6808 result = gen_reg_rtx (Pmode);
6809 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6810 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6811 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6812 ls, const0_rtx, Pmode, 1);
6813
6814 emit_insn (gen_subsi3 (result, op, ls));
6815
6816 return result;
6817 }
6818
6819 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6820 {
6821 rtx result, ls;
6822
6823 ls = gen_const_mem (DImode,
6824 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6825 set_mem_align (ls, 128);
6826
6827 result = gen_reg_rtx (EAmode);
6828 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6829 op = force_reg (Pmode, op);
6830 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6831 ls, const0_rtx, EAmode, 1);
6832 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6833
6834 if (EAmode == SImode)
6835 emit_insn (gen_addsi3 (result, op, ls));
6836 else
6837 emit_insn (gen_adddi3 (result, op, ls));
6838
6839 return result;
6840 }
6841
6842 else
6843 gcc_unreachable ();
6844}
6845
6846
d52fd16a 6847/* Count the total number of instructions in each pipe and return the
6848 maximum, which is used as the Minimum Iteration Interval (MII)
 6849   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
 6850   -2 means the instruction can go in either pipe0 or pipe1.  */
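/* As a worked example of the bound computed below: with 4 either-pipe
   instructions, 3 pipe0-only and 2 pipe1-only, the MII is
   MAX ((4 + 3 + 2 + 1) / 2, MAX (3, 2)) = 5 cycles.  */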
6851static int
6852spu_sms_res_mii (struct ddg *g)
6853{
6854 int i;
6855 unsigned t[4] = {0, 0, 0, 0};
6856
6857 for (i = 0; i < g->num_nodes; i++)
6858 {
0af56f80 6859 rtx_insn *insn = g->nodes[i].insn;
d52fd16a 6860 int p = get_pipe (insn) + 2;
6861
1e944a0b 6862 gcc_assert (p >= 0);
6863 gcc_assert (p < 4);
d52fd16a 6864
6865 t[p]++;
6866 if (dump_file && INSN_P (insn))
6867 fprintf (dump_file, "i%d %s %d %d\n",
6868 INSN_UID (insn),
6869 insn_data[INSN_CODE(insn)].name,
6870 p, t[p]);
6871 }
6872 if (dump_file)
6873 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6874
6875 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6876}
6877
6878
5df189be 6879void
6880spu_init_expanders (void)
9d98604b 6881{
5df189be 6882 if (cfun)
9d98604b 6883 {
6884 rtx r0, r1;
 6885      /* The hard frame pointer is only 128 bit aligned when
6886 frame_pointer_needed is true. We don't know that until we're
6887 expanding the prologue. */
6888 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6889
6890 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6891 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6892 to be treated as aligned, so generate them here. */
6893 r0 = gen_reg_rtx (SImode);
6894 r1 = gen_reg_rtx (SImode);
6895 mark_reg_pointer (r0, 128);
6896 mark_reg_pointer (r1, 128);
6897 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6898 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6899 }
ea32e033 6900}
6901
3754d046 6902static machine_mode
ea32e033 6903spu_libgcc_cmp_return_mode (void)
6904{
6905
6906/* For SPU, word mode is TImode, so it is better to use SImode
 6907   for compare returns.  */
6908 return SImode;
6909}
6910
3754d046 6911static machine_mode
ea32e033 6912spu_libgcc_shift_count_mode (void)
6913{
6914/* For SPU, word mode is TImode, so it is better to use SImode
 6915   for shift counts.  */
6916 return SImode;
6917}
5a976006 6918
a08dfd55 6919/* Implement targetm.section_type_flags. */
6920static unsigned int
6921spu_section_type_flags (tree decl, const char *name, int reloc)
6922{
6923 /* .toe needs to have type @nobits. */
6924 if (strcmp (name, ".toe") == 0)
6925 return SECTION_BSS;
6cf5579e 6926 /* Don't load _ea into the current address space. */
6927 if (strcmp (name, "._ea") == 0)
6928 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6929 return default_section_type_flags (decl, name, reloc);
6930}
c2233b46 6931
6cf5579e 6932/* Implement targetm.select_section. */
6933static section *
6934spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6935{
6936 /* Variables and constants defined in the __ea address space
6937 go into a special section named "._ea". */
6938 if (TREE_TYPE (decl) != error_mark_node
6939 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6940 {
6941 /* We might get called with string constants, but get_named_section
6942 doesn't like them as they are not DECLs. Also, we need to set
6943 flags in that case. */
6944 if (!DECL_P (decl))
6945 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6946
6947 return get_named_section (decl, "._ea", reloc);
6948 }
6949
6950 return default_elf_select_section (decl, reloc, align);
6951}
6952
6953/* Implement targetm.unique_section. */
6954static void
6955spu_unique_section (tree decl, int reloc)
6956{
6957 /* We don't support unique section names in the __ea address
6958 space for now. */
6959 if (TREE_TYPE (decl) != error_mark_node
6960 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6961 return;
6962
6963 default_unique_section (decl, reloc);
6964}
6965
56c7bfc2 6966/* Generate a constant or register which contains 2^SCALE. We assume
6967 the result is valid for MODE. Currently, MODE must be V4SFmode and
6968 SCALE must be SImode. */
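/* The single-precision bit pattern of 2^SCALE is simply (127 + SCALE) << 23:
   a biased exponent of 127 + SCALE with an all-zero mantissa.  For example,
   SCALE == 3 gives (130 << 23) == 0x41000000 == 8.0f; the constant path
   below builds the same pattern bytewise.  */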
6969rtx
3754d046 6970spu_gen_exp2 (machine_mode mode, rtx scale)
56c7bfc2 6971{
6972 gcc_assert (mode == V4SFmode);
6973 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6974 if (GET_CODE (scale) != CONST_INT)
6975 {
6976 /* unsigned int exp = (127 + scale) << 23;
6977 __vector float m = (__vector float) spu_splats (exp); */
6978 rtx reg = force_reg (SImode, scale);
6979 rtx exp = gen_reg_rtx (SImode);
6980 rtx mul = gen_reg_rtx (mode);
6981 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6982 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6983 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6984 return mul;
6985 }
6986 else
6987 {
6988 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6989 unsigned char arr[16];
6990 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6991 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6992 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6993 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6994 return array_to_constant (mode, arr);
6995 }
6996}
6997
9d98604b 6998/* After reload, just change the convert into a move instruction
6999 or a dead instruction. */
7000void
7001spu_split_convert (rtx ops[])
7002{
7003 if (REGNO (ops[0]) == REGNO (ops[1]))
7004 emit_note (NOTE_INSN_DELETED);
7005 else
7006 {
 7007      /* Always use TImode, as this might help hard reg copyprop.  */
7008 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7009 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7010 emit_insn (gen_move_insn (op0, op1));
7011 }
7012}
7013
b3878a6c 7014void
4cbad5bb 7015spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7016{
7017 fprintf (file, "# profile\n");
7018 fprintf (file, "brsl $75, _mcount\n");
7019}
7020
329c1e4e 7021/* Implement targetm.ref_may_alias_errno. */
7022static bool
7023spu_ref_may_alias_errno (ao_ref *ref)
7024{
7025 tree base = ao_ref_base (ref);
7026
7027 /* With SPU newlib, errno is defined as something like
7028 _impure_data._errno
7029 The default implementation of this target macro does not
 7030     recognize such expressions, so special-case it here.  */
7031
7032 if (TREE_CODE (base) == VAR_DECL
7033 && !TREE_STATIC (base)
7034 && DECL_EXTERNAL (base)
7035 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7036 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7037 "_impure_data") == 0
7038 /* _errno is the first member of _impure_data. */
7039 && ref->offset == 0)
7040 return true;
7041
7042 return default_ref_may_alias_errno (ref);
7043}
7044
f17d2d13 7045/* Output thunk to FILE that implements a C++ virtual function call (with
7046 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7047 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7048 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7049 relative to the resulting this pointer. */
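/* For instance, with DELTA == 8 and VCALL_OFFSET == 0 the emitted thunk
   body is essentially 'ai $3,$3,8' followed by 'br <function>', assuming
   the this pointer arrives in the first argument register ($3); larger
   adjustments use the il/ilhu/iohl sequences below.  */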
7050
7051static void
7052spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7053 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7054 tree function)
7055{
7056 rtx op[8];
7057
7058 /* Make sure unwind info is emitted for the thunk if needed. */
7059 final_start_function (emit_barrier (), file, 1);
7060
7061 /* Operand 0 is the target function. */
7062 op[0] = XEXP (DECL_RTL (function), 0);
7063
7064 /* Operand 1 is the 'this' pointer. */
7065 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7066 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7067 else
7068 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7069
7070 /* Operands 2/3 are the low/high halfwords of delta. */
7071 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7072 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7073
7074 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7075 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7076 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7077
7078 /* Operands 6/7 are temporary registers. */
7079 op[6] = gen_rtx_REG (Pmode, 79);
7080 op[7] = gen_rtx_REG (Pmode, 78);
7081
7082 /* Add DELTA to this pointer. */
7083 if (delta)
7084 {
7085 if (delta >= -0x200 && delta < 0x200)
7086 output_asm_insn ("ai\t%1,%1,%2", op);
7087 else if (delta >= -0x8000 && delta < 0x8000)
7088 {
7089 output_asm_insn ("il\t%6,%2", op);
7090 output_asm_insn ("a\t%1,%1,%6", op);
7091 }
7092 else
7093 {
7094 output_asm_insn ("ilhu\t%6,%3", op);
7095 output_asm_insn ("iohl\t%6,%2", op);
7096 output_asm_insn ("a\t%1,%1,%6", op);
7097 }
7098 }
7099
7100 /* Perform vcall adjustment. */
7101 if (vcall_offset)
7102 {
7103 output_asm_insn ("lqd\t%7,0(%1)", op);
7104 output_asm_insn ("rotqby\t%7,%7,%1", op);
7105
7106 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7107 output_asm_insn ("ai\t%7,%7,%4", op);
7108 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7109 {
7110 output_asm_insn ("il\t%6,%4", op);
7111 output_asm_insn ("a\t%7,%7,%6", op);
7112 }
7113 else
7114 {
7115 output_asm_insn ("ilhu\t%6,%5", op);
7116 output_asm_insn ("iohl\t%6,%4", op);
7117 output_asm_insn ("a\t%7,%7,%6", op);
7118 }
7119
7120 output_asm_insn ("lqd\t%6,0(%7)", op);
7121 output_asm_insn ("rotqby\t%6,%6,%7", op);
7122 output_asm_insn ("a\t%1,%1,%6", op);
7123 }
7124
7125 /* Jump to target. */
7126 output_asm_insn ("br\t%0", op);
7127
7128 final_end_function ();
7129}
7130
d5065e6e 7131/* Canonicalize a comparison from one we don't have to one we do have. */
7132static void
7133spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7134 bool op0_preserve_value)
7135{
7136 if (!op0_preserve_value
7137 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7138 {
7139 rtx tem = *op0;
7140 *op0 = *op1;
7141 *op1 = tem;
7142 *code = (int)swap_condition ((enum rtx_code)*code);
7143 }
7144}
3defb88e 7145\f
7146/* Table of machine attributes. */
7147static const struct attribute_spec spu_attribute_table[] =
7148{
7149 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7150 affects_type_identity } */
7151 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7152 false },
7153 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7154 false },
7155 { NULL, 0, 0, false, false, false, NULL, false }
7156};
7157
7158/* TARGET overrides. */
7159
7160#undef TARGET_ADDR_SPACE_POINTER_MODE
7161#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7162
7163#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7164#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7165
7166#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7167#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7168 spu_addr_space_legitimate_address_p
7169
7170#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7171#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7172
7173#undef TARGET_ADDR_SPACE_SUBSET_P
7174#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7175
7176#undef TARGET_ADDR_SPACE_CONVERT
7177#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7178
7179#undef TARGET_INIT_BUILTINS
7180#define TARGET_INIT_BUILTINS spu_init_builtins
7181#undef TARGET_BUILTIN_DECL
7182#define TARGET_BUILTIN_DECL spu_builtin_decl
7183
7184#undef TARGET_EXPAND_BUILTIN
7185#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7186
7187#undef TARGET_UNWIND_WORD_MODE
7188#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7189
7190#undef TARGET_LEGITIMIZE_ADDRESS
7191#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7192
7193/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7194 and .quad for the debugger. When it is known that the assembler is fixed,
7195 these can be removed. */
7196#undef TARGET_ASM_UNALIGNED_SI_OP
7197#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7198
7199#undef TARGET_ASM_ALIGNED_DI_OP
7200#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7201
7202/* The .8byte directive doesn't seem to work well for a 32 bit
7203 architecture. */
7204#undef TARGET_ASM_UNALIGNED_DI_OP
7205#define TARGET_ASM_UNALIGNED_DI_OP NULL
7206
7207#undef TARGET_RTX_COSTS
7208#define TARGET_RTX_COSTS spu_rtx_costs
7209
7210#undef TARGET_ADDRESS_COST
d9c5e5f4 7211#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
3defb88e 7212
7213#undef TARGET_SCHED_ISSUE_RATE
7214#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7215
7216#undef TARGET_SCHED_INIT_GLOBAL
7217#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7218
7219#undef TARGET_SCHED_INIT
7220#define TARGET_SCHED_INIT spu_sched_init
7221
7222#undef TARGET_SCHED_VARIABLE_ISSUE
7223#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7224
7225#undef TARGET_SCHED_REORDER
7226#define TARGET_SCHED_REORDER spu_sched_reorder
7227
7228#undef TARGET_SCHED_REORDER2
7229#define TARGET_SCHED_REORDER2 spu_sched_reorder
7230
7231#undef TARGET_SCHED_ADJUST_COST
7232#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7233
7234#undef TARGET_ATTRIBUTE_TABLE
7235#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7236
7237#undef TARGET_ASM_INTEGER
7238#define TARGET_ASM_INTEGER spu_assemble_integer
7239
7240#undef TARGET_SCALAR_MODE_SUPPORTED_P
7241#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7242
7243#undef TARGET_VECTOR_MODE_SUPPORTED_P
7244#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7245
7246#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7247#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7248
7249#undef TARGET_ASM_GLOBALIZE_LABEL
7250#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7251
7252#undef TARGET_PASS_BY_REFERENCE
7253#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7254
7255#undef TARGET_FUNCTION_ARG
7256#define TARGET_FUNCTION_ARG spu_function_arg
7257
7258#undef TARGET_FUNCTION_ARG_ADVANCE
7259#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7260
7261#undef TARGET_MUST_PASS_IN_STACK
7262#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7263
7264#undef TARGET_BUILD_BUILTIN_VA_LIST
7265#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7266
7267#undef TARGET_EXPAND_BUILTIN_VA_START
7268#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7269
7270#undef TARGET_SETUP_INCOMING_VARARGS
7271#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7272
7273#undef TARGET_MACHINE_DEPENDENT_REORG
7274#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7275
7276#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7277#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7278
7279#undef TARGET_INIT_LIBFUNCS
7280#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7281
7282#undef TARGET_RETURN_IN_MEMORY
7283#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7284
7285#undef TARGET_ENCODE_SECTION_INFO
7286#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7287
7288#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7289#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7290
7291#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7292#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7293
7294#undef TARGET_VECTORIZE_INIT_COST
7295#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7296
7297#undef TARGET_VECTORIZE_ADD_STMT_COST
7298#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7299
7300#undef TARGET_VECTORIZE_FINISH_COST
7301#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7302
7303#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7304#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7305
7306#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7307#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7308
7309#undef TARGET_LIBGCC_CMP_RETURN_MODE
7310#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7311
7312#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7313#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7314
7315#undef TARGET_SCHED_SMS_RES_MII
7316#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7317
7318#undef TARGET_SECTION_TYPE_FLAGS
7319#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7320
7321#undef TARGET_ASM_SELECT_SECTION
7322#define TARGET_ASM_SELECT_SECTION spu_select_section
7323
7324#undef TARGET_ASM_UNIQUE_SECTION
7325#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7326
7327#undef TARGET_LEGITIMATE_ADDRESS_P
7328#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7329
7330#undef TARGET_LEGITIMATE_CONSTANT_P
7331#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7332
7333#undef TARGET_TRAMPOLINE_INIT
7334#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7335
08c6cbd2 7336#undef TARGET_WARN_FUNC_RETURN
7337#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7338
3defb88e 7339#undef TARGET_OPTION_OVERRIDE
7340#define TARGET_OPTION_OVERRIDE spu_option_override
7341
7342#undef TARGET_CONDITIONAL_REGISTER_USAGE
7343#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7344
7345#undef TARGET_REF_MAY_ALIAS_ERRNO
7346#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7347
7348#undef TARGET_ASM_OUTPUT_MI_THUNK
7349#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7350#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7351#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7352
7353/* Variable tracking should be run after all optimizations which
7354 change order of insns. It also needs a valid CFG. */
7355#undef TARGET_DELAY_VARTRACK
7356#define TARGET_DELAY_VARTRACK true
7357
d5065e6e 7358#undef TARGET_CANONICALIZE_COMPARISON
7359#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7360
5f35dd0e 7361#undef TARGET_CAN_USE_DOLOOP_P
7362#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7363
3defb88e 7364struct gcc_target targetm = TARGET_INITIALIZER;
7365
c2233b46 7366#include "gt-spu.h"