/* Copyright (C) 2006-2015 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "params.h"
#include "gimplify.h"
#include "tm-constrs.h"
#include "ddg.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"

/* This file should be included last.  */
#include "target-def.h"

/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type. (Implemented with V16QI_type_node) */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_VOID,
  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];

struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};

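/* Each entry in spu_builtin_range above gives the [low, high] bounds
   accepted for the corresponding SPU_BTI_* immediate operand; for
   example SPU_BTI_S10 is a signed 10-bit field (-0x200 .. 0x1ff) and
   SPU_BTI_U7 an unsigned 7-bit field (0 .. 0x7f).  */
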
\f
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs.  */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);

/* Which instruction set architecture to use.  */
int spu_arch;
/* Which cpu are we tuning for.  */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);

enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						machine_mode mode);

/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)

\f
/* Define the structure for the machine field in struct function.  */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses.  */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'.  */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Set up function hooks.  */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -march= switch", spu_arch_string);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -mtune= switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
\f
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case TImode:
    case DFmode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}

/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREG's where this is correct.  */
int
valid_subreg (rtx op)
{
  machine_mode om = GET_MODE (op);
  machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}

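/* Concretely: wrapping a QImode register in an SImode SUBREG is accepted
   above because both modes fit in the same 4-byte slot, and one 16-byte
   mode inside another is accepted, but an SImode SUBREG of a DImode
   register (4 vs. 8 bytes) fails all three tests and is rejected.  */
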
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, ensure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}

void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (MEM_P (src))
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address. */

      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load. */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  start &= 7;
	  if (r0)
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	}
      else
	{
	  /* Need 2 loads. */
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  start &= 7;

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}

    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start. */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}

void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  unsigned HOST_WIDE_INT maskbits;
  machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;


  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
      if (start)
	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
      if (start)
	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  addr1 = plus_constant (Pmode, addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}


int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (offset < bytes)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}

enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};

/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  machine_mode comp_mode;
  machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case GE:
	    op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible.  */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
    {
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case LE:
	    op1 = GEN_INT (val);
	    code = LT;
	    break;
	  case LEU:
	    op1 = GEN_INT (val);
	    code = LTU;
	    break;
	  default:
	    break;
	  }
    }

  comp_mode = SImode;
  op_mode = GET_MODE (op0);

  switch (code)
    {
    case GE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 0;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 1;
	  reverse_test = 1;
	}
      break;
    case LE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 1;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 0;
	  reverse_test = 1;
	}
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      break;
    case TImode:
      index = 4;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
      index = 7;
      comp_mode = op_mode;
      break;
    case V8HImode:
      index = 8;
      comp_mode = op_mode;
      break;
    case V4SImode:
      index = 9;
      comp_mode = op_mode;
      break;
    case V4SFmode:
      index = 10;
      comp_mode = V4SImode;
      break;
    case V2DFmode:
      index = 11;
      comp_mode = V2DImode;
      break;
    case V2DImode:
    default:
      abort ();
    }

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode
	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = op1;
	  op1 = op0;
	  op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (op0, op_mode))
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (op1, op_mode))
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 op0, op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     op0, op1);
	  if (eq_rtx == 0)
	    abort ();
	  emit_insn (eq_rtx);
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
	emit_insn (gen_rtx_SET (compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}

HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  if (GET_MODE (x) == SFmode)
    REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return const_double_from_real_value (rv, mode);
}

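/* These two helpers are inverses of each other: const_double_to_hwint
   extracts the target bit pattern of an SFmode or DFmode constant (for
   example 1.0f becomes 0x3f800000), and hwint_to_const_double rebuilds
   the CONST_DOUBLE from such a bit pattern.  */
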
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;

    default:
      debug_rtx (addr);
      abort ();
    }
}

void
print_operand (FILE * file, rtx x, int code)
{
  machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {

    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable();
	    }
	}
      else
	gcc_unreachable();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		  break;
		case SPU_ILA:
		  fprintf (file, "a");
		  break;
		case SPU_ILH:
		  fprintf (file, "h");
		  break;
		case SPU_ILHU:
		  fprintf (file, "hu");
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      if (info == 1)
		fprintf (file, "b");
	      else if (info == 2)
		fprintf (file, "h");
	      else if (info == 4)
		fprintf (file, "w");
	      else if (info == 8)
		fprintf (file, "d");
	      break;
	    case IC_IL1s:
	      if (xcode == CONST_VECTOR)
		{
		  x = CONST_VECTOR_ELT (x, 0);
		  xcode = GET_CODE (x);
		}
	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
		fprintf (file, "a");
	      else if (xcode == HIGH)
		fprintf (file, "hu");
	      break;
	    case IC_FSMBI:
	    case IC_FSMBI2:
	    case IC_IL2:
	    case IC_IL2s:
	    case IC_POOL:
	      abort ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      if (CONSTANT_P (x))
	{
	  enum immediate_class c = classify_immediate (x, mode);
	  switch (c)
	    {
	    case IC_IL1:
	      constant_to_array (mode, x, arr);
	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	      val = trunc_int_for_mode (val, SImode);
	      switch (which_immediate_load (val))
		{
		case SPU_IL:
		case SPU_ILA:
		  break;
		case SPU_ILH:
		case SPU_ILHU:
		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	      break;
	    case IC_FSMBI:
	      constant_to_array (mode, x, arr);
	      val = 0;
	      for (i = 0; i < 16; i++)
		{
		  val <<= 1;
		  val |= arr[i] & 1;
		}
	      print_operand (file, GEN_INT (val), 0);
	      break;
	    case IC_CPAT:
	      constant_to_array (mode, x, arr);
	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
	      break;
	    case IC_IL1s:
	      if (xcode == HIGH)
		x = XEXP (x, 0);
	      if (GET_CODE (x) == CONST_VECTOR)
		x = CONST_VECTOR_ELT (x, 0);
	      output_addr_const (file, x);
	      if (xcode == HIGH)
		fprintf (file, "@h");
	      break;
	    case IC_IL2:
	    case IC_IL2s:
	    case IC_FSMBI2:
	    case IC_POOL:
	      abort ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'C':
      if (xcode == CONST_INT)
	{
	  /* Only the 4 least significant bits are relevant for generating
	     control word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable();
	  }
      else
	gcc_unreachable();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'v':
    case 'w':
      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

      /* unused letters
                      o qr  u   yz
         AB           OPQR  UVWXYZ */
    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  if (!reload_completed && !reload_in_progress)
    abort ();

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx.  */
  if (!cfun->machine->pic_reg)
    {
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
      else
	cfun->machine->pic_reg = pic_offset_table_rtx;
    }

  return cfun->machine->pic_reg;
}

/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
int
spu_split_immediate (rtx * ops)
{
  machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	else
	  to = ops[0];
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}

/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}

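/* Note that each saved register is accounted 16 bytes (0x10) above:
   SPU registers are 128 bits wide and are spilled and reloaded with
   quadword moves (see frame_emit_store and frame_emit_load below).  */
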
23c39aaa 1619static rtx_insn *
85d9c13c
TS
1620frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1621{
1622 rtx reg = gen_rtx_REG (V4SImode, regno);
1623 rtx mem =
1624 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1625 return emit_insn (gen_movv4si (mem, reg));
1626}
1627
23c39aaa 1628static rtx_insn *
85d9c13c
TS
1629frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1630{
1631 rtx reg = gen_rtx_REG (V4SImode, regno);
1632 rtx mem =
1633 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1634 return emit_insn (gen_movv4si (reg, mem));
1635}
1636
1637/* This happens after reload, so we need to expand it. */
23c39aaa 1638static rtx_insn *
85d9c13c
TS
1639frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1640{
23c39aaa 1641 rtx_insn *insn;
85d9c13c
TS
1642 if (satisfies_constraint_K (GEN_INT (imm)))
1643 {
1644 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1645 }
1646 else
1647 {
6fb5fa3c 1648 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
85d9c13c
TS
1649 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1650 if (REGNO (src) == REGNO (scratch))
1651 abort ();
1652 }
85d9c13c
TS
1653 return insn;
1654}
1655
1656/* Return nonzero if this function is known to have a null epilogue. */
1657
1658int
1659direct_return (void)
1660{
1661 if (reload_completed)
1662 {
1663 if (cfun->static_chain_decl == 0
1664 && (spu_saved_regs_size ()
1665 + get_frame_size ()
38173d38
JH
1666 + crtl->outgoing_args_size
1667 + crtl->args.pretend_args_size == 0)
416ff32e 1668 && crtl->is_leaf)
85d9c13c
TS
1669 return 1;
1670 }
1671 return 0;
1672}
1673
1674/*
1675 The stack frame looks like this:
1676 +-------------+
1677 | incoming |
7310a2da
SSF
1678 | args |
1679 AP -> +-------------+
85d9c13c
TS
1680 | $lr save |
1681 +-------------+
1682 prev SP | back chain |
1683 +-------------+
1684 | var args |
38173d38 1685 | reg save | crtl->args.pretend_args_size bytes
85d9c13c
TS
1686 +-------------+
1687 | ... |
1688 | saved regs | spu_saved_regs_size() bytes
7310a2da 1689 FP -> +-------------+
85d9c13c 1690 | ... |
7310a2da
SSF
1691 | vars | get_frame_size() bytes
1692 HFP -> +-------------+
85d9c13c
TS
1693 | ... |
1694 | outgoing |
38173d38 1695 | args | crtl->outgoing_args_size bytes
85d9c13c
TS
1696 +-------------+
1697 | $lr of next |
1698 | frame |
1699 +-------------+
7310a2da
SSF
1700 | back chain |
1701 SP -> +-------------+
85d9c13c
TS
1702
1703*/
1704void
1705spu_expand_prologue (void)
1706{
1707 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1708 HOST_WIDE_INT total_size;
1709 HOST_WIDE_INT saved_regs_size;
1710 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1711 rtx scratch_reg_0, scratch_reg_1;
23c39aaa
DM
1712 rtx_insn *insn;
1713 rtx real;
85d9c13c 1714
c5d94218
UW
1715 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1716 cfun->machine->pic_reg = pic_offset_table_rtx;
85d9c13c
TS
1717
1718 if (spu_naked_function_p (current_function_decl))
1719 return;
1720
1721 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1722 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1723
1724 saved_regs_size = spu_saved_regs_size ();
1725 total_size = size + saved_regs_size
38173d38
JH
1726 + crtl->outgoing_args_size
1727 + crtl->args.pretend_args_size;
85d9c13c 1728
416ff32e 1729 if (!crtl->is_leaf
e3b5732b 1730 || cfun->calls_alloca || total_size > 0)
85d9c13c
TS
1731 total_size += STACK_POINTER_OFFSET;
1732
1733 /* Save this first because code after this might use the link
1734 register as a scratch register. */
416ff32e 1735 if (!crtl->is_leaf)
85d9c13c
TS
1736 {
1737 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1738 RTX_FRAME_RELATED_P (insn) = 1;
1739 }
1740
1741 if (total_size > 0)
1742 {
38173d38 1743 offset = -crtl->args.pretend_args_size;
85d9c13c
TS
1744 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1745 if (need_to_save_reg (regno, 1))
1746 {
1747 offset -= 16;
1748 insn = frame_emit_store (regno, sp_reg, offset);
1749 RTX_FRAME_RELATED_P (insn) = 1;
1750 }
1751 }
1752
c5d94218 1753 if (flag_pic && cfun->machine->pic_reg)
85d9c13c 1754 {
c5d94218 1755 rtx pic_reg = cfun->machine->pic_reg;
85d9c13c 1756 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
85d9c13c 1757 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
85d9c13c
TS
1758 }
1759
1760 if (total_size > 0)
1761 {
1762 if (flag_stack_check)
1763 {
24fc18b9 1764 /* We compare against total_size-1 because
85d9c13c
TS
1765 ($sp >= total_size) <=> ($sp > total_size-1) */
1766 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1767 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1768 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1769 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1770 {
1771 emit_move_insn (scratch_v4si, size_v4si);
1772 size_v4si = scratch_v4si;
1773 }
1774 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1775 emit_insn (gen_vec_extractv4si
1776 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1777 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1778 }
1779
1780 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1781 the value of the previous $sp because we save it as the back
1782 chain. */
1783 if (total_size <= 2000)
1784 {
1785 /* In this case we save the back chain first. */
1786 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
85d9c13c
TS
1787 insn =
1788 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1789 }
85d9c13c
TS
1790 else
1791 {
1792 insn = emit_move_insn (scratch_reg_0, sp_reg);
85d9c13c
TS
1793 insn =
1794 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1795 }
1796 RTX_FRAME_RELATED_P (insn) = 1;
1797 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
bbbbb16a 1798 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
85d9c13c
TS
1799
1800 if (total_size > 2000)
1801 {
1802 /* Save the back chain ptr */
1803 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
85d9c13c
TS
1804 }
1805
1806 if (frame_pointer_needed)
1807 {
1808 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1809 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
38173d38 1810 + crtl->outgoing_args_size;
85d9c13c 1811 /* Set the new frame_pointer */
10d55907
UW
1812 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1813 RTX_FRAME_RELATED_P (insn) = 1;
1814 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
bbbbb16a 1815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
73701e27 1816 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
85d9c13c
TS
1817 }
1818 }
1819
a11e0df4 1820 if (flag_stack_usage_info)
4c825c02 1821 current_function_static_stack_size = total_size;
85d9c13c
TS
1822}
1823
1824void
1825spu_expand_epilogue (bool sibcall_p)
1826{
1827 int size = get_frame_size (), offset, regno;
1828 HOST_WIDE_INT saved_regs_size, total_size;
1829 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
c187d33c 1830 rtx scratch_reg_0;
85d9c13c 1831
85d9c13c
TS
1832 if (spu_naked_function_p (current_function_decl))
1833 return;
1834
1835 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1836
1837 saved_regs_size = spu_saved_regs_size ();
1838 total_size = size + saved_regs_size
38173d38
JH
1839 + crtl->outgoing_args_size
1840 + crtl->args.pretend_args_size;
85d9c13c 1841
416ff32e 1842 if (!crtl->is_leaf
e3b5732b 1843 || cfun->calls_alloca || total_size > 0)
85d9c13c
TS
1844 total_size += STACK_POINTER_OFFSET;
1845
1846 if (total_size > 0)
1847 {
e3b5732b 1848 if (cfun->calls_alloca)
85d9c13c
TS
1849 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1850 else
1851 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1852
1853
1854 if (saved_regs_size > 0)
1855 {
38173d38 1856 offset = -crtl->args.pretend_args_size;
85d9c13c
TS
1857 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1858 if (need_to_save_reg (regno, 1))
1859 {
1860 offset -= 0x10;
1861 frame_emit_load (regno, sp_reg, offset);
1862 }
1863 }
1864 }
1865
416ff32e 1866 if (!crtl->is_leaf)
85d9c13c
TS
1867 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1868
1869 if (!sibcall_p)
1870 {
c41c1387 1871 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
c187d33c 1872 emit_jump_insn (gen__return ());
85d9c13c 1873 }
85d9c13c
TS
1874}
1875
1876rtx
1877spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1878{
1879 if (count != 0)
1880 return 0;
1881 /* This is inefficient because it ends up copying to a save-register
1882 which then gets saved even though $lr has already been saved. But
1883 it does generate better code for leaf functions and we don't need
1884 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1885 used for __builtin_return_address anyway, so maybe we don't care if
1886 it's inefficient. */
1887 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1888}
1889\f
1890
1891/* Given VAL, generate a constant appropriate for MODE.
1892 If MODE is a vector mode, every element will be VAL.
1893 For TImode, VAL will be zero extended to 128 bits. */
1894rtx
ef4bddc2 1895spu_const (machine_mode mode, HOST_WIDE_INT val)
85d9c13c
TS
1896{
1897 rtx inner;
1898 rtvec v;
1899 int units, i;
1900
1901 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1902 || GET_MODE_CLASS (mode) == MODE_FLOAT
1903 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1904 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1905
1906 if (GET_MODE_CLASS (mode) == MODE_INT)
1907 return immed_double_const (val, 0, mode);
1908
1909 /* val is the bit representation of the float */
1910 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1911 return hwint_to_const_double (mode, val);
1912
1913 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1914 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1915 else
1916 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1917
1918 units = GET_MODE_NUNITS (mode);
1919
1920 v = rtvec_alloc (units);
1921
1922 for (i = 0; i < units; ++i)
1923 RTVEC_ELT (v, i) = inner;
1924
1925 return gen_rtx_CONST_VECTOR (mode, v);
1926}
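To make the three cases above concrete, here is a small illustrative sketch (not part of spu.c; it simply calls the function defined above and assumes it is compiled in the same file):

/* Illustrative sketch only.  */
static void
spu_const_examples (void)
{
  /* Vector integer mode: every element is VAL, so this is {1, 1, 1, 1}.  */
  rtx splat_ones = spu_const (V4SImode, 1);

  /* Plain integer mode: for TImode, VAL is zero extended, so -1 becomes a
     128-bit constant with only the low 64 bits set.  */
  rtx low_bits = spu_const (TImode, -1);

  /* Float modes take VAL as the bit pattern, so 0x3f800000 yields 1.0f in
     every element of a V4SF vector.  */
  rtx splat_onef = spu_const (V4SFmode, 0x3f800000);

  (void) splat_ones; (void) low_bits; (void) splat_onef;
}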
85d9c13c 1927
39aeae85
SL
1928/* Create a MODE vector constant from 4 ints. */
1929rtx
ef4bddc2 1930spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
39aeae85
SL
1931{
1932 unsigned char arr[16];
1933 arr[0] = (a >> 24) & 0xff;
1934 arr[1] = (a >> 16) & 0xff;
1935 arr[2] = (a >> 8) & 0xff;
1936 arr[3] = (a >> 0) & 0xff;
1937 arr[4] = (b >> 24) & 0xff;
1938 arr[5] = (b >> 16) & 0xff;
1939 arr[6] = (b >> 8) & 0xff;
1940 arr[7] = (b >> 0) & 0xff;
1941 arr[8] = (c >> 24) & 0xff;
1942 arr[9] = (c >> 16) & 0xff;
1943 arr[10] = (c >> 8) & 0xff;
1944 arr[11] = (c >> 0) & 0xff;
1945 arr[12] = (d >> 24) & 0xff;
1946 arr[13] = (d >> 16) & 0xff;
1947 arr[14] = (d >> 8) & 0xff;
1948 arr[15] = (d >> 0) & 0xff;
1949 return array_to_constant(mode, arr);
1950}
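A hypothetical use (not taken from spu.c): because A is stored big-endian into bytes 0-3, B into bytes 4-7, and so on, a shuffle selector reads naturally from left to right.  On the SPU, selector bytes 0x00-0x0f pick bytes of the first shufb operand and 0x10-0x1f pick bytes of the second, so the constant below would interleave the first two words of each operand:

/* Illustrative sketch only: a shufb selector taking words 0 and 1
   alternately from the first and second operand.  */
static rtx
example_shuffle_selector (void)
{
  return spu_const_from_ints (V4SImode,
			      0x00010203,   /* word 0 of operand A */
			      0x10111213,   /* word 0 of operand B */
			      0x04050607,   /* word 1 of operand A */
			      0x14151617);  /* word 1 of operand B */
}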
9dcc2e87
TS
1951\f
1952/* branch hint stuff */
39aeae85 1953
85d9c13c
TS
1954/* An array of these is used to propagate hints to predecessor blocks. */
1955struct spu_bb_info
1956{
23c39aaa 1957 rtx_insn *prop_jump; /* propagated from another block */
9dcc2e87 1958 int bb_index; /* the original block. */
85d9c13c 1959};
9dcc2e87 1960static struct spu_bb_info *spu_bb_info;
85d9c13c 1961
9dcc2e87 1962#define STOP_HINT_P(INSN) \
b64925dc 1963 (CALL_P(INSN) \
9dcc2e87
TS
1964 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1965 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1966
1967/* 1 when RTX is a hinted branch or its target. We keep track of
1968 what has been hinted so the safe-hint code can test it easily. */
1969#define HINTED_P(RTX) \
1970 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1971
1972/* 1 when RTX is an insn that must be scheduled on an even boundary. */
1973#define SCHED_ON_EVEN_P(RTX) \
1974 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1975
1976/* Emit a nop for INSN such that the two will dual issue. This assumes
1977 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1978 We check for TImode to handle a MULTI1 insn which has dual issued its
b3d45ff0 1979 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
9dcc2e87 1980static void
23c39aaa 1981emit_nop_for_insn (rtx_insn *insn)
85d9c13c 1982{
9dcc2e87 1983 int p;
23c39aaa 1984 rtx_insn *new_insn;
b3d45ff0
UW
1985
1986 /* We need to handle JUMP_TABLE_DATA separately. */
1987 if (JUMP_TABLE_DATA_P (insn))
1988 {
1989 new_insn = emit_insn_after (gen_lnop(), insn);
1990 recog_memoized (new_insn);
1991 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1992 return;
1993 }
1994
9dcc2e87
TS
1995 p = get_pipe (insn);
1996 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1997 new_insn = emit_insn_after (gen_lnop (), insn);
1998 else if (p == 1 && GET_MODE (insn) == TImode)
85d9c13c 1999 {
9dcc2e87
TS
2000 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2001 PUT_MODE (new_insn, TImode);
2002 PUT_MODE (insn, VOIDmode);
2003 }
2004 else
2005 new_insn = emit_insn_after (gen_lnop (), insn);
2006 recog_memoized (new_insn);
9d12bc68 2007 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
9dcc2e87
TS
2008}
2009
2010/* Insert nops in basic blocks to meet dual issue alignment
2011 requirements. Also make sure hbrp and hint instructions are at least
2012 one cycle apart, possibly inserting a nop. */
2013static void
2014pad_bb(void)
2015{
23c39aaa 2016 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
9dcc2e87
TS
2017 int length;
2018 int addr;
2019
2020 /* This sets up INSN_ADDRESSES. */
2021 shorten_branches (get_insns ());
2022
2023 /* Keep track of length added by nops. */
2024 length = 0;
2025
2026 prev_insn = 0;
2027 insn = get_insns ();
2028 if (!active_insn_p (insn))
2029 insn = next_active_insn (insn);
2030 for (; insn; insn = next_insn)
2031 {
2032 next_insn = next_active_insn (insn);
2033 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2034 || INSN_CODE (insn) == CODE_FOR_hbr)
85d9c13c 2035 {
9dcc2e87
TS
2036 if (hbr_insn)
2037 {
2038 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2039 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2040 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2041 || (a1 - a0 == 4))
2042 {
2043 prev_insn = emit_insn_before (gen_lnop (), insn);
2044 PUT_MODE (prev_insn, GET_MODE (insn));
2045 PUT_MODE (insn, TImode);
9d12bc68 2046 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
9dcc2e87
TS
2047 length += 4;
2048 }
2049 }
2050 hbr_insn = insn;
2051 }
7c40228a 2052 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
9dcc2e87
TS
2053 {
2054 if (GET_MODE (insn) == TImode)
2055 PUT_MODE (next_insn, TImode);
2056 insn = next_insn;
2057 next_insn = next_active_insn (insn);
2058 }
2059 addr = INSN_ADDRESSES (INSN_UID (insn));
2060 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2061 {
2062 if (((addr + length) & 7) != 0)
2063 {
2064 emit_nop_for_insn (prev_insn);
2065 length += 4;
2066 }
85d9c13c 2067 }
9dcc2e87
TS
2068 else if (GET_MODE (insn) == TImode
2069 && ((next_insn && GET_MODE (next_insn) != TImode)
2070 || get_attr_type (insn) == TYPE_MULTI0)
2071 && ((addr + length) & 7) != 0)
2072 {
2073 /* prev_insn will always be set because the first insn is
2074 always 8-byte aligned. */
2075 emit_nop_for_insn (prev_insn);
2076 length += 4;
2077 }
2078 prev_insn = insn;
85d9c13c 2079 }
85d9c13c
TS
2080}
2081
9dcc2e87
TS
2082\f
2083/* Routines for branch hints. */
2084
85d9c13c 2085static void
23c39aaa 2086spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
9dcc2e87 2087 int distance, sbitmap blocks)
85d9c13c 2088{
9dcc2e87 2089 rtx branch_label = 0;
23c39aaa
DM
2090 rtx_insn *hint;
2091 rtx_insn *insn;
8942ee0f 2092 rtx_jump_table_data *table;
85d9c13c
TS
2093
2094 if (before == 0 || branch == 0 || target == 0)
2095 return;
2096
9dcc2e87
TS
 2097 /* While scheduling we require hints to be no further than 600 bytes
 2098 from the branch, so we need to enforce that here too. */
85d9c13c
TS
2099 if (distance > 600)
2100 return;
2101
 9dcc2e87 2102 /* If BEFORE is a basic block note, emit the hint after the note. */
051de0eb 2103 if (NOTE_INSN_BASIC_BLOCK_P (before))
9dcc2e87 2104 before = NEXT_INSN (before);
85d9c13c
TS
2105
2106 branch_label = gen_label_rtx ();
2107 LABEL_NUSES (branch_label)++;
2108 LABEL_PRESERVE_P (branch_label) = 1;
2109 insn = emit_label_before (branch_label, branch);
2110 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
d7c028c0 2111 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
9dcc2e87
TS
2112
2113 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2114 recog_memoized (hint);
9d12bc68 2115 INSN_LOCATION (hint) = INSN_LOCATION (branch);
9dcc2e87 2116 HINTED_P (branch) = 1;
85d9c13c 2117
9dcc2e87
TS
2118 if (GET_CODE (target) == LABEL_REF)
2119 HINTED_P (XEXP (target, 0)) = 1;
2120 else if (tablejump_p (branch, 0, &table))
85d9c13c 2121 {
9dcc2e87
TS
2122 rtvec vec;
2123 int j;
2124 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2125 vec = XVEC (PATTERN (table), 0);
2126 else
2127 vec = XVEC (PATTERN (table), 1);
2128 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2129 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
85d9c13c 2130 }
9dcc2e87
TS
2131
2132 if (distance >= 588)
85d9c13c 2133 {
9dcc2e87
TS
2134 /* Make sure the hint isn't scheduled any earlier than this point,
 2135 which could make it too far for the branch offset to fit. */
f626b979
UW
2136 insn = emit_insn_before (gen_blockage (), hint);
2137 recog_memoized (insn);
9d12bc68 2138 INSN_LOCATION (insn) = INSN_LOCATION (hint);
9dcc2e87
TS
2139 }
2140 else if (distance <= 8 * 4)
2141 {
2142 /* To guarantee at least 8 insns between the hint and branch we
2143 insert nops. */
2144 int d;
2145 for (d = distance; d < 8 * 4; d += 4)
2146 {
2147 insn =
2148 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2149 recog_memoized (insn);
9d12bc68 2150 INSN_LOCATION (insn) = INSN_LOCATION (hint);
9dcc2e87
TS
2151 }
2152
2153 /* Make sure any nops inserted aren't scheduled before the hint. */
f626b979
UW
2154 insn = emit_insn_after (gen_blockage (), hint);
2155 recog_memoized (insn);
9d12bc68 2156 INSN_LOCATION (insn) = INSN_LOCATION (hint);
9dcc2e87
TS
2157
2158 /* Make sure any nops inserted aren't scheduled after the call. */
2159 if (CALL_P (branch) && distance < 8 * 4)
f626b979
UW
2160 {
2161 insn = emit_insn_before (gen_blockage (), branch);
2162 recog_memoized (insn);
9d12bc68 2163 INSN_LOCATION (insn) = INSN_LOCATION (branch);
f626b979 2164 }
85d9c13c 2165 }
85d9c13c
TS
2166}
2167
2168/* Returns 0 if we don't want a hint for this branch. Otherwise return
2169 the rtx for the branch target. */
2170static rtx
23c39aaa 2171get_branch_target (rtx_insn *branch)
85d9c13c 2172{
b64925dc 2173 if (JUMP_P (branch))
85d9c13c
TS
2174 {
2175 rtx set, src;
2176
2177 /* Return statements */
2178 if (GET_CODE (PATTERN (branch)) == RETURN)
2179 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2180
aa633255 2181 /* ASM GOTOs. */
3f254607 2182 if (extract_asm_operands (PATTERN (branch)) != NULL)
aa633255
AP
2183 return NULL;
2184
85d9c13c
TS
2185 set = single_set (branch);
2186 src = SET_SRC (set);
2187 if (GET_CODE (SET_DEST (set)) != PC)
2188 abort ();
2189
2190 if (GET_CODE (src) == IF_THEN_ELSE)
2191 {
2192 rtx lab = 0;
2193 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2194 if (note)
2195 {
2196 /* If the more probable case is not a fall through, then
2197 try a branch hint. */
e5af9ddd 2198 int prob = XINT (note, 0);
85d9c13c
TS
2199 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2200 && GET_CODE (XEXP (src, 1)) != PC)
2201 lab = XEXP (src, 1);
2202 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2203 && GET_CODE (XEXP (src, 2)) != PC)
2204 lab = XEXP (src, 2);
2205 }
2206 if (lab)
2207 {
2208 if (GET_CODE (lab) == RETURN)
2209 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2210 return lab;
2211 }
2212 return 0;
2213 }
2214
2215 return src;
2216 }
b64925dc 2217 else if (CALL_P (branch))
85d9c13c
TS
2218 {
2219 rtx call;
2220 /* All of our call patterns are in a PARALLEL and the CALL is
2221 the first pattern in the PARALLEL. */
2222 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2223 abort ();
2224 call = XVECEXP (PATTERN (branch), 0, 0);
2225 if (GET_CODE (call) == SET)
2226 call = SET_SRC (call);
2227 if (GET_CODE (call) != CALL)
2228 abort ();
2229 return XEXP (XEXP (call, 0), 0);
2230 }
2231 return 0;
2232}
2233
9dcc2e87
TS
2234/* The special $hbr register is used to prevent the insn scheduler from
2235 moving hbr insns across instructions which invalidate them. It
2236 should only be used in a clobber, and this function searches for
2237 insns which clobber it. */
2238static bool
23c39aaa 2239insn_clobbers_hbr (rtx_insn *insn)
9dcc2e87
TS
2240{
2241 if (INSN_P (insn)
2242 && GET_CODE (PATTERN (insn)) == PARALLEL)
2243 {
2244 rtx parallel = PATTERN (insn);
2245 rtx clobber;
2246 int j;
2247 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2248 {
2249 clobber = XVECEXP (parallel, 0, j);
2250 if (GET_CODE (clobber) == CLOBBER
2251 && GET_CODE (XEXP (clobber, 0)) == REG
2252 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2253 return 1;
2254 }
2255 }
2256 return 0;
2257}
2258
2259/* Search up to 32 insns starting at FIRST:
2260 - at any kind of hinted branch, just return
2261 - at any unconditional branch in the first 15 insns, just return
2262 - at a call or indirect branch, after the first 15 insns, force it to
2263 an even address and return
2264 - at any unconditional branch, after the first 15 insns, force it to
2265 an even address.
 2266 At the end of the search, insert an hbrp within 4 insns of FIRST,
2267 and an hbrp within 16 instructions of FIRST.
2268 */
85d9c13c 2269static void
23c39aaa 2270insert_hbrp_for_ilb_runout (rtx_insn *first)
85d9c13c 2271{
23c39aaa 2272 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
9dcc2e87
TS
2273 int addr = 0, length, first_addr = -1;
2274 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2275 int insert_lnop_after = 0;
2276 for (insn = first; insn; insn = NEXT_INSN (insn))
2277 if (INSN_P (insn))
2278 {
2279 if (first_addr == -1)
2280 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2281 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2282 length = get_attr_length (insn);
2283
2284 if (before_4 == 0 && addr + length >= 4 * 4)
2285 before_4 = insn;
2286 /* We test for 14 instructions because the first hbrp will add
2287 up to 2 instructions. */
2288 if (before_16 == 0 && addr + length >= 14 * 4)
2289 before_16 = insn;
2290
2291 if (INSN_CODE (insn) == CODE_FOR_hbr)
2292 {
2293 /* Make sure an hbrp is at least 2 cycles away from a hint.
2294 Insert an lnop after the hbrp when necessary. */
2295 if (before_4 == 0 && addr > 0)
2296 {
2297 before_4 = insn;
2298 insert_lnop_after |= 1;
2299 }
2300 else if (before_4 && addr <= 4 * 4)
2301 insert_lnop_after |= 1;
2302 if (before_16 == 0 && addr > 10 * 4)
2303 {
2304 before_16 = insn;
2305 insert_lnop_after |= 2;
2306 }
2307 else if (before_16 && addr <= 14 * 4)
2308 insert_lnop_after |= 2;
2309 }
85d9c13c 2310
9dcc2e87
TS
2311 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2312 {
2313 if (addr < hbrp_addr0)
2314 hbrp_addr0 = addr;
2315 else if (addr < hbrp_addr1)
2316 hbrp_addr1 = addr;
2317 }
85d9c13c 2318
9dcc2e87
TS
2319 if (CALL_P (insn) || JUMP_P (insn))
2320 {
2321 if (HINTED_P (insn))
2322 return;
2323
2324 /* Any branch after the first 15 insns should be on an even
2325 address to avoid a special case branch. There might be
2326 some nops and/or hbrps inserted, so we test after 10
2327 insns. */
2328 if (addr > 10 * 4)
2329 SCHED_ON_EVEN_P (insn) = 1;
2330 }
85d9c13c 2331
9dcc2e87
TS
2332 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2333 return;
2334
2335
2336 if (addr + length >= 32 * 4)
85d9c13c 2337 {
9dcc2e87
TS
2338 gcc_assert (before_4 && before_16);
2339 if (hbrp_addr0 > 4 * 4)
85d9c13c 2340 {
9dcc2e87
TS
2341 insn =
2342 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2343 recog_memoized (insn);
9d12bc68 2344 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
9dcc2e87
TS
2345 INSN_ADDRESSES_NEW (insn,
2346 INSN_ADDRESSES (INSN_UID (before_4)));
2347 PUT_MODE (insn, GET_MODE (before_4));
2348 PUT_MODE (before_4, TImode);
2349 if (insert_lnop_after & 1)
85d9c13c 2350 {
9dcc2e87
TS
2351 insn = emit_insn_before (gen_lnop (), before_4);
2352 recog_memoized (insn);
9d12bc68 2353 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
9dcc2e87
TS
2354 INSN_ADDRESSES_NEW (insn,
2355 INSN_ADDRESSES (INSN_UID (before_4)));
2356 PUT_MODE (insn, TImode);
85d9c13c 2357 }
85d9c13c 2358 }
9dcc2e87
TS
2359 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2360 && hbrp_addr1 > 16 * 4)
85d9c13c 2361 {
9dcc2e87
TS
2362 insn =
2363 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2364 recog_memoized (insn);
9d12bc68 2365 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
9dcc2e87
TS
2366 INSN_ADDRESSES_NEW (insn,
2367 INSN_ADDRESSES (INSN_UID (before_16)));
2368 PUT_MODE (insn, GET_MODE (before_16));
2369 PUT_MODE (before_16, TImode);
2370 if (insert_lnop_after & 2)
85d9c13c 2371 {
9dcc2e87
TS
2372 insn = emit_insn_before (gen_lnop (), before_16);
2373 recog_memoized (insn);
9d12bc68 2374 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
9dcc2e87
TS
2375 INSN_ADDRESSES_NEW (insn,
2376 INSN_ADDRESSES (INSN_UID
2377 (before_16)));
2378 PUT_MODE (insn, TImode);
85d9c13c
TS
2379 }
2380 }
9dcc2e87 2381 return;
85d9c13c 2382 }
85d9c13c 2383 }
9dcc2e87
TS
2384 else if (BARRIER_P (insn))
2385 return;
85d9c13c 2386
85d9c13c 2387}
9dcc2e87
TS
2388
2389/* The SPU might hang when it executes 48 inline instructions after a
2390 hinted branch jumps to its hinted target. The beginning of a
dd5a833e
MS
2391 function and the return from a call might have been hinted, and
2392 must be handled as well. To prevent a hang we insert 2 hbrps. The
2393 first should be within 6 insns of the branch target. The second
2394 should be within 22 insns of the branch target. When determining
2395 if hbrps are necessary, we look for only 32 inline instructions,
 2396 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2397 when inserting new hbrps, we insert them within 4 and 16 insns of
2398 the target. */
85d9c13c 2399static void
9dcc2e87 2400insert_hbrp (void)
85d9c13c 2401{
23c39aaa 2402 rtx_insn *insn;
9dcc2e87 2403 if (TARGET_SAFE_HINTS)
85d9c13c 2404 {
9dcc2e87
TS
2405 shorten_branches (get_insns ());
2406 /* Insert hbrp at beginning of function */
2407 insn = next_active_insn (get_insns ());
2408 if (insn)
2409 insert_hbrp_for_ilb_runout (insn);
2410 /* Insert hbrp after hinted targets. */
2411 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2412 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2413 insert_hbrp_for_ilb_runout (next_active_insn (insn));
85d9c13c 2414 }
85d9c13c
TS
2415}
2416
9dcc2e87
TS
2417static int in_spu_reorg;
2418
2ba42841
AO
2419static void
2420spu_var_tracking (void)
2421{
2422 if (flag_var_tracking)
2423 {
2424 df_analyze ();
2425 timevar_push (TV_VAR_TRACKING);
2426 variable_tracking_main ();
2427 timevar_pop (TV_VAR_TRACKING);
2428 df_finish_pass (false);
2429 }
2430}
2431
9dcc2e87
TS
2432/* Insert branch hints. There are no branch optimizations after this
2433 pass, so it's safe to set our branch hints now. */
85d9c13c 2434static void
9dcc2e87 2435spu_machine_dependent_reorg (void)
85d9c13c 2436{
9dcc2e87
TS
2437 sbitmap blocks;
2438 basic_block bb;
23c39aaa 2439 rtx_insn *branch, *insn;
9dcc2e87
TS
2440 rtx branch_target = 0;
2441 int branch_addr = 0, insn_addr, required_dist = 0;
2442 int i;
2443 unsigned int j;
85d9c13c 2444
9dcc2e87
TS
2445 if (!TARGET_BRANCH_HINTS || optimize == 0)
2446 {
2447 /* We still do it for unoptimized code because an external
2448 function might have hinted a call or return. */
b4d80e56 2449 compute_bb_for_insn ();
9dcc2e87
TS
2450 insert_hbrp ();
2451 pad_bb ();
2ba42841 2452 spu_var_tracking ();
b4d80e56 2453 free_bb_for_insn ();
9dcc2e87
TS
2454 return;
2455 }
85d9c13c 2456
8b1c6fd7 2457 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
f61e445a 2458 bitmap_clear (blocks);
85d9c13c 2459
9dcc2e87
TS
2460 in_spu_reorg = 1;
2461 compute_bb_for_insn ();
2462
66b038ce
UW
2463 /* (Re-)discover loops so that bb->loop_father can be used
2464 in the analysis below. */
2465 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2466
9dcc2e87
TS
2467 compact_blocks ();
2468
2469 spu_bb_info =
0cae8d31 2470 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
9dcc2e87
TS
2471 sizeof (struct spu_bb_info));
2472
2473 /* We need exact insn addresses and lengths. */
2474 shorten_branches (get_insns ());
2475
0cae8d31 2476 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
85d9c13c 2477 {
06e28de2 2478 bb = BASIC_BLOCK_FOR_FN (cfun, i);
9dcc2e87
TS
2479 branch = 0;
2480 if (spu_bb_info[i].prop_jump)
85d9c13c 2481 {
9dcc2e87
TS
2482 branch = spu_bb_info[i].prop_jump;
2483 branch_target = get_branch_target (branch);
2484 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2485 required_dist = spu_hint_dist;
2486 }
2487 /* Search from end of a block to beginning. In this loop, find
2488 jumps which need a branch and emit them only when:
2489 - it's an indirect branch and we're at the insn which sets
2490 the register
2491 - we're at an insn that will invalidate the hint. e.g., a
2492 call, another hint insn, inline asm that clobbers $hbr, and
2493 some inlined operations (divmodsi4). Don't consider jumps
2494 because they are only at the end of a block and are
2495 considered when we are deciding whether to propagate
2496 - we're getting too far away from the branch. The hbr insns
2497 only have a signed 10 bit offset
2498 We go back as far as possible so the branch will be considered
2499 for propagation when we get to the beginning of the block. */
2500 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2501 {
2502 if (INSN_P (insn))
2503 {
2504 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2505 if (branch
2506 && ((GET_CODE (branch_target) == REG
2507 && set_of (branch_target, insn) != NULL_RTX)
2508 || insn_clobbers_hbr (insn)
2509 || branch_addr - insn_addr > 600))
2510 {
23c39aaa 2511 rtx_insn *next = NEXT_INSN (insn);
9dcc2e87
TS
2512 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2513 if (insn != BB_END (bb)
2514 && branch_addr - next_addr >= required_dist)
2515 {
2516 if (dump_file)
2517 fprintf (dump_file,
2518 "hint for %i in block %i before %i\n",
2519 INSN_UID (branch), bb->index,
2520 INSN_UID (next));
2521 spu_emit_branch_hint (next, branch, branch_target,
2522 branch_addr - next_addr, blocks);
2523 }
2524 branch = 0;
2525 }
2526
2527 /* JUMP_P will only be true at the end of a block. When
2528 branch is already set it means we've previously decided
2529 to propagate a hint for that branch into this block. */
2530 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2531 {
2532 branch = 0;
2533 if ((branch_target = get_branch_target (insn)))
2534 {
2535 branch = insn;
2536 branch_addr = insn_addr;
2537 required_dist = spu_hint_dist;
2538 }
2539 }
2540 }
2541 if (insn == BB_HEAD (bb))
2542 break;
2543 }
2544
2545 if (branch)
2546 {
2547 /* If we haven't emitted a hint for this branch yet, it might
2548 be profitable to emit it in one of the predecessor blocks,
2549 especially for loops. */
23c39aaa 2550 rtx_insn *bbend;
9dcc2e87
TS
2551 basic_block prev = 0, prop = 0, prev2 = 0;
2552 int loop_exit = 0, simple_loop = 0;
2553 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2554
2555 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2556 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2557 prev = EDGE_PRED (bb, j)->src;
2558 else
2559 prev2 = EDGE_PRED (bb, j)->src;
2560
2561 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2562 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2563 loop_exit = 1;
2564 else if (EDGE_SUCC (bb, j)->dest == bb)
2565 simple_loop = 1;
2566
2567 /* If this branch is a loop exit then propagate to previous
2568 fallthru block. This catches the cases when it is a simple
2569 loop or when there is an initial branch into the loop. */
2570 if (prev && (loop_exit || simple_loop)
66b038ce 2571 && bb_loop_depth (prev) <= bb_loop_depth (bb))
9dcc2e87
TS
2572 prop = prev;
2573
 2574 /* If there is only one adjacent predecessor, don't propagate
66b038ce 2575 outside this loop. */
9dcc2e87 2576 else if (prev && single_pred_p (bb)
66b038ce 2577 && prev->loop_father == bb->loop_father)
9dcc2e87
TS
2578 prop = prev;
2579
2580 /* If this is the JOIN block of a simple IF-THEN then
073a8998 2581 propagate the hint to the HEADER block. */
9dcc2e87
TS
2582 else if (prev && prev2
2583 && EDGE_COUNT (bb->preds) == 2
2584 && EDGE_COUNT (prev->preds) == 1
2585 && EDGE_PRED (prev, 0)->src == prev2
66b038ce 2586 && prev2->loop_father == bb->loop_father
9dcc2e87
TS
2587 && GET_CODE (branch_target) != REG)
2588 prop = prev;
2589
2590 /* Don't propagate when:
2591 - this is a simple loop and the hint would be too far
2592 - this is not a simple loop and there are 16 insns in
2593 this block already
2594 - the predecessor block ends in a branch that will be
2595 hinted
2596 - the predecessor block ends in an insn that invalidates
2597 the hint */
2598 if (prop
2599 && prop->index >= 0
2600 && (bbend = BB_END (prop))
2601 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2602 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2603 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2604 {
2605 if (dump_file)
2606 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2607 "for %i (loop_exit %i simple_loop %i dist %i)\n",
66b038ce 2608 bb->index, prop->index, bb_loop_depth (bb),
9dcc2e87
TS
2609 INSN_UID (branch), loop_exit, simple_loop,
2610 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2611
2612 spu_bb_info[prop->index].prop_jump = branch;
2613 spu_bb_info[prop->index].bb_index = i;
2614 }
2615 else if (branch_addr - next_addr >= required_dist)
2616 {
2617 if (dump_file)
2618 fprintf (dump_file, "hint for %i in block %i before %i\n",
2619 INSN_UID (branch), bb->index,
2620 INSN_UID (NEXT_INSN (insn)));
2621 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2622 branch_addr - next_addr, blocks);
2623 }
2624 branch = 0;
85d9c13c 2625 }
85d9c13c 2626 }
9dcc2e87 2627 free (spu_bb_info);
85d9c13c 2628
f61e445a 2629 if (!bitmap_empty_p (blocks))
9dcc2e87
TS
2630 find_many_sub_basic_blocks (blocks);
2631
2632 /* We have to schedule to make sure alignment is ok. */
11cd3bed 2633 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
9dcc2e87
TS
2634
2635 /* The hints need to be scheduled, so call it again. */
2636 schedule_insns ();
f626b979 2637 df_finish_pass (true);
9dcc2e87
TS
2638
2639 insert_hbrp ();
2640
2641 pad_bb ();
2642
6e37f6d4
TS
2643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2644 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2645 {
2646 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2647 between its branch label and the branch. We don't move the
2648 label because GCC expects it at the beginning of the block. */
2649 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2650 rtx label_ref = XVECEXP (unspec, 0, 0);
dc01c3d1
DM
2651 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2652 rtx_insn *branch;
6e37f6d4
TS
2653 int offset = 0;
2654 for (branch = NEXT_INSN (label);
2655 !JUMP_P (branch) && !CALL_P (branch);
2656 branch = NEXT_INSN (branch))
2657 if (NONJUMP_INSN_P (branch))
2658 offset += get_attr_length (branch);
2659 if (offset > 0)
0a81f074 2660 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
6e37f6d4 2661 }
9dcc2e87 2662
2ba42841 2663 spu_var_tracking ();
9dcc2e87 2664
66b038ce
UW
2665 loop_optimizer_finalize ();
2666
9dcc2e87
TS
2667 free_bb_for_insn ();
2668
2669 in_spu_reorg = 0;
85d9c13c
TS
2670}
2671\f
2672
2673/* Insn scheduling routines, primarily for dual issue. */
2674static int
2675spu_sched_issue_rate (void)
2676{
2677 return 2;
2678}
2679
2680static int
23c39aaa 2681uses_ls_unit(rtx_insn *insn)
85d9c13c 2682{
9dcc2e87
TS
2683 rtx set = single_set (insn);
2684 if (set != 0
2685 && (GET_CODE (SET_DEST (set)) == MEM
2686 || GET_CODE (SET_SRC (set)) == MEM))
2687 return 1;
2688 return 0;
85d9c13c
TS
2689}
2690
2691static int
23c39aaa 2692get_pipe (rtx_insn *insn)
85d9c13c
TS
2693{
2694 enum attr_type t;
2695 /* Handle inline asm */
2696 if (INSN_CODE (insn) == -1)
2697 return -1;
2698 t = get_attr_type (insn);
2699 switch (t)
2700 {
2701 case TYPE_CONVERT:
2702 return -2;
2703 case TYPE_MULTI0:
2704 return -1;
2705
2706 case TYPE_FX2:
2707 case TYPE_FX3:
2708 case TYPE_SPR:
2709 case TYPE_NOP:
2710 case TYPE_FXB:
2711 case TYPE_FPD:
2712 case TYPE_FP6:
2713 case TYPE_FP7:
85d9c13c
TS
2714 return 0;
2715
2716 case TYPE_LNOP:
2717 case TYPE_SHUF:
2718 case TYPE_LOAD:
2719 case TYPE_STORE:
2720 case TYPE_BR:
2721 case TYPE_MULTI1:
2722 case TYPE_HBR:
9dcc2e87 2723 case TYPE_IPREFETCH:
85d9c13c
TS
2724 return 1;
2725 default:
2726 abort ();
2727 }
2728}
2729
9dcc2e87
TS
2730
2731/* haifa-sched.c has a static variable that keeps track of the current
2732 cycle. It is passed to spu_sched_reorder, and we record it here for
2733 use by spu_sched_variable_issue. It won't be accurate if the
 2734 scheduler updates its clock_var between the two calls. */
2735static int clock_var;
2736
2737/* This is used to keep track of insn alignment. Set to 0 at the
2738 beginning of each block and increased by the "length" attr of each
2739 insn scheduled. */
2740static int spu_sched_length;
2741
2742/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2743 ready list appropriately in spu_sched_reorder(). */
2744static int pipe0_clock;
2745static int pipe1_clock;
2746
2747static int prev_clock_var;
2748
2749static int prev_priority;
2750
2751/* The SPU needs to load the next ilb sometime during the execution of
2752 the previous ilb. There is a potential conflict if every cycle has a
2753 load or store. To avoid the conflict we make sure the load/store
2754 unit is free for at least one cycle during the execution of insns in
2755 the previous ilb. */
2756static int spu_ls_first;
2757static int prev_ls_clock;
2758
2759static void
2760spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2761 int max_ready ATTRIBUTE_UNUSED)
2762{
2763 spu_sched_length = 0;
2764}
2765
2766static void
2767spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2768 int max_ready ATTRIBUTE_UNUSED)
2769{
2770 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2771 {
2772 /* When any block might be at least 8-byte aligned, assume they
2773 will all be at least 8-byte aligned to make sure dual issue
2774 works out correctly. */
2775 spu_sched_length = 0;
2776 }
2777 spu_ls_first = INT_MAX;
2778 clock_var = -1;
2779 prev_ls_clock = -1;
2780 pipe0_clock = -1;
2781 pipe1_clock = -1;
2782 prev_clock_var = -1;
2783 prev_priority = -1;
2784}
2785
85d9c13c 2786static int
9dcc2e87 2787spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
23c39aaa 2788 int verbose ATTRIBUTE_UNUSED,
ac44248e 2789 rtx_insn *insn, int more)
85d9c13c 2790{
9dcc2e87
TS
2791 int len;
2792 int p;
85d9c13c
TS
2793 if (GET_CODE (PATTERN (insn)) == USE
2794 || GET_CODE (PATTERN (insn)) == CLOBBER
9dcc2e87
TS
2795 || (len = get_attr_length (insn)) == 0)
2796 return more;
2797
2798 spu_sched_length += len;
2799
2800 /* Reset on inline asm */
2801 if (INSN_CODE (insn) == -1)
2802 {
2803 spu_ls_first = INT_MAX;
2804 pipe0_clock = -1;
2805 pipe1_clock = -1;
2806 return 0;
2807 }
2808 p = get_pipe (insn);
2809 if (p == 0)
2810 pipe0_clock = clock_var;
2811 else
2812 pipe1_clock = clock_var;
2813
2814 if (in_spu_reorg)
2815 {
2816 if (clock_var - prev_ls_clock > 1
2817 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2818 spu_ls_first = INT_MAX;
2819 if (uses_ls_unit (insn))
2820 {
2821 if (spu_ls_first == INT_MAX)
2822 spu_ls_first = spu_sched_length;
2823 prev_ls_clock = clock_var;
2824 }
2825
2826 /* The scheduler hasn't inserted the nop, but we will later on.
2827 Include those nops in spu_sched_length. */
2828 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2829 spu_sched_length += 4;
2830 prev_clock_var = clock_var;
2831
2832 /* more is -1 when called from spu_sched_reorder for new insns
2833 that don't have INSN_PRIORITY */
2834 if (more >= 0)
2835 prev_priority = INSN_PRIORITY (insn);
2836 }
2837
073a8998 2838 /* Always try issuing more insns. spu_sched_reorder will decide
9dcc2e87
TS
2839 when the cycle should be advanced. */
2840 return 1;
2841}
2842
2843/* This function is called for both TARGET_SCHED_REORDER and
2844 TARGET_SCHED_REORDER2. */
2845static int
2846spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
ce1ce33a 2847 rtx_insn **ready, int *nreadyp, int clock)
9dcc2e87
TS
2848{
2849 int i, nready = *nreadyp;
2850 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
ce1ce33a 2851 rtx_insn *insn;
9dcc2e87
TS
2852
2853 clock_var = clock;
2854
2855 if (nready <= 0 || pipe1_clock >= clock)
2856 return 0;
2857
2858 /* Find any rtl insns that don't generate assembly insns and schedule
2859 them first. */
2860 for (i = nready - 1; i >= 0; i--)
2861 {
2862 insn = ready[i];
2863 if (INSN_CODE (insn) == -1
2864 || INSN_CODE (insn) == CODE_FOR_blockage
eec9405e 2865 || (INSN_P (insn) && get_attr_length (insn) == 0))
9dcc2e87
TS
2866 {
2867 ready[i] = ready[nready - 1];
2868 ready[nready - 1] = insn;
2869 return 1;
2870 }
2871 }
2872
2873 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2874 for (i = 0; i < nready; i++)
2875 if (INSN_CODE (ready[i]) != -1)
2876 {
2877 insn = ready[i];
2878 switch (get_attr_type (insn))
2879 {
2880 default:
2881 case TYPE_MULTI0:
2882 case TYPE_CONVERT:
2883 case TYPE_FX2:
2884 case TYPE_FX3:
2885 case TYPE_SPR:
2886 case TYPE_NOP:
2887 case TYPE_FXB:
2888 case TYPE_FPD:
2889 case TYPE_FP6:
2890 case TYPE_FP7:
2891 pipe_0 = i;
2892 break;
2893 case TYPE_LOAD:
2894 case TYPE_STORE:
2895 pipe_ls = i;
2896 case TYPE_LNOP:
2897 case TYPE_SHUF:
2898 case TYPE_BR:
2899 case TYPE_MULTI1:
2900 case TYPE_HBR:
2901 pipe_1 = i;
2902 break;
2903 case TYPE_IPREFETCH:
2904 pipe_hbrp = i;
2905 break;
2906 }
2907 }
2908
2909 /* In the first scheduling phase, schedule loads and stores together
2910 to increase the chance they will get merged during postreload CSE. */
2911 if (!reload_completed && pipe_ls >= 0)
2912 {
2913 insn = ready[pipe_ls];
2914 ready[pipe_ls] = ready[nready - 1];
2915 ready[nready - 1] = insn;
2916 return 1;
2917 }
2918
2919 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2920 if (pipe_hbrp >= 0)
2921 pipe_1 = pipe_hbrp;
2922
2923 /* When we have loads/stores in every cycle of the last 15 insns and
2924 we are about to schedule another load/store, emit an hbrp insn
2925 instead. */
2926 if (in_spu_reorg
2927 && spu_sched_length - spu_ls_first >= 4 * 15
2928 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2929 {
2930 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2931 recog_memoized (insn);
2932 if (pipe0_clock < clock)
2933 PUT_MODE (insn, TImode);
2934 spu_sched_variable_issue (file, verbose, insn, -1);
2935 return 0;
2936 }
2937
2938 /* In general, we want to emit nops to increase dual issue, but dual
2939 issue isn't faster when one of the insns could be scheduled later
 2940 without affecting the critical path. We look at INSN_PRIORITY to
2941 make a good guess, but it isn't perfect so -mdual-nops=n can be
 2942 used to affect it. */
2943 if (in_spu_reorg && spu_dual_nops < 10)
2944 {
073a8998 2945 /* When we are at an even address and we are not issuing nops to
9dcc2e87
TS
2946 improve scheduling then we need to advance the cycle. */
2947 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2948 && (spu_dual_nops == 0
2949 || (pipe_1 != -1
2950 && prev_priority >
2951 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2952 return 0;
2953
2954 /* When at an odd address, schedule the highest priority insn
2955 without considering pipeline. */
2956 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2957 && (spu_dual_nops == 0
2958 || (prev_priority >
2959 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2960 return 1;
2961 }
2962
2963
2964 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2965 pipe0 insn in the ready list, schedule it. */
2966 if (pipe0_clock < clock && pipe_0 >= 0)
2967 schedule_i = pipe_0;
2968
2969 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2970 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2971 else
2972 schedule_i = pipe_1;
2973
2974 if (schedule_i > -1)
2975 {
2976 insn = ready[schedule_i];
2977 ready[schedule_i] = ready[nready - 1];
2978 ready[nready - 1] = insn;
2979 return 1;
2980 }
2981 return 0;
85d9c13c
TS
2982}
2983
2984/* INSN is dependent on DEP_INSN. */
2985static int
ac44248e 2986spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
85d9c13c 2987{
9dcc2e87
TS
2988 rtx set;
2989
2990 /* The blockage pattern is used to prevent instructions from being
2991 moved across it and has no cost. */
2992 if (INSN_CODE (insn) == CODE_FOR_blockage
2993 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2994 return 0;
2995
eec9405e
TS
2996 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2997 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
9dcc2e87
TS
2998 return 0;
2999
3000 /* Make sure hbrps are spread out. */
3001 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3002 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3003 return 8;
3004
3005 /* Make sure hints and hbrps are 2 cycles apart. */
3006 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3007 || INSN_CODE (insn) == CODE_FOR_hbr)
3008 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3009 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3010 return 2;
3011
3012 /* An hbrp has no real dependency on other insns. */
3013 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3014 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3015 return 0;
3016
3017 /* Assuming that it is unlikely an argument register will be used in
3018 the first cycle of the called function, we reduce the cost for
3019 slightly better scheduling of dep_insn. When not hinted, the
3020 mispredicted branch would hide the cost as well. */
3021 if (CALL_P (insn))
3022 {
3023 rtx target = get_branch_target (insn);
3024 if (GET_CODE (target) != REG || !set_of (target, insn))
3025 return cost - 2;
3026 return cost;
3027 }
3028
3029 /* And when returning from a function, let's assume the return values
3030 are completed sooner too. */
3031 if (CALL_P (dep_insn))
85d9c13c 3032 return cost - 2;
9dcc2e87
TS
3033
 3034 /* Make sure an instruction that loads from the back chain is scheduled
3035 away from the return instruction so a hint is more likely to get
3036 issued. */
3037 if (INSN_CODE (insn) == CODE_FOR__return
3038 && (set = single_set (dep_insn))
3039 && GET_CODE (SET_DEST (set)) == REG
3040 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3041 return 20;
3042
85d9c13c
TS
3043 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3044 scheduler makes every insn in a block anti-dependent on the final
3045 jump_insn. We adjust here so higher cost insns will get scheduled
3046 earlier. */
9dcc2e87 3047 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
b198261f 3048 return insn_cost (dep_insn) - 3;
9dcc2e87 3049
85d9c13c
TS
3050 return cost;
3051}
3052\f
3053/* Create a CONST_DOUBLE from a string. */
984514ac 3054rtx
ef4bddc2 3055spu_float_const (const char *string, machine_mode mode)
85d9c13c
TS
3056{
3057 REAL_VALUE_TYPE value;
3058 value = REAL_VALUE_ATOF (string, mode);
555affd7 3059 return const_double_from_real_value (value, mode);
85d9c13c
TS
3060}
3061
85d9c13c
TS
3062int
3063spu_constant_address_p (rtx x)
3064{
3065 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3066 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3067 || GET_CODE (x) == HIGH);
3068}
3069
3070static enum spu_immediate
3071which_immediate_load (HOST_WIDE_INT val)
3072{
3073 gcc_assert (val == trunc_int_for_mode (val, SImode));
3074
3075 if (val >= -0x8000 && val <= 0x7fff)
3076 return SPU_IL;
3077 if (val >= 0 && val <= 0x3ffff)
3078 return SPU_ILA;
3079 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3080 return SPU_ILH;
3081 if ((val & 0xffff) == 0)
3082 return SPU_ILHU;
3083
3084 return SPU_NONE;
3085}
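A few worked values may help make the ranges above concrete (illustrative only; the asserts assume this sketch were compiled as part of this file):

/* Illustrative sketch only.  */
static void
which_immediate_load_examples (void)
{
  gcc_assert (which_immediate_load (-1) == SPU_IL);           /* signed 16-bit   */
  gcc_assert (which_immediate_load (0x20000) == SPU_ILA);     /* unsigned 18-bit */
  gcc_assert (which_immediate_load (0x12341234) == SPU_ILH);  /* equal halfwords */
  gcc_assert (which_immediate_load (0x12340000) == SPU_ILHU); /* low half zero   */
  gcc_assert (which_immediate_load (0x12345678) == SPU_NONE); /* needs ilhu+iohl */
}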
3086
a1c6e4b8
TS
3087/* Return true when OP can be loaded by one of the il instructions, or
3088 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
85d9c13c 3089int
ef4bddc2 3090immediate_load_p (rtx op, machine_mode mode)
a1c6e4b8
TS
3091{
3092 if (CONSTANT_P (op))
3093 {
3094 enum immediate_class c = classify_immediate (op, mode);
73701e27 3095 return c == IC_IL1 || c == IC_IL1s
6fb5fa3c 3096 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
a1c6e4b8
TS
3097 }
3098 return 0;
3099}
3100
3101/* Return true if the first SIZE bytes of arr is a constant that can be
3102 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3103 represent the size and offset of the instruction to use. */
3104static int
3105cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3106{
3107 int cpat, run, i, start;
3108 cpat = 1;
3109 run = 0;
3110 start = -1;
3111 for (i = 0; i < size && cpat; i++)
3112 if (arr[i] != i+16)
3113 {
3114 if (!run)
3115 {
3116 start = i;
3117 if (arr[i] == 3)
3118 run = 1;
3119 else if (arr[i] == 2 && arr[i+1] == 3)
3120 run = 2;
3121 else if (arr[i] == 0)
3122 {
3123 while (arr[i+run] == run && i+run < 16)
3124 run++;
3125 if (run != 4 && run != 8)
3126 cpat = 0;
3127 }
3128 else
3129 cpat = 0;
3130 if ((i & (run-1)) != 0)
3131 cpat = 0;
3132 i += run;
3133 }
3134 else
3135 cpat = 0;
3136 }
1f49ae6e 3137 if (cpat && (run || size < 16))
a1c6e4b8
TS
3138 {
3139 if (run == 0)
3140 run = 1;
3141 if (prun)
3142 *prun = run;
3143 if (pstart)
3144 *pstart = start == -1 ? 16-run : start;
3145 return 1;
3146 }
3147 return 0;
3148}
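For example, the selector that cwd generates for a word insertion at offset 4 is the identity pattern 0x10..0x1f with bytes 4-7 replaced by 00 01 02 03; the sketch below (illustrative only, not part of spu.c) shows how cpat_info reports it:

/* Illustrative sketch only.  */
static void
cpat_info_example (void)
{
  unsigned char arr[16] = {
    0x10, 0x11, 0x12, 0x13,  0x00, 0x01, 0x02, 0x03,
    0x18, 0x19, 0x1a, 0x1b,  0x1c, 0x1d, 0x1e, 0x1f
  };
  int run, start;
  gcc_assert (cpat_info (arr, 16, &run, &start));
  gcc_assert (run == 4 && start == 4);   /* a cwd-style pattern at offset 4 */
}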
3149
3150/* OP is a CONSTANT_P. Determine what instructions can be used to load
24fc18b9 3151 it into a register. MODE is only valid when OP is a CONST_INT. */
a1c6e4b8 3152static enum immediate_class
ef4bddc2 3153classify_immediate (rtx op, machine_mode mode)
85d9c13c
TS
3154{
3155 HOST_WIDE_INT val;
3156 unsigned char arr[16];
73701e27 3157 int i, j, repeated, fsmbi, repeat;
a1c6e4b8
TS
3158
3159 gcc_assert (CONSTANT_P (op));
3160
85d9c13c
TS
3161 if (GET_MODE (op) != VOIDmode)
3162 mode = GET_MODE (op);
3163
a1c6e4b8 3164 /* A V4SI const_vector with all identical symbols is ok. */
73701e27
TS
3165 if (!flag_pic
3166 && mode == V4SImode
a1c6e4b8
TS
3167 && GET_CODE (op) == CONST_VECTOR
3168 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
92695fbb
RS
3169 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3170 op = unwrap_const_vec_duplicate (op);
85d9c13c 3171
a1c6e4b8
TS
3172 switch (GET_CODE (op))
3173 {
3174 case SYMBOL_REF:
3175 case LABEL_REF:
3176 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
85d9c13c 3177
a1c6e4b8 3178 case CONST:
3f61b42f
UW
3179 /* We can never know if the resulting address fits in 18 bits and can be
3180 loaded with ila. For now, assume the address will not overflow if
3181 the displacement is "small" (fits 'K' constraint). */
3182 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3183 {
3184 rtx sym = XEXP (XEXP (op, 0), 0);
3185 rtx cst = XEXP (XEXP (op, 0), 1);
3186
3187 if (GET_CODE (sym) == SYMBOL_REF
3188 && GET_CODE (cst) == CONST_INT
3189 && satisfies_constraint_K (cst))
3190 return IC_IL1s;
3191 }
3192 return IC_IL2s;
85d9c13c 3193
a1c6e4b8
TS
3194 case HIGH:
3195 return IC_IL1s;
3196
3197 case CONST_VECTOR:
3198 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3199 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3200 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3201 return IC_POOL;
3202 /* Fall through. */
3203
3204 case CONST_INT:
3205 case CONST_DOUBLE:
3206 constant_to_array (mode, op, arr);
85d9c13c 3207
a1c6e4b8
TS
3208 /* Check that each 4-byte slot is identical. */
3209 repeated = 1;
3210 for (i = 4; i < 16; i += 4)
3211 for (j = 0; j < 4; j++)
3212 if (arr[j] != arr[i + j])
3213 repeated = 0;
3214
3215 if (repeated)
3216 {
3217 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3218 val = trunc_int_for_mode (val, SImode);
3219
3220 if (which_immediate_load (val) != SPU_NONE)
3221 return IC_IL1;
3222 }
3223
3224 /* Any mode of 2 bytes or smaller can be loaded with an il
3225 instruction. */
3226 gcc_assert (GET_MODE_SIZE (mode) > 2);
3227
3228 fsmbi = 1;
73701e27 3229 repeat = 0;
a1c6e4b8 3230 for (i = 0; i < 16 && fsmbi; i++)
73701e27
TS
3231 if (arr[i] != 0 && repeat == 0)
3232 repeat = arr[i];
3233 else if (arr[i] != 0 && arr[i] != repeat)
a1c6e4b8
TS
3234 fsmbi = 0;
3235 if (fsmbi)
73701e27 3236 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
a1c6e4b8
TS
3237
3238 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3239 return IC_CPAT;
3240
3241 if (repeated)
3242 return IC_IL2;
3243
3244 return IC_POOL;
3245 default:
3246 break;
3247 }
3248 gcc_unreachable ();
85d9c13c
TS
3249}
3250
3251static enum spu_immediate
3252which_logical_immediate (HOST_WIDE_INT val)
3253{
3254 gcc_assert (val == trunc_int_for_mode (val, SImode));
3255
3256 if (val >= -0x200 && val <= 0x1ff)
3257 return SPU_ORI;
3258 if (val >= 0 && val <= 0xffff)
3259 return SPU_IOHL;
3260 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3261 {
3262 val = trunc_int_for_mode (val, HImode);
3263 if (val >= -0x200 && val <= 0x1ff)
3264 return SPU_ORHI;
3265 if ((val & 0xff) == ((val >> 8) & 0xff))
3266 {
3267 val = trunc_int_for_mode (val, QImode);
3268 if (val >= -0x200 && val <= 0x1ff)
3269 return SPU_ORBI;
3270 }
3271 }
3272 return SPU_NONE;
3273}
3274
73701e27
TS
3275/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3276 CONST_DOUBLEs. */
3277static int
3278const_vector_immediate_p (rtx x)
3279{
3280 int i;
3281 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3282 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3283 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3284 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3285 return 0;
3286 return 1;
3287}
3288
85d9c13c 3289int
ef4bddc2 3290logical_immediate_p (rtx op, machine_mode mode)
85d9c13c
TS
3291{
3292 HOST_WIDE_INT val;
3293 unsigned char arr[16];
3294 int i, j;
3295
3296 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3297 || GET_CODE (op) == CONST_VECTOR);
3298
73701e27
TS
3299 if (GET_CODE (op) == CONST_VECTOR
3300 && !const_vector_immediate_p (op))
3301 return 0;
3302
85d9c13c
TS
3303 if (GET_MODE (op) != VOIDmode)
3304 mode = GET_MODE (op);
3305
3306 constant_to_array (mode, op, arr);
3307
3308 /* Check that bytes are repeated. */
3309 for (i = 4; i < 16; i += 4)
3310 for (j = 0; j < 4; j++)
3311 if (arr[j] != arr[i + j])
3312 return 0;
3313
3314 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3315 val = trunc_int_for_mode (val, SImode);
3316
3317 i = which_logical_immediate (val);
3318 return i != SPU_NONE && i != SPU_IOHL;
3319}
3320
3321int
ef4bddc2 3322iohl_immediate_p (rtx op, machine_mode mode)
85d9c13c
TS
3323{
3324 HOST_WIDE_INT val;
3325 unsigned char arr[16];
3326 int i, j;
3327
3328 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3329 || GET_CODE (op) == CONST_VECTOR);
3330
73701e27
TS
3331 if (GET_CODE (op) == CONST_VECTOR
3332 && !const_vector_immediate_p (op))
3333 return 0;
3334
85d9c13c
TS
3335 if (GET_MODE (op) != VOIDmode)
3336 mode = GET_MODE (op);
3337
3338 constant_to_array (mode, op, arr);
3339
3340 /* Check that bytes are repeated. */
3341 for (i = 4; i < 16; i += 4)
3342 for (j = 0; j < 4; j++)
3343 if (arr[j] != arr[i + j])
3344 return 0;
3345
3346 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3347 val = trunc_int_for_mode (val, SImode);
3348
3349 return val >= 0 && val <= 0xffff;
3350}
3351
3352int
ef4bddc2 3353arith_immediate_p (rtx op, machine_mode mode,
85d9c13c
TS
3354 HOST_WIDE_INT low, HOST_WIDE_INT high)
3355{
3356 HOST_WIDE_INT val;
3357 unsigned char arr[16];
3358 int bytes, i, j;
3359
3360 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3361 || GET_CODE (op) == CONST_VECTOR);
3362
73701e27
TS
3363 if (GET_CODE (op) == CONST_VECTOR
3364 && !const_vector_immediate_p (op))
3365 return 0;
3366
85d9c13c
TS
3367 if (GET_MODE (op) != VOIDmode)
3368 mode = GET_MODE (op);
3369
3370 constant_to_array (mode, op, arr);
3371
cb5ca315 3372 bytes = GET_MODE_UNIT_SIZE (mode);
6c825cd4 3373 mode = mode_for_size (GET_MODE_UNIT_BITSIZE (mode), MODE_INT, 0);
85d9c13c
TS
3374
3375 /* Check that bytes are repeated. */
3376 for (i = bytes; i < 16; i += bytes)
3377 for (j = 0; j < bytes; j++)
3378 if (arr[j] != arr[i + j])
3379 return 0;
3380
3381 val = arr[0];
3382 for (j = 1; j < bytes; j++)
3383 val = (val << 8) | arr[j];
3384
3385 val = trunc_int_for_mode (val, mode);
3386
3387 return val >= low && val <= high;
3388}
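For instance, a V4SI splat of 5 fits a 10-bit signed immediate range such as the one used by ai-style instructions (illustrative sketch only, not part of spu.c):

/* Illustrative sketch only.  */
static void
arith_immediate_example (void)
{
  rtx splat_five = spu_const (V4SImode, 5);
  gcc_assert (arith_immediate_p (splat_five, V4SImode, -0x200, 0x1ff));
}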
3389
5345cf68
TS
3390/* TRUE when op is an immediate and an exact power of 2, and given that
3391 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3392 all entries must be the same. */
3393bool
ef4bddc2 3394exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
5345cf68 3395{
ef4bddc2 3396 machine_mode int_mode;
5345cf68
TS
3397 HOST_WIDE_INT val;
3398 unsigned char arr[16];
3399 int bytes, i, j;
3400
3401 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3402 || GET_CODE (op) == CONST_VECTOR);
3403
3404 if (GET_CODE (op) == CONST_VECTOR
3405 && !const_vector_immediate_p (op))
3406 return 0;
3407
3408 if (GET_MODE (op) != VOIDmode)
3409 mode = GET_MODE (op);
3410
3411 constant_to_array (mode, op, arr);
3412
cb5ca315 3413 mode = GET_MODE_INNER (mode);
5345cf68
TS
3414
3415 bytes = GET_MODE_SIZE (mode);
3416 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3417
3418 /* Check that bytes are repeated. */
3419 for (i = bytes; i < 16; i += bytes)
3420 for (j = 0; j < bytes; j++)
3421 if (arr[j] != arr[i + j])
3422 return 0;
3423
3424 val = arr[0];
3425 for (j = 1; j < bytes; j++)
3426 val = (val << 8) | arr[j];
3427
3428 val = trunc_int_for_mode (val, int_mode);
3429
3430 /* Currently, we only handle SFmode */
3431 gcc_assert (mode == SFmode);
3432 if (mode == SFmode)
3433 {
3434 int exp = (val >> 23) - 127;
3435 return val > 0 && (val & 0x007fffff) == 0
3436 && exp >= low && exp <= high;
3437 }
3438 return FALSE;
3439}
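As a worked example (illustrative only, not part of spu.c): 2.0f has the single-precision bit pattern 0x40000000, i.e. a zero mantissa and a biased exponent of 128, so its unbiased exponent is 1 and any LOW <= 1 <= HIGH range accepts it:

/* Illustrative sketch only.  */
static void
exp2_immediate_example (void)
{
  rtx splat_two = spu_const (V4SFmode, 0x40000000);
  gcc_assert (exp2_immediate_p (splat_two, V4SFmode, 0, 127));
}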
3440
299456f3
BE
3441/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3442
3dfc96ea
RS
3443static bool
3444ea_symbol_ref_p (const_rtx x)
299456f3 3445{
299456f3
BE
3446 tree decl;
3447
3448 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3449 {
3450 rtx plus = XEXP (x, 0);
3451 rtx op0 = XEXP (plus, 0);
3452 rtx op1 = XEXP (plus, 1);
3453 if (GET_CODE (op1) == CONST_INT)
3454 x = op0;
3455 }
3456
3457 return (GET_CODE (x) == SYMBOL_REF
3458 && (decl = SYMBOL_REF_DECL (x)) != 0
3459 && TREE_CODE (decl) == VAR_DECL
3460 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3461}
3462
85d9c13c 3463/* We accept:
a7b376ee 3464 - any 32-bit constant (SImode, SFmode)
85d9c13c 3465 - any constant that can be generated with fsmbi (any mode)
a7b376ee 3466 - a 64-bit constant where the high and low bits are identical
85d9c13c 3467 (DImode, DFmode)
a7b376ee 3468 - a 128-bit constant where the four 32-bit words match. */
1a627b35 3469bool
ef4bddc2 3470spu_legitimate_constant_p (machine_mode mode, rtx x)
85d9c13c 3471{
3dfc96ea 3472 subrtx_iterator::array_type array;
73701e27
TS
3473 if (GET_CODE (x) == HIGH)
3474 x = XEXP (x, 0);
299456f3
BE
3475
3476 /* Reject any __ea qualified reference. These can't appear in
3477 instructions but must be forced to the constant pool. */
3dfc96ea
RS
3478 FOR_EACH_SUBRTX (iter, array, x, ALL)
3479 if (ea_symbol_ref_p (*iter))
3480 return 0;
299456f3 3481
85d9c13c 3482 /* V4SI with all identical symbols is valid. */
73701e27 3483 if (!flag_pic
1a627b35 3484 && mode == V4SImode
85d9c13c
TS
3485 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3486 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
73701e27 3487 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
92695fbb 3488 return const_vec_duplicate_p (x);
85d9c13c 3489
73701e27
TS
3490 if (GET_CODE (x) == CONST_VECTOR
3491 && !const_vector_immediate_p (x))
3492 return 0;
85d9c13c
TS
3493 return 1;
3494}
3495
 3496/* Valid addresses are:
3497 - symbol_ref, label_ref, const
3498 - reg
eec9405e 3499 - reg + const_int, where const_int is 16 byte aligned
85d9c13c
TS
3500 - reg + reg, alignment doesn't matter
3501 The alignment matters in the reg+const case because lqd and stqd
eec9405e
TS
3502 ignore the 4 least significant bits of the const. We only care about
3503 16 byte modes because the expand phase will change all smaller MEM
3504 references to TImode. */
3505static bool
ef4bddc2 3506spu_legitimate_address_p (machine_mode mode,
c6c3dba9 3507 rtx x, bool reg_ok_strict)
85d9c13c 3508{
eec9405e
TS
3509 int aligned = GET_MODE_SIZE (mode) >= 16;
3510 if (aligned
3511 && GET_CODE (x) == AND
85d9c13c 3512 && GET_CODE (XEXP (x, 1)) == CONST_INT
eec9405e 3513 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
85d9c13c
TS
3514 x = XEXP (x, 0);
3515 switch (GET_CODE (x))
3516 {
85d9c13c 3517 case LABEL_REF:
299456f3
BE
3518 return !TARGET_LARGE_MEM;
3519
eec9405e 3520 case SYMBOL_REF:
85d9c13c 3521 case CONST:
299456f3
BE
3522 /* Keep __ea references until reload so that spu_expand_mov can see them
3523 in MEMs. */
3dfc96ea 3524 if (ea_symbol_ref_p (x))
299456f3 3525 return !reload_in_progress && !reload_completed;
eec9405e 3526 return !TARGET_LARGE_MEM;
85d9c13c
TS
3527
3528 case CONST_INT:
3529 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3530
3531 case SUBREG:
3532 x = XEXP (x, 0);
eec9405e
TS
3533 if (REG_P (x))
3534 return 0;
85d9c13c
TS
3535
3536 case REG:
3537 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3538
3539 case PLUS:
3540 case LO_SUM:
3541 {
3542 rtx op0 = XEXP (x, 0);
3543 rtx op1 = XEXP (x, 1);
3544 if (GET_CODE (op0) == SUBREG)
3545 op0 = XEXP (op0, 0);
3546 if (GET_CODE (op1) == SUBREG)
3547 op1 = XEXP (op1, 0);
85d9c13c
TS
3548 if (GET_CODE (op0) == REG
3549 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3550 && GET_CODE (op1) == CONST_INT
2ea0be59
UW
3551 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3552 /* If virtual registers are involved, the displacement will
3553 change later on anyway, so checking would be premature.
3554 Reload will make sure the final displacement after
3555 register elimination is OK. */
3556 || op0 == arg_pointer_rtx
3557 || op0 == frame_pointer_rtx
3558 || op0 == virtual_stack_vars_rtx)
eec9405e
TS
3559 && (!aligned || (INTVAL (op1) & 15) == 0))
3560 return TRUE;
85d9c13c
TS
3561 if (GET_CODE (op0) == REG
3562 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3563 && GET_CODE (op1) == REG
3564 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
eec9405e 3565 return TRUE;
85d9c13c
TS
3566 }
3567 break;
3568
3569 default:
3570 break;
3571 }
eec9405e 3572 return FALSE;
85d9c13c
TS
3573}
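The alignment rule in the reg + const_int case can be seen in a small sketch (illustrative only, not part of spu.c; register 3 is just an arbitrary hard register): for a 16-byte access the displacement must be a multiple of 16, because lqd/stqd drop the low 4 bits of the offset.

/* Illustrative sketch only.  */
static void
spu_legitimate_address_example (void)
{
  rtx base = gen_rtx_REG (Pmode, 3);
  rtx ok  = gen_rtx_PLUS (Pmode, base, GEN_INT (32));
  rtx bad = gen_rtx_PLUS (Pmode, base, GEN_INT (8));
  gcc_assert (spu_legitimate_address_p (TImode, ok, false));
  gcc_assert (!spu_legitimate_address_p (TImode, bad, false));
}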
3574
299456f3
BE
3575/* Like spu_legitimate_address_p, except with named addresses. */
3576static bool
ef4bddc2 3577spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
299456f3
BE
3578 bool reg_ok_strict, addr_space_t as)
3579{
3580 if (as == ADDR_SPACE_EA)
3581 return (REG_P (x) && (GET_MODE (x) == EAmode));
3582
3583 else if (as != ADDR_SPACE_GENERIC)
3584 gcc_unreachable ();
3585
3586 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3587}
3588
85d9c13c 3589/* When the address is reg + const_int, force the const_int into a
2f8e468b 3590 register. */
c9c72699 3591static rtx
85d9c13c 3592spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
ef4bddc2 3593 machine_mode mode ATTRIBUTE_UNUSED)
85d9c13c
TS
3594{
3595 rtx op0, op1;
3596 /* Make sure both operands are registers. */
3597 if (GET_CODE (x) == PLUS)
3598 {
3599 op0 = XEXP (x, 0);
3600 op1 = XEXP (x, 1);
3601 if (ALIGNED_SYMBOL_REF_P (op0))
3602 {
3603 op0 = force_reg (Pmode, op0);
3604 mark_reg_pointer (op0, 128);
3605 }
3606 else if (GET_CODE (op0) != REG)
3607 op0 = force_reg (Pmode, op0);
3608 if (ALIGNED_SYMBOL_REF_P (op1))
3609 {
3610 op1 = force_reg (Pmode, op1);
3611 mark_reg_pointer (op1, 128);
3612 }
3613 else if (GET_CODE (op1) != REG)
3614 op1 = force_reg (Pmode, op1);
3615 x = gen_rtx_PLUS (Pmode, op0, op1);
85d9c13c 3616 }
506d7b68 3617 return x;
85d9c13c
TS
3618}
3619
299456f3
BE
3620/* Like spu_legitimate_address, except with named address support. */
3621static rtx
ef4bddc2 3622spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
299456f3
BE
3623 addr_space_t as)
3624{
3625 if (as != ADDR_SPACE_GENERIC)
3626 return x;
3627
3628 return spu_legitimize_address (x, oldx, mode);
3629}
3630
2ea0be59
UW
3631/* Reload reg + const_int for out-of-range displacements. */
3632rtx
ef4bddc2 3633spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
2ea0be59
UW
3634 int opnum, int type)
3635{
3636 bool removed_and = false;
3637
3638 if (GET_CODE (ad) == AND
3639 && CONST_INT_P (XEXP (ad, 1))
3640 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3641 {
3642 ad = XEXP (ad, 0);
3643 removed_and = true;
3644 }
3645
3646 if (GET_CODE (ad) == PLUS
3647 && REG_P (XEXP (ad, 0))
3648 && CONST_INT_P (XEXP (ad, 1))
3649 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3650 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3651 {
3652 /* Unshare the sum. */
3653 ad = copy_rtx (ad);
3654
3655 /* Reload the displacement. */
3656 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3657 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3658 opnum, (enum reload_type) type);
3659
3660 /* Add back AND for alignment if we stripped it. */
3661 if (removed_and)
3662 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3663
3664 return ad;
3665 }
3666
3667 return NULL_RTX;
3668}
3669
85d9c13c
TS
3670/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3671 struct attribute_spec.handler. */
3672static tree
3673spu_handle_fndecl_attribute (tree * node,
3674 tree name,
3675 tree args ATTRIBUTE_UNUSED,
3676 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3677{
3678 if (TREE_CODE (*node) != FUNCTION_DECL)
3679 {
29d08eba
JM
3680 warning (0, "%qE attribute only applies to functions",
3681 name);
85d9c13c
TS
3682 *no_add_attrs = true;
3683 }
3684
3685 return NULL_TREE;
3686}
3687
3688/* Handle the "vector" attribute. */
3689static tree
3690spu_handle_vector_attribute (tree * node, tree name,
3691 tree args ATTRIBUTE_UNUSED,
3692 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3693{
3694 tree type = *node, result = NULL_TREE;
ef4bddc2 3695 machine_mode mode;
85d9c13c
TS
3696 int unsigned_p;
3697
3698 while (POINTER_TYPE_P (type)
3699 || TREE_CODE (type) == FUNCTION_TYPE
3700 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3701 type = TREE_TYPE (type);
3702
3703 mode = TYPE_MODE (type);
3704
3705 unsigned_p = TYPE_UNSIGNED (type);
3706 switch (mode)
3707 {
3708 case DImode:
3709 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3710 break;
3711 case SImode:
3712 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3713 break;
3714 case HImode:
3715 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3716 break;
3717 case QImode:
3718 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3719 break;
3720 case SFmode:
3721 result = V4SF_type_node;
3722 break;
3723 case DFmode:
3724 result = V2DF_type_node;
3725 break;
3726 default:
3727 break;
3728 }
3729
3730 /* Propagate qualifiers attached to the element type
3731 onto the vector type. */
3732 if (result && result != type && TYPE_QUALS (type))
3733 result = build_qualified_type (result, TYPE_QUALS (type));
3734
3735 *no_add_attrs = true; /* No need to hang on to the attribute. */
3736
3737 if (!result)
29d08eba 3738 warning (0, "%qE attribute ignored", name);
85d9c13c 3739 else
5dc11954 3740 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
85d9c13c
TS
3741
3742 return NULL_TREE;
3743}
3744
9f5ed61a 3745/* Return nonzero if FUNC is a naked function. */
85d9c13c
TS
3746static int
3747spu_naked_function_p (tree func)
3748{
3749 tree a;
3750
3751 if (TREE_CODE (func) != FUNCTION_DECL)
3752 abort ();
3753
3754 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3755 return a != NULL_TREE;
3756}
3757
3758int
3759spu_initial_elimination_offset (int from, int to)
3760{
3761 int saved_regs_size = spu_saved_regs_size ();
3762 int sp_offset = 0;
416ff32e 3763 if (!crtl->is_leaf || crtl->outgoing_args_size
85d9c13c
TS
3764 || get_frame_size () || saved_regs_size)
3765 sp_offset = STACK_POINTER_OFFSET;
3766 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7310a2da 3767 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
85d9c13c 3768 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7310a2da 3769 return get_frame_size ();
85d9c13c 3770 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38173d38 3771 return sp_offset + crtl->outgoing_args_size
85d9c13c
TS
3772 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3773 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3774 return get_frame_size () + saved_regs_size + sp_offset;
7310a2da
SSF
3775 else
3776 gcc_unreachable ();
85d9c13c
TS
3777}
3778
3779rtx
586de218 3780spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
85d9c13c 3781{
ef4bddc2 3782 machine_mode mode = TYPE_MODE (type);
85d9c13c
TS
3783 int byte_size = ((mode == BLKmode)
3784 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3785
3786 /* Make sure small structs are left justified in a register. */
3787 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3788 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3789 {
ef4bddc2 3790 machine_mode smode;
85d9c13c
TS
3791 rtvec v;
3792 int i;
3793 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3794 int n = byte_size / UNITS_PER_WORD;
3795 v = rtvec_alloc (nregs);
3796 for (i = 0; i < n; i++)
3797 {
3798 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3799 gen_rtx_REG (TImode,
3800 FIRST_RETURN_REGNUM
3801 + i),
3802 GEN_INT (UNITS_PER_WORD * i));
3803 byte_size -= UNITS_PER_WORD;
3804 }
3805
3806 if (n < nregs)
3807 {
3808 if (byte_size < 4)
3809 byte_size = 4;
3810 smode =
3811 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3812 RTVEC_ELT (v, n) =
3813 gen_rtx_EXPR_LIST (VOIDmode,
3814 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3815 GEN_INT (UNITS_PER_WORD * n));
3816 }
3817 return gen_rtx_PARALLEL (mode, v);
3818 }
3819 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3820}
3821
925ed112 3822static rtx
d5cc9181 3823spu_function_arg (cumulative_args_t cum_v,
ef4bddc2 3824 machine_mode mode,
925ed112 3825 const_tree type, bool named ATTRIBUTE_UNUSED)
85d9c13c 3826{
d5cc9181 3827 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
85d9c13c
TS
3828 int byte_size;
3829
0aa88287 3830 if (*cum >= MAX_REGISTER_ARGS)
85d9c13c
TS
3831 return 0;
3832
3833 byte_size = ((mode == BLKmode)
3834 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3835
3836 /* The ABI does not allow parameters to be passed partially in
3837 reg and partially in stack. */
0aa88287 3838 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
85d9c13c
TS
3839 return 0;
3840
3841 /* Make sure small structs are left justified in a register. */
3842 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3843 && byte_size < UNITS_PER_WORD && byte_size > 0)
3844 {
ef4bddc2 3845 machine_mode smode;
85d9c13c
TS
3846 rtx gr_reg;
3847 if (byte_size < 4)
3848 byte_size = 4;
3849 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3850 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
0aa88287 3851 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
85d9c13c
TS
3852 const0_rtx);
3853 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3854 }
3855 else
0aa88287 3856 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
85d9c13c
TS
3857}
3858
925ed112 3859static void
ef4bddc2 3860spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
925ed112
NF
3861 const_tree type, bool named ATTRIBUTE_UNUSED)
3862{
d5cc9181
JR
3863 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3864
925ed112
NF
3865 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3866 ? 1
3867 : mode == BLKmode
3868 ? ((int_size_in_bytes (type) + 15) / 16)
3869 : mode == VOIDmode
3870 ? 1
3871 : HARD_REGNO_NREGS (cum, mode));
3872}
3873
85d9c13c
TS
3874/* Variable sized types are passed by reference. */
3875static bool
d5cc9181 3876spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
ef4bddc2 3877 machine_mode mode ATTRIBUTE_UNUSED,
586de218 3878 const_tree type, bool named ATTRIBUTE_UNUSED)
85d9c13c
TS
3879{
3880 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3881}
3882\f
3883
3884/* Var args. */
3885
3886/* Create and return the va_list datatype.
3887
3888 On SPU, va_list is an array type equivalent to
3889
3890 typedef struct __va_list_tag
3891 {
3892 void *__args __attribute__((__aligned(16)));
3893 void *__skip __attribute__((__aligned(16)));
3894
3895 } va_list[1];
3896
2f8e468b 3897 where __args points to the arg that will be returned by the next
85d9c13c
TS
3898 va_arg(), and __skip points to the previous stack frame such that
3899 when __args == __skip we should advance __args by 32 bytes. */
3900static tree
3901spu_build_builtin_va_list (void)
3902{
3903 tree f_args, f_skip, record, type_decl;
3904 bool owp;
3905
3906 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3907
3908 type_decl =
4c4bde29
AH
3909 build_decl (BUILTINS_LOCATION,
3910 TYPE_DECL, get_identifier ("__va_list_tag"), record);
85d9c13c 3911
4c4bde29
AH
3912 f_args = build_decl (BUILTINS_LOCATION,
3913 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3914 f_skip = build_decl (BUILTINS_LOCATION,
3915 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
85d9c13c
TS
3916
3917 DECL_FIELD_CONTEXT (f_args) = record;
3918 DECL_ALIGN (f_args) = 128;
3919 DECL_USER_ALIGN (f_args) = 1;
3920
3921 DECL_FIELD_CONTEXT (f_skip) = record;
3922 DECL_ALIGN (f_skip) = 128;
3923 DECL_USER_ALIGN (f_skip) = 1;
3924
0fd2eac2 3925 TYPE_STUB_DECL (record) = type_decl;
85d9c13c
TS
3926 TYPE_NAME (record) = type_decl;
3927 TYPE_FIELDS (record) = f_args;
910ad8de 3928 DECL_CHAIN (f_args) = f_skip;
85d9c13c
TS
3929
 3930	  /* We know this is being padded and we want it that way.  It is an internal
3931 type so hide the warnings from the user. */
3932 owp = warn_padded;
3933 warn_padded = false;
3934
3935 layout_type (record);
3936
3937 warn_padded = owp;
3938
3939 /* The correct type is an array type of one element. */
3940 return build_array_type (record, build_index_type (size_zero_node));
3941}
3942
3943/* Implement va_start by filling the va_list structure VALIST.
3944 NEXTARG points to the first anonymous stack argument.
3945
3946 The following global variables are used to initialize
3947 the va_list structure:
3948
 38173d38 3949	   crtl->args.info:
85d9c13c
TS
3950 the CUMULATIVE_ARGS for this function
3951
38173d38 3952 crtl->args.arg_offset_rtx:
85d9c13c
TS
3953 holds the offset of the first anonymous stack argument
3954 (relative to the virtual arg pointer). */
3955
d7bd8aeb 3956static void
85d9c13c
TS
3957spu_va_start (tree valist, rtx nextarg)
3958{
3959 tree f_args, f_skip;
3960 tree args, skip, t;
3961
3962 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
910ad8de 3963 f_skip = DECL_CHAIN (f_args);
85d9c13c 3964
86710a8b 3965 valist = build_simple_mem_ref (valist);
85d9c13c
TS
3966 args =
3967 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3968 skip =
3969 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3970
3971 /* Find the __args area. */
3972 t = make_tree (TREE_TYPE (args), nextarg);
38173d38 3973 if (crtl->args.pretend_args_size > 0)
5d49b6a7 3974 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
726a989a 3975 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
85d9c13c
TS
3976 TREE_SIDE_EFFECTS (t) = 1;
3977 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3978
3979 /* Find the __skip area. */
3980 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
5d49b6a7
RG
3981 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3982 - STACK_POINTER_OFFSET));
726a989a 3983 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
85d9c13c
TS
3984 TREE_SIDE_EFFECTS (t) = 1;
3985 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3986}
3987
3988/* Gimplify va_arg by updating the va_list structure
3989 VALIST as required to retrieve an argument of type
3990 TYPE, and returning that argument.
3991
3992 ret = va_arg(VALIST, TYPE);
3993
3994 generates code equivalent to:
3995
3996 paddedsize = (sizeof(TYPE) + 15) & -16;
3997 if (VALIST.__args + paddedsize > VALIST.__skip
3998 && VALIST.__args <= VALIST.__skip)
3999 addr = VALIST.__skip + 32;
4000 else
4001 addr = VALIST.__args;
4002 VALIST.__args = addr + paddedsize;
4003 ret = *(TYPE *)addr;
4004 */
4005static tree
726a989a
RB
4006spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4007 gimple_seq * post_p ATTRIBUTE_UNUSED)
85d9c13c
TS
4008{
4009 tree f_args, f_skip;
4010 tree args, skip;
4011 HOST_WIDE_INT size, rsize;
5d49b6a7 4012 tree addr, tmp;
85d9c13c
TS
4013 bool pass_by_reference_p;
4014
4015 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
910ad8de 4016 f_skip = DECL_CHAIN (f_args);
85d9c13c 4017
85d9c13c
TS
4018 args =
4019 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4020 skip =
4021 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4022
4023 addr = create_tmp_var (ptr_type_node, "va_arg");
85d9c13c
TS
4024
4025 /* if an object is dynamically sized, a pointer to it is passed
4026 instead of the object itself. */
a207915a
UW
4027 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4028 false);
85d9c13c
TS
4029 if (pass_by_reference_p)
4030 type = build_pointer_type (type);
4031 size = int_size_in_bytes (type);
4032 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4033
4034 /* build conditional expression to calculate addr. The expression
4035 will be gimplified later. */
5d49b6a7 4036 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
85d9c13c 4037 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
726a989a
RB
4038 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4039 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4040 unshare_expr (skip)));
85d9c13c
TS
4041
4042 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
5d49b6a7
RG
4043 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4044 unshare_expr (args));
85d9c13c 4045
726a989a 4046 gimplify_assign (addr, tmp, pre_p);
85d9c13c
TS
4047
4048 /* update VALIST.__args */
5d49b6a7 4049 tmp = fold_build_pointer_plus_hwi (addr, rsize);
726a989a 4050 gimplify_assign (unshare_expr (args), tmp, pre_p);
85d9c13c 4051
5b21f0f3
RG
4052 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4053 addr);
85d9c13c
TS
4054
4055 if (pass_by_reference_p)
4056 addr = build_va_arg_indirect_ref (addr);
4057
4058 return build_va_arg_indirect_ref (addr);
4059}
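/* For reference, a plain-C sketch of the pointer arithmetic the expansion
   above generates, assuming 16-byte argument slots.  The struct and
   function names below are hypothetical and are not part of this port.  */
#if 0
struct spu_va_tag { char *__args; char *__skip; };

static void *
spu_va_arg_sketch (struct spu_va_tag *ap, unsigned long size)
{
  unsigned long padded = (size + 15) & ~15UL;
  char *addr = ap->__args;
  /* Crossing the __skip boundary means the next argument lives 32 bytes
     further on (see the va_list comment above).  */
  if (addr + padded > ap->__skip && addr <= ap->__skip)
    addr = ap->__skip + 32;
  ap->__args = addr + padded;
  return addr;
}
#endif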
4060
4061/* Save parameter registers starting with the register that corresponds
 4062	   to the first unnamed parameter.  If the first unnamed parameter is
4063 in the stack then save no registers. Set pretend_args_size to the
4064 amount of space needed to save the registers. */
d5cc9181 4065static void
ef4bddc2 4066spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
85d9c13c
TS
4067 tree type, int *pretend_size, int no_rtl)
4068{
4069 if (!no_rtl)
4070 {
4071 rtx tmp;
4072 int regno;
4073 int offset;
d5cc9181 4074 int ncum = *get_cumulative_args (cum);
85d9c13c
TS
4075
4076 /* cum currently points to the last named argument, we want to
4077 start at the next argument. */
d5cc9181 4078 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
85d9c13c
TS
4079
4080 offset = -STACK_POINTER_OFFSET;
4081 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4082 {
4083 tmp = gen_frame_mem (V4SImode,
0a81f074 4084 plus_constant (Pmode, virtual_incoming_args_rtx,
85d9c13c
TS
4085 offset));
4086 emit_move_insn (tmp,
4087 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4088 offset += 16;
4089 }
4090 *pretend_size = offset + STACK_POINTER_OFFSET;
4091 }
4092}
4093\f
5efd84c5 4094static void
85d9c13c
TS
4095spu_conditional_register_usage (void)
4096{
4097 if (flag_pic)
4098 {
4099 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4100 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4101 }
85d9c13c
TS
4102}
4103
eec9405e
TS
4104/* This is called any time we inspect the alignment of a register for
4105 addresses. */
85d9c13c 4106static int
eec9405e 4107reg_aligned_for_addr (rtx x)
85d9c13c 4108{
eec9405e
TS
4109 int regno =
4110 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4111 return REGNO_POINTER_ALIGN (regno) >= 128;
85d9c13c
TS
4112}
4113
3d9cd79a
UW
4114/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4115 into its SYMBOL_REF_FLAGS. */
4116static void
4117spu_encode_section_info (tree decl, rtx rtl, int first)
4118{
4119 default_encode_section_info (decl, rtl, first);
4120
4121 /* If a variable has a forced alignment to < 16 bytes, mark it with
4122 SYMBOL_FLAG_ALIGN1. */
4123 if (TREE_CODE (decl) == VAR_DECL
4124 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4125 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4126}
4127
85d9c13c
TS
4128/* Return TRUE if we are certain the mem refers to a complete object
4129 which is both 16-byte aligned and padded to a 16-byte boundary. This
4130 would make it safe to store with a single instruction.
4131 We guarantee the alignment and padding for static objects by aligning
4132 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4133 FIXME: We currently cannot guarantee this for objects on the stack
4134 because assign_parm_setup_stack calls assign_stack_local with the
4135 alignment of the parameter mode and in that case the alignment never
4136 gets adjusted by LOCAL_ALIGNMENT. */
4137static int
4138store_with_one_insn_p (rtx mem)
4139{
ef4bddc2 4140 machine_mode mode = GET_MODE (mem);
85d9c13c 4141 rtx addr = XEXP (mem, 0);
eec9405e 4142 if (mode == BLKmode)
85d9c13c 4143 return 0;
eec9405e
TS
4144 if (GET_MODE_SIZE (mode) >= 16)
4145 return 1;
85d9c13c
TS
4146 /* Only static objects. */
4147 if (GET_CODE (addr) == SYMBOL_REF)
4148 {
4149 /* We use the associated declaration to make sure the access is
2f8e468b 4150 referring to the whole object.
dd5a833e 4151 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
85d9c13c
TS
4152 if it is necessary. Will there be cases where one exists, and
4153 the other does not? Will there be cases where both exist, but
4154 have different types? */
4155 tree decl = MEM_EXPR (mem);
4156 if (decl
4157 && TREE_CODE (decl) == VAR_DECL
4158 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4159 return 1;
4160 decl = SYMBOL_REF_DECL (addr);
4161 if (decl
4162 && TREE_CODE (decl) == VAR_DECL
4163 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4164 return 1;
4165 }
4166 return 0;
4167}
4168
eec9405e
TS
4169/* Return 1 when the address is not valid for a simple load and store as
4170 required by the '_mov*' patterns. We could make this less strict
 4171	   for loads, but we prefer MEMs to look the same so they are more
4172 likely to be merged. */
4173static int
4174address_needs_split (rtx mem)
4175{
4176 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4177 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4178 || !(store_with_one_insn_p (mem)
4179 || mem_is_padded_component_ref (mem))))
4180 return 1;
4181
4182 return 0;
4183}
4184
299456f3
BE
4185static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4186static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4187static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4188
4189/* MEM is known to be an __ea qualified memory access. Emit a call to
 4190	   fetch the PPU-side memory into local store, and set DATA_ADDR to its
 4191	   address in local store. */
4192
4193static void
4194ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4195{
4196 if (is_store)
4197 {
4198 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4199 if (!cache_fetch_dirty)
4200 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4201 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4202 2, ea_addr, EAmode, ndirty, SImode);
4203 }
4204 else
4205 {
4206 if (!cache_fetch)
4207 cache_fetch = init_one_libfunc ("__cache_fetch");
4208 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4209 1, ea_addr, EAmode);
4210 }
4211}
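/* As implied by the two library calls above, the cache runtime is assumed
   to provide routines shaped roughly like the hypothetical prototypes
   below: both return the local-store address of the cached copy of the
   data, and the dirty variant additionally takes the number of bytes that
   are about to be written.  (Sketch only; the real declarations live in
   the SPU cache runtime, not in this file.)  */
#if 0
extern void *__cache_fetch (__ea void *ea_addr);
extern void *__cache_fetch_dirty (__ea void *ea_addr, int n_bytes_dirty);
#endif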
4212
4213/* Like ea_load_store, but do the cache tag comparison and, for stores,
4214 dirty bit marking, inline.
4215
4216 The cache control data structure is an array of
4217
4218 struct __cache_tag_array
4219 {
4220 unsigned int tag_lo[4];
4221 unsigned int tag_hi[4];
4222 void *data_pointer[4];
4223 int reserved[4];
4224 vector unsigned short dirty_bits[4];
4225 } */
4226
4227static void
4228ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4229{
4230 rtx ea_addr_si;
4231 HOST_WIDE_INT v;
4232 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4233 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4234 rtx index_mask = gen_reg_rtx (SImode);
4235 rtx tag_arr = gen_reg_rtx (Pmode);
4236 rtx splat_mask = gen_reg_rtx (TImode);
4237 rtx splat = gen_reg_rtx (V4SImode);
4238 rtx splat_hi = NULL_RTX;
4239 rtx tag_index = gen_reg_rtx (Pmode);
4240 rtx block_off = gen_reg_rtx (SImode);
4241 rtx tag_addr = gen_reg_rtx (Pmode);
4242 rtx tag = gen_reg_rtx (V4SImode);
4243 rtx cache_tag = gen_reg_rtx (V4SImode);
4244 rtx cache_tag_hi = NULL_RTX;
4245 rtx cache_ptrs = gen_reg_rtx (TImode);
4246 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4247 rtx tag_equal = gen_reg_rtx (V4SImode);
4248 rtx tag_equal_hi = NULL_RTX;
4249 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4250 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4251 rtx eq_index = gen_reg_rtx (SImode);
23c39aaa
DM
4252 rtx bcomp, hit_label, hit_ref, cont_label;
4253 rtx_insn *insn;
299456f3
BE
4254
4255 if (spu_ea_model != 32)
4256 {
4257 splat_hi = gen_reg_rtx (V4SImode);
4258 cache_tag_hi = gen_reg_rtx (V4SImode);
4259 tag_equal_hi = gen_reg_rtx (V4SImode);
4260 }
4261
0a81f074 4262 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
299456f3
BE
4263 emit_move_insn (tag_arr, tag_arr_sym);
4264 v = 0x0001020300010203LL;
4265 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4266 ea_addr_si = ea_addr;
4267 if (spu_ea_model != 32)
4268 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4269
4270 /* tag_index = ea_addr & (tag_array_size - 128) */
4271 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4272
4273 /* splat ea_addr to all 4 slots. */
4274 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4275 /* Similarly for high 32 bits of ea_addr. */
4276 if (spu_ea_model != 32)
4277 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4278
4279 /* block_off = ea_addr & 127 */
4280 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4281
4282 /* tag_addr = tag_arr + tag_index */
4283 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4284
4285 /* Read cache tags. */
4286 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4287 if (spu_ea_model != 32)
4288 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
0a81f074
RS
4289 plus_constant (Pmode,
4290 tag_addr, 16)));
299456f3
BE
4291
4292 /* tag = ea_addr & -128 */
4293 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4294
4295 /* Read all four cache data pointers. */
4296 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
0a81f074
RS
4297 plus_constant (Pmode,
4298 tag_addr, 32)));
299456f3
BE
4299
4300 /* Compare tags. */
4301 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4302 if (spu_ea_model != 32)
4303 {
4304 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4305 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4306 }
4307
 4308	  /* At most one of the tags compares equal, so tag_equal has one
4309 32-bit slot set to all 1's, with the other slots all zero.
4310 gbb picks off low bit from each byte in the 128-bit registers,
4311 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4312 we have a hit. */
4313 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4314 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4315
4316 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4317 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4318
4319 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4320 (rotating eq_index mod 16 bytes). */
4321 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4322 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4323
4324 /* Add block offset to form final data address. */
4325 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4326
4327 /* Check that we did hit. */
4328 hit_label = gen_label_rtx ();
4329 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4330 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
f7df4a84 4331 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
299456f3
BE
4332 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4333 hit_ref, pc_rtx)));
4334 /* Say that this branch is very likely to happen. */
4335 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
e5af9ddd 4336 add_int_reg_note (insn, REG_BR_PROB, v);
299456f3
BE
4337
4338 ea_load_store (mem, is_store, ea_addr, data_addr);
4339 cont_label = gen_label_rtx ();
4340 emit_jump_insn (gen_jump (cont_label));
4341 emit_barrier ();
4342
4343 emit_label (hit_label);
4344
4345 if (is_store)
4346 {
4347 HOST_WIDE_INT v_hi;
4348 rtx dirty_bits = gen_reg_rtx (TImode);
4349 rtx dirty_off = gen_reg_rtx (SImode);
4350 rtx dirty_128 = gen_reg_rtx (TImode);
4351 rtx neg_block_off = gen_reg_rtx (SImode);
4352
4353 /* Set up mask with one dirty bit per byte of the mem we are
4354 writing, starting from top bit. */
4355 v_hi = v = -1;
4356 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4357 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4358 {
4359 v_hi = v;
4360 v = 0;
4361 }
4362 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4363
4364 /* Form index into cache dirty_bits. eq_index is one of
4365 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4366 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4367 offset to each of the four dirty_bits elements. */
4368 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4369
4370 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4371
4372 /* Rotate bit mask to proper bit. */
4373 emit_insn (gen_negsi2 (neg_block_off, block_off));
4374 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4375 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4376
4377 /* Or in the new dirty bits. */
4378 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4379
4380 /* Store. */
4381 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4382 }
4383
4384 emit_label (cont_label);
4385}
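/* Worked example (illustrative; assumes the 32-bit EA model and a
   __cache_tag_array_size of 4096, so index_mask == 0xf80): for
   ea_addr == 0x00012345 we get tag_index == 0x300, block_off == 0x45 and
   tag == 0x00012300.  If the matching tag sits in slot 2, tag_eq_pack_si
   is 0x00f0, clz gives eq_index == 24, rotating cache_ptrs by 24 (mod 16)
   bytes brings data_pointer[2] into the preferred slot, and for a store
   dirty_off == 0x60 selects dirty_bits[2].  */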
4386
4387static rtx
4388expand_ea_mem (rtx mem, bool is_store)
4389{
4390 rtx ea_addr;
4391 rtx data_addr = gen_reg_rtx (Pmode);
4392 rtx new_mem;
4393
4394 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4395 if (optimize_size || optimize == 0)
4396 ea_load_store (mem, is_store, ea_addr, data_addr);
4397 else
4398 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4399
4400 if (ea_alias_set == -1)
4401 ea_alias_set = new_alias_set ();
4402
4403 /* We generate a new MEM RTX to refer to the copy of the data
4404 in the cache. We do not copy memory attributes (except the
4405 alignment) from the original MEM, as they may no longer apply
4406 to the cache copy. */
4407 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4408 set_mem_alias_set (new_mem, ea_alias_set);
4409 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4410
4411 return new_mem;
4412}
4413
85d9c13c 4414int
ef4bddc2 4415spu_expand_mov (rtx * ops, machine_mode mode)
85d9c13c
TS
4416{
4417 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
46fc2305
UW
4418 {
4419 /* Perform the move in the destination SUBREG's inner mode. */
4420 ops[0] = SUBREG_REG (ops[0]);
4421 mode = GET_MODE (ops[0]);
4422 ops[1] = gen_lowpart_common (mode, ops[1]);
4423 gcc_assert (ops[1]);
4424 }
85d9c13c
TS
4425
4426 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4427 {
4428 rtx from = SUBREG_REG (ops[1]);
ef4bddc2 4429 machine_mode imode = int_mode_for_mode (GET_MODE (from));
85d9c13c
TS
4430
4431 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4432 && GET_MODE_CLASS (imode) == MODE_INT
4433 && subreg_lowpart_p (ops[1]));
4434
4435 if (GET_MODE_SIZE (imode) < 4)
4caab5ba
UW
4436 imode = SImode;
4437 if (imode != GET_MODE (from))
4438 from = gen_rtx_SUBREG (imode, from, 0);
85d9c13c
TS
4439
4440 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4441 {
947131ba
RS
4442 enum insn_code icode = convert_optab_handler (trunc_optab,
4443 mode, imode);
85d9c13c
TS
4444 emit_insn (GEN_FCN (icode) (ops[0], from));
4445 }
4446 else
4447 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4448 return 1;
4449 }
4450
4451 /* At least one of the operands needs to be a register. */
4452 if ((reload_in_progress | reload_completed) == 0
4453 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4454 {
4455 rtx temp = force_reg (mode, ops[1]);
4456 emit_move_insn (ops[0], temp);
4457 return 1;
4458 }
4459 if (reload_in_progress || reload_completed)
4460 {
a1c6e4b8
TS
4461 if (CONSTANT_P (ops[1]))
4462 return spu_split_immediate (ops);
85d9c13c
TS
4463 return 0;
4464 }
eec9405e
TS
4465
4466 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4467 extend them. */
4468 if (GET_CODE (ops[1]) == CONST_INT)
85d9c13c 4469 {
eec9405e
TS
4470 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4471 if (val != INTVAL (ops[1]))
85d9c13c 4472 {
eec9405e
TS
4473 emit_move_insn (ops[0], GEN_INT (val));
4474 return 1;
85d9c13c
TS
4475 }
4476 }
eec9405e 4477 if (MEM_P (ops[0]))
299456f3
BE
4478 {
4479 if (MEM_ADDR_SPACE (ops[0]))
4480 ops[0] = expand_ea_mem (ops[0], true);
4481 return spu_split_store (ops);
4482 }
eec9405e 4483 if (MEM_P (ops[1]))
299456f3
BE
4484 {
4485 if (MEM_ADDR_SPACE (ops[1]))
4486 ops[1] = expand_ea_mem (ops[1], false);
4487 return spu_split_load (ops);
4488 }
eec9405e 4489
85d9c13c
TS
4490 return 0;
4491}
4492
eec9405e
TS
4493static void
4494spu_convert_move (rtx dst, rtx src)
85d9c13c 4495{
ef4bddc2
RS
4496 machine_mode mode = GET_MODE (dst);
4497 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
eec9405e
TS
4498 rtx reg;
4499 gcc_assert (GET_MODE (src) == TImode);
4500 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
f7df4a84 4501 emit_insn (gen_rtx_SET (reg,
eec9405e
TS
4502 gen_rtx_TRUNCATE (int_mode,
4503 gen_rtx_LSHIFTRT (TImode, src,
4504 GEN_INT (int_mode == DImode ? 64 : 96)))));
4505 if (int_mode != mode)
4506 {
4507 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4508 emit_move_insn (dst, reg);
4509 }
4510}
85d9c13c 4511
eec9405e
TS
4512/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4513 the address from SRC and SRC+16. Return a REG or CONST_INT that
4514 specifies how many bytes to rotate the loaded registers, plus any
4515 extra from EXTRA_ROTQBY. The address and rotate amounts are
4516 normalized to improve merging of loads and rotate computations. */
4517static rtx
4518spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4519{
4520 rtx addr = XEXP (src, 0);
4521 rtx p0, p1, rot, addr0, addr1;
4522 int rot_amt;
85d9c13c
TS
4523
4524 rot = 0;
4525 rot_amt = 0;
eec9405e
TS
4526
4527 if (MEM_ALIGN (src) >= 128)
4528 /* Address is already aligned; simply perform a TImode load. */ ;
4529 else if (GET_CODE (addr) == PLUS)
85d9c13c
TS
4530 {
4531 /* 8 cases:
4532 aligned reg + aligned reg => lqx
4533 aligned reg + unaligned reg => lqx, rotqby
4534 aligned reg + aligned const => lqd
4535 aligned reg + unaligned const => lqd, rotqbyi
4536 unaligned reg + aligned reg => lqx, rotqby
4537 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4538 unaligned reg + aligned const => lqd, rotqby
 4540	 unaligned reg + unaligned const => not allowed by legitimate address
4540 */
4541 p0 = XEXP (addr, 0);
4542 p1 = XEXP (addr, 1);
eec9405e 4543 if (!reg_aligned_for_addr (p0))
85d9c13c 4544 {
eec9405e 4545 if (REG_P (p1) && !reg_aligned_for_addr (p1))
85d9c13c 4546 {
eec9405e
TS
4547 rot = gen_reg_rtx (SImode);
4548 emit_insn (gen_addsi3 (rot, p0, p1));
4549 }
4550 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4551 {
4552 if (INTVAL (p1) > 0
4553 && REG_POINTER (p0)
4554 && INTVAL (p1) * BITS_PER_UNIT
4555 < REGNO_POINTER_ALIGN (REGNO (p0)))
4556 {
4557 rot = gen_reg_rtx (SImode);
4558 emit_insn (gen_addsi3 (rot, p0, p1));
4559 addr = p0;
4560 }
4561 else
4562 {
4563 rtx x = gen_reg_rtx (SImode);
4564 emit_move_insn (x, p1);
4565 if (!spu_arith_operand (p1, SImode))
4566 p1 = x;
4567 rot = gen_reg_rtx (SImode);
4568 emit_insn (gen_addsi3 (rot, p0, p1));
4569 addr = gen_rtx_PLUS (Pmode, p0, x);
4570 }
85d9c13c
TS
4571 }
4572 else
4573 rot = p0;
4574 }
4575 else
4576 {
4577 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4578 {
4579 rot_amt = INTVAL (p1) & 15;
eec9405e
TS
4580 if (INTVAL (p1) & -16)
4581 {
4582 p1 = GEN_INT (INTVAL (p1) & -16);
4583 addr = gen_rtx_PLUS (SImode, p0, p1);
4584 }
4585 else
4586 addr = p0;
85d9c13c 4587 }
eec9405e 4588 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
85d9c13c
TS
4589 rot = p1;
4590 }
4591 }
eec9405e 4592 else if (REG_P (addr))
85d9c13c 4593 {
eec9405e 4594 if (!reg_aligned_for_addr (addr))
85d9c13c
TS
4595 rot = addr;
4596 }
4597 else if (GET_CODE (addr) == CONST)
4598 {
4599 if (GET_CODE (XEXP (addr, 0)) == PLUS
4600 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4601 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4602 {
4603 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4604 if (rot_amt & -16)
4605 addr = gen_rtx_CONST (Pmode,
4606 gen_rtx_PLUS (Pmode,
4607 XEXP (XEXP (addr, 0), 0),
4608 GEN_INT (rot_amt & -16)));
4609 else
4610 addr = XEXP (XEXP (addr, 0), 0);
4611 }
4612 else
eec9405e
TS
4613 {
4614 rot = gen_reg_rtx (Pmode);
4615 emit_move_insn (rot, addr);
4616 }
85d9c13c
TS
4617 }
4618 else if (GET_CODE (addr) == CONST_INT)
4619 {
4620 rot_amt = INTVAL (addr);
4621 addr = GEN_INT (rot_amt & -16);
4622 }
4623 else if (!ALIGNED_SYMBOL_REF_P (addr))
eec9405e
TS
4624 {
4625 rot = gen_reg_rtx (Pmode);
4626 emit_move_insn (rot, addr);
4627 }
85d9c13c 4628
eec9405e 4629 rot_amt += extra_rotby;
85d9c13c
TS
4630
4631 rot_amt &= 15;
4632
4633 if (rot && rot_amt)
4634 {
eec9405e
TS
4635 rtx x = gen_reg_rtx (SImode);
4636 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4637 rot = x;
85d9c13c
TS
4638 rot_amt = 0;
4639 }
eec9405e
TS
4640 if (!rot && rot_amt)
4641 rot = GEN_INT (rot_amt);
4642
4643 addr0 = copy_rtx (addr);
4644 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4645 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4646
4647 if (dst1)
4648 {
0a81f074 4649 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
eec9405e
TS
4650 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4651 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4652 }
85d9c13c 4653
eec9405e
TS
4654 return rot;
4655}
4656
4657int
4658spu_split_load (rtx * ops)
4659{
ef4bddc2 4660 machine_mode mode = GET_MODE (ops[0]);
eec9405e
TS
4661 rtx addr, load, rot;
4662 int rot_amt;
85d9c13c 4663
eec9405e
TS
4664 if (GET_MODE_SIZE (mode) >= 16)
4665 return 0;
85d9c13c 4666
eec9405e
TS
4667 addr = XEXP (ops[1], 0);
4668 gcc_assert (GET_CODE (addr) != AND);
4669
4670 if (!address_needs_split (ops[1]))
4671 {
4672 ops[1] = change_address (ops[1], TImode, addr);
4673 load = gen_reg_rtx (TImode);
4674 emit_insn (gen__movti (load, ops[1]));
4675 spu_convert_move (ops[0], load);
4676 return 1;
4677 }
4678
4679 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4680
4681 load = gen_reg_rtx (TImode);
4682 rot = spu_expand_load (load, 0, ops[1], rot_amt);
85d9c13c
TS
4683
4684 if (rot)
4685 emit_insn (gen_rotqby_ti (load, load, rot));
85d9c13c 4686
eec9405e
TS
4687 spu_convert_move (ops[0], load);
4688 return 1;
85d9c13c
TS
4689}
4690
eec9405e 4691int
85d9c13c
TS
4692spu_split_store (rtx * ops)
4693{
ef4bddc2 4694 machine_mode mode = GET_MODE (ops[0]);
eec9405e 4695 rtx reg;
85d9c13c
TS
4696 rtx addr, p0, p1, p1_lo, smem;
4697 int aform;
4698 int scalar;
4699
eec9405e
TS
4700 if (GET_MODE_SIZE (mode) >= 16)
4701 return 0;
4702
85d9c13c 4703 addr = XEXP (ops[0], 0);
eec9405e
TS
4704 gcc_assert (GET_CODE (addr) != AND);
4705
4706 if (!address_needs_split (ops[0]))
4707 {
4708 reg = gen_reg_rtx (TImode);
4709 emit_insn (gen_spu_convert (reg, ops[1]));
4710 ops[0] = change_address (ops[0], TImode, addr);
4711 emit_move_insn (ops[0], reg);
4712 return 1;
4713 }
85d9c13c
TS
4714
4715 if (GET_CODE (addr) == PLUS)
4716 {
4717 /* 8 cases:
4718 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4719 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4720 aligned reg + aligned const => lqd, c?d, shuf, stqx
4721 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4722 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4723 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4724 unaligned reg + aligned const => lqd, c?d, shuf, stqx
 eec9405e 4725	 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
85d9c13c
TS
4726 */
4727 aform = 0;
4728 p0 = XEXP (addr, 0);
4729 p1 = p1_lo = XEXP (addr, 1);
eec9405e 4730 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
85d9c13c
TS
4731 {
4732 p1_lo = GEN_INT (INTVAL (p1) & 15);
eec9405e
TS
4733 if (reg_aligned_for_addr (p0))
4734 {
4735 p1 = GEN_INT (INTVAL (p1) & -16);
4736 if (p1 == const0_rtx)
4737 addr = p0;
4738 else
4739 addr = gen_rtx_PLUS (SImode, p0, p1);
4740 }
4741 else
4742 {
4743 rtx x = gen_reg_rtx (SImode);
4744 emit_move_insn (x, p1);
4745 addr = gen_rtx_PLUS (SImode, p0, x);
4746 }
85d9c13c
TS
4747 }
4748 }
eec9405e 4749 else if (REG_P (addr))
85d9c13c
TS
4750 {
4751 aform = 0;
4752 p0 = addr;
4753 p1 = p1_lo = const0_rtx;
4754 }
4755 else
4756 {
4757 aform = 1;
4758 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4759 p1 = 0; /* aform doesn't use p1 */
4760 p1_lo = addr;
4761 if (ALIGNED_SYMBOL_REF_P (addr))
4762 p1_lo = const0_rtx;
eec9405e
TS
4763 else if (GET_CODE (addr) == CONST
4764 && GET_CODE (XEXP (addr, 0)) == PLUS
4765 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4766 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
85d9c13c 4767 {
eec9405e
TS
4768 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4769 if ((v & -16) != 0)
4770 addr = gen_rtx_CONST (Pmode,
4771 gen_rtx_PLUS (Pmode,
4772 XEXP (XEXP (addr, 0), 0),
4773 GEN_INT (v & -16)));
4774 else
4775 addr = XEXP (XEXP (addr, 0), 0);
4776 p1_lo = GEN_INT (v & 15);
85d9c13c
TS
4777 }
4778 else if (GET_CODE (addr) == CONST_INT)
4779 {
4780 p1_lo = GEN_INT (INTVAL (addr) & 15);
4781 addr = GEN_INT (INTVAL (addr) & -16);
4782 }
eec9405e
TS
4783 else
4784 {
4785 p1_lo = gen_reg_rtx (SImode);
4786 emit_move_insn (p1_lo, addr);
4787 }
85d9c13c
TS
4788 }
4789
d707fc77 4790 gcc_assert (aform == 0 || aform == 1);
eec9405e 4791 reg = gen_reg_rtx (TImode);
09aad82b 4792
85d9c13c
TS
4793 scalar = store_with_one_insn_p (ops[0]);
4794 if (!scalar)
4795 {
 4796	      /* We could copy the flags from the ops[0] MEM to lmem here, but we
 4797	         don't because we want this load to be optimized away if possible,
 4798	         and copying the flags would prevent that in certain cases, e.g.
 4799	         consider the volatile flag. */
4800
eec9405e 4801 rtx pat = gen_reg_rtx (TImode);
09aad82b
TS
4802 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4803 set_mem_alias_set (lmem, 0);
4804 emit_insn (gen_movti (reg, lmem));
85d9c13c 4805
eec9405e 4806 if (!p0 || reg_aligned_for_addr (p0))
85d9c13c
TS
4807 p0 = stack_pointer_rtx;
4808 if (!p1_lo)
4809 p1_lo = const0_rtx;
4810
4811 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4812 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4813 }
85d9c13c
TS
4814 else
4815 {
4816 if (GET_CODE (ops[1]) == REG)
4817 emit_insn (gen_spu_convert (reg, ops[1]));
4818 else if (GET_CODE (ops[1]) == SUBREG)
4819 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4820 else
4821 abort ();
4822 }
4823
4824 if (GET_MODE_SIZE (mode) < 4 && scalar)
eec9405e
TS
4825 emit_insn (gen_ashlti3
4826 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
85d9c13c 4827
eec9405e 4828 smem = change_address (ops[0], TImode, copy_rtx (addr));
85d9c13c
TS
4829 /* We can't use the previous alias set because the memory has changed
4830 size and can potentially overlap objects of other types. */
4831 set_mem_alias_set (smem, 0);
4832
09aad82b 4833 emit_insn (gen_movti (smem, reg));
eec9405e 4834 return 1;
85d9c13c
TS
4835}
4836
4837/* Return TRUE if X is MEM which is a struct member reference
4838 and the member can safely be loaded and stored with a single
4839 instruction because it is padded. */
4840static int
4841mem_is_padded_component_ref (rtx x)
4842{
4843 tree t = MEM_EXPR (x);
4844 tree r;
4845 if (!t || TREE_CODE (t) != COMPONENT_REF)
4846 return 0;
4847 t = TREE_OPERAND (t, 1);
4848 if (!t || TREE_CODE (t) != FIELD_DECL
4849 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4850 return 0;
4851 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4852 r = DECL_FIELD_CONTEXT (t);
4853 if (!r || TREE_CODE (r) != RECORD_TYPE)
4854 return 0;
4855 /* Make sure they are the same mode */
4856 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4857 return 0;
 4858	  /* If there are no following fields, then the field's alignment ensures
2f8e468b
KH
 4859	     the structure is padded out to that alignment, which means this field is
4860 padded too. */
85d9c13c
TS
4861 if (TREE_CHAIN (t) == 0)
4862 return 1;
4863 /* If the following field is also aligned then this field will be
4864 padded. */
4865 t = TREE_CHAIN (t);
4866 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4867 return 1;
4868 return 0;
4869}
4870
32fb22af
SL
4871/* Parse the -mfixed-range= option string. */
4872static void
4873fix_range (const char *const_str)
4874{
4875 int i, first, last;
4876 char *str, *dash, *comma;
4877
 4878	  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4879 REG2 are either register names or register numbers. The effect
4880 of this option is to mark the registers in the range from REG1 to
4881 REG2 as ``fixed'' so they won't be used by the compiler. */
4882
4883 i = strlen (const_str);
4884 str = (char *) alloca (i + 1);
4885 memcpy (str, const_str, i + 1);
4886
4887 while (1)
4888 {
4889 dash = strchr (str, '-');
4890 if (!dash)
4891 {
4892 warning (0, "value of -mfixed-range must have form REG1-REG2");
4893 return;
4894 }
4895 *dash = '\0';
4896 comma = strchr (dash + 1, ',');
4897 if (comma)
4898 *comma = '\0';
4899
4900 first = decode_reg_name (str);
4901 if (first < 0)
4902 {
4903 warning (0, "unknown register name: %s", str);
4904 return;
4905 }
4906
4907 last = decode_reg_name (dash + 1);
4908 if (last < 0)
4909 {
4910 warning (0, "unknown register name: %s", dash + 1);
4911 return;
4912 }
4913
4914 *dash = '-';
4915
4916 if (first > last)
4917 {
4918 warning (0, "%s-%s is an empty range", str, dash + 1);
4919 return;
4920 }
4921
4922 for (i = first; i <= last; ++i)
4923 fixed_regs[i] = call_used_regs[i] = 1;
4924
4925 if (!comma)
4926 break;
4927
4928 *comma = ',';
4929 str = comma + 1;
4930 }
4931}
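/* E.g. -mfixed-range=80-127 reserves registers 80 through 127, and
   several ranges may be combined as in -mfixed-range=80-89,100-127.
   (Illustrative values; REG1/REG2 may be register names or numbers, as
   the comment above says.)  */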
4932
85d9c13c
TS
4933/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4934 can be generated using the fsmbi instruction. */
4935int
4936fsmbi_const_p (rtx x)
4937{
a1c6e4b8
TS
4938 if (CONSTANT_P (x))
4939 {
73701e27 4940 /* We can always choose TImode for CONST_INT because the high bits
a1c6e4b8 4941 of an SImode will always be all 1s, i.e., valid for fsmbi. */
73701e27 4942 enum immediate_class c = classify_immediate (x, TImode);
6fb5fa3c 4943 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
a1c6e4b8
TS
4944 }
4945 return 0;
4946}
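/* Reminder of the instruction's semantics: fsmbi expands a 16-bit
   immediate into a 16-byte mask, one byte per bit, each byte becoming
   0x00 or 0xff; e.g. the immediate 0x8001 yields the byte pattern
   ff 00 00 ... 00 ff.  */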
4947
4948/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4949 can be generated using the cbd, chd, cwd or cdd instruction. */
4950int
ef4bddc2 4951cpat_const_p (rtx x, machine_mode mode)
a1c6e4b8
TS
4952{
4953 if (CONSTANT_P (x))
4954 {
4955 enum immediate_class c = classify_immediate (x, mode);
4956 return c == IC_CPAT;
4957 }
4958 return 0;
4959}
85d9c13c 4960
a1c6e4b8
TS
4961rtx
4962gen_cpat_const (rtx * ops)
4963{
4964 unsigned char dst[16];
4965 int i, offset, shift, isize;
4966 if (GET_CODE (ops[3]) != CONST_INT
4967 || GET_CODE (ops[2]) != CONST_INT
4968 || (GET_CODE (ops[1]) != CONST_INT
4969 && GET_CODE (ops[1]) != REG))
4970 return 0;
4971 if (GET_CODE (ops[1]) == REG
4972 && (!REG_POINTER (ops[1])
4973 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4974 return 0;
85d9c13c
TS
4975
4976 for (i = 0; i < 16; i++)
a1c6e4b8
TS
4977 dst[i] = i + 16;
4978 isize = INTVAL (ops[3]);
4979 if (isize == 1)
4980 shift = 3;
4981 else if (isize == 2)
4982 shift = 2;
4983 else
4984 shift = 0;
4985 offset = (INTVAL (ops[2]) +
4986 (GET_CODE (ops[1]) ==
4987 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4988 for (i = 0; i < isize; i++)
4989 dst[offset + i] = i + shift;
4990 return array_to_constant (TImode, dst);
85d9c13c
TS
4991}
4992
4993/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
 4994	   array.  Use MODE for CONST_INTs.  When the constant's mode is smaller
4995 than 16 bytes, the value is repeated across the rest of the array. */
4996void
ef4bddc2 4997constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
85d9c13c
TS
4998{
4999 HOST_WIDE_INT val;
5000 int i, j, first;
5001
5002 memset (arr, 0, 16);
5003 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5004 if (GET_CODE (x) == CONST_INT
5005 || (GET_CODE (x) == CONST_DOUBLE
5006 && (mode == SFmode || mode == DFmode)))
5007 {
5008 gcc_assert (mode != VOIDmode && mode != BLKmode);
5009
5010 if (GET_CODE (x) == CONST_DOUBLE)
5011 val = const_double_to_hwint (x);
5012 else
5013 val = INTVAL (x);
5014 first = GET_MODE_SIZE (mode) - 1;
5015 for (i = first; i >= 0; i--)
5016 {
5017 arr[i] = val & 0xff;
5018 val >>= 8;
5019 }
5020 /* Splat the constant across the whole array. */
5021 for (j = 0, i = first + 1; i < 16; i++)
5022 {
5023 arr[i] = arr[j];
5024 j = (j == first) ? 0 : j + 1;
5025 }
5026 }
5027 else if (GET_CODE (x) == CONST_DOUBLE)
5028 {
5029 val = CONST_DOUBLE_LOW (x);
5030 for (i = 15; i >= 8; i--)
5031 {
5032 arr[i] = val & 0xff;
5033 val >>= 8;
5034 }
5035 val = CONST_DOUBLE_HIGH (x);
5036 for (i = 7; i >= 0; i--)
5037 {
5038 arr[i] = val & 0xff;
5039 val >>= 8;
5040 }
5041 }
5042 else if (GET_CODE (x) == CONST_VECTOR)
5043 {
5044 int units;
5045 rtx elt;
5046 mode = GET_MODE_INNER (mode);
5047 units = CONST_VECTOR_NUNITS (x);
5048 for (i = 0; i < units; i++)
5049 {
5050 elt = CONST_VECTOR_ELT (x, i);
5051 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5052 {
5053 if (GET_CODE (elt) == CONST_DOUBLE)
5054 val = const_double_to_hwint (elt);
5055 else
5056 val = INTVAL (elt);
5057 first = GET_MODE_SIZE (mode) - 1;
5058 if (first + i * GET_MODE_SIZE (mode) > 16)
5059 abort ();
5060 for (j = first; j >= 0; j--)
5061 {
5062 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5063 val >>= 8;
5064 }
5065 }
5066 }
5067 }
5068 else
5069 gcc_unreachable();
5070}
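/* For example, constant_to_array (SImode, GEN_INT (0x01020304), arr)
   fills arr with 01 02 03 04 repeated four times, i.e. the SImode value
   splatted across the whole quadword.  */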
5071
5072/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5073 smaller than 16 bytes, use the bytes that would represent that value
5074 in a register, e.g., for QImode return the value of arr[3]. */
5075rtx
ef4bddc2 5076array_to_constant (machine_mode mode, const unsigned char arr[16])
85d9c13c 5077{
ef4bddc2 5078 machine_mode inner_mode;
85d9c13c
TS
5079 rtvec v;
5080 int units, size, i, j, k;
5081 HOST_WIDE_INT val;
5082
5083 if (GET_MODE_CLASS (mode) == MODE_INT
5084 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5085 {
5086 j = GET_MODE_SIZE (mode);
5087 i = j < 4 ? 4 - j : 0;
5088 for (val = 0; i < j; i++)
5089 val = (val << 8) | arr[i];
5090 val = trunc_int_for_mode (val, mode);
5091 return GEN_INT (val);
5092 }
5093
5094 if (mode == TImode)
5095 {
5096 HOST_WIDE_INT high;
5097 for (i = high = 0; i < 8; i++)
5098 high = (high << 8) | arr[i];
5099 for (i = 8, val = 0; i < 16; i++)
5100 val = (val << 8) | arr[i];
5101 return immed_double_const (val, high, TImode);
5102 }
5103 if (mode == SFmode)
5104 {
5105 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5106 val = trunc_int_for_mode (val, SImode);
9dc5f9ba 5107 return hwint_to_const_double (SFmode, val);
85d9c13c
TS
5108 }
5109 if (mode == DFmode)
5110 {
e41e2ab4
UW
5111 for (i = 0, val = 0; i < 8; i++)
5112 val = (val << 8) | arr[i];
9dc5f9ba 5113 return hwint_to_const_double (DFmode, val);
85d9c13c
TS
5114 }
5115
5116 if (!VECTOR_MODE_P (mode))
5117 abort ();
5118
5119 units = GET_MODE_NUNITS (mode);
5120 size = GET_MODE_UNIT_SIZE (mode);
5121 inner_mode = GET_MODE_INNER (mode);
5122 v = rtvec_alloc (units);
5123
5124 for (k = i = 0; i < units; ++i)
5125 {
5126 val = 0;
5127 for (j = 0; j < size; j++, k++)
5128 val = (val << 8) | arr[k];
5129
5130 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5131 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5132 else
5133 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5134 }
5135 if (k > 16)
5136 abort ();
5137
5138 return gen_rtx_CONST_VECTOR (mode, v);
5139}
5140
5141static void
5142reloc_diagnostic (rtx x)
5143{
c5d75364 5144 tree decl = 0;
85d9c13c
TS
5145 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5146 return;
5147
5148 if (GET_CODE (x) == SYMBOL_REF)
5149 decl = SYMBOL_REF_DECL (x);
5150 else if (GET_CODE (x) == CONST
5151 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5152 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5153
5154 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5155 if (decl && !DECL_P (decl))
5156 decl = 0;
5157
85d9c13c
TS
5158 /* The decl could be a string constant. */
5159 if (decl && DECL_P (decl))
c5d75364
MLI
5160 {
5161 location_t loc;
5162 /* We use last_assemble_variable_decl to get line information. It's
5163 not always going to be right and might not even be close, but will
5164 be right for the more common cases. */
5165 if (!last_assemble_variable_decl || in_section == ctors_section)
5166 loc = DECL_SOURCE_LOCATION (decl);
5167 else
5168 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
85d9c13c 5169
c5d75364
MLI
5170 if (TARGET_WARN_RELOC)
5171 warning_at (loc, 0,
5172 "creating run-time relocation for %qD", decl);
5173 else
5174 error_at (loc,
5175 "creating run-time relocation for %qD", decl);
5176 }
5177 else
5178 {
5179 if (TARGET_WARN_RELOC)
5180 warning_at (input_location, 0, "creating run-time relocation");
5181 else
5182 error_at (input_location, "creating run-time relocation");
5183 }
85d9c13c
TS
5184}
5185
5186/* Hook into assemble_integer so we can generate an error for run-time
5187 relocations. The SPU ABI disallows them. */
5188static bool
5189spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5190{
5191 /* By default run-time relocations aren't supported, but we allow them
 5192	     in case users support them in their own run-time loader.  And we provide
5193 a warning for those users that don't. */
5194 if ((GET_CODE (x) == SYMBOL_REF)
5195 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5196 reloc_diagnostic (x);
5197
5198 return default_assemble_integer (x, size, aligned_p);
5199}
5200
5201static void
5202spu_asm_globalize_label (FILE * file, const char *name)
5203{
5204 fputs ("\t.global\t", file);
5205 assemble_name (file, name);
5206 fputs ("\n", file);
5207}
5208
5209static bool
e548c9df 5210spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
68f932c4 5211 int opno ATTRIBUTE_UNUSED, int *total,
f40751dd 5212 bool speed ATTRIBUTE_UNUSED)
85d9c13c 5213{
e548c9df 5214 int code = GET_CODE (x);
85d9c13c
TS
5215 int cost = COSTS_N_INSNS (2);
5216
5217 /* Folding to a CONST_VECTOR will use extra space but there might
5218 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9fc4da9d 5219 only if it allows us to fold away multiple insns. Changing the cost
85d9c13c
TS
5220 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5221 because this cost will only be compared against a single insn.
5222 if (code == CONST_VECTOR)
1a627b35 5223 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
85d9c13c
TS
5224 */
5225
5226 /* Use defaults for float operations. Not accurate but good enough. */
5227 if (mode == DFmode)
5228 {
5229 *total = COSTS_N_INSNS (13);
5230 return true;
5231 }
5232 if (mode == SFmode)
5233 {
5234 *total = COSTS_N_INSNS (6);
5235 return true;
5236 }
5237 switch (code)
5238 {
5239 case CONST_INT:
5240 if (satisfies_constraint_K (x))
5241 *total = 0;
5242 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5243 *total = COSTS_N_INSNS (1);
5244 else
5245 *total = COSTS_N_INSNS (3);
5246 return true;
5247
5248 case CONST:
5249 *total = COSTS_N_INSNS (3);
5250 return true;
5251
5252 case LABEL_REF:
5253 case SYMBOL_REF:
5254 *total = COSTS_N_INSNS (0);
5255 return true;
5256
5257 case CONST_DOUBLE:
5258 *total = COSTS_N_INSNS (5);
5259 return true;
5260
5261 case FLOAT_EXTEND:
5262 case FLOAT_TRUNCATE:
5263 case FLOAT:
5264 case UNSIGNED_FLOAT:
5265 case FIX:
5266 case UNSIGNED_FIX:
5267 *total = COSTS_N_INSNS (7);
5268 return true;
5269
5270 case PLUS:
5271 if (mode == TImode)
5272 {
5273 *total = COSTS_N_INSNS (9);
5274 return true;
5275 }
5276 break;
5277
5278 case MULT:
5279 cost =
5280 GET_CODE (XEXP (x, 0)) ==
5281 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5282 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5283 {
5284 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5285 {
5286 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5287 cost = COSTS_N_INSNS (14);
5288 if ((val & 0xffff) == 0)
5289 cost = COSTS_N_INSNS (9);
5290 else if (val > 0 && val < 0x10000)
5291 cost = COSTS_N_INSNS (11);
5292 }
5293 }
5294 *total = cost;
5295 return true;
5296 case DIV:
5297 case UDIV:
5298 case MOD:
5299 case UMOD:
5300 *total = COSTS_N_INSNS (20);
5301 return true;
5302 case ROTATE:
5303 case ROTATERT:
5304 case ASHIFT:
5305 case ASHIFTRT:
5306 case LSHIFTRT:
5307 *total = COSTS_N_INSNS (4);
5308 return true;
5309 case UNSPEC:
5310 if (XINT (x, 1) == UNSPEC_CONVERT)
5311 *total = COSTS_N_INSNS (0);
5312 else
5313 *total = COSTS_N_INSNS (4);
5314 return true;
5315 }
5316 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5317 if (GET_MODE_CLASS (mode) == MODE_INT
5318 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5319 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5320 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5321 *total = cost;
5322 return true;
5323}
5324
ef4bddc2 5325static machine_mode
7b0518e3 5326spu_unwind_word_mode (void)
85d9c13c 5327{
7b0518e3 5328 return SImode;
85d9c13c
TS
5329}
5330
5331/* Decide whether we can make a sibling call to a function. DECL is the
5332 declaration of the function being targeted by the call and EXP is the
5333 CALL_EXPR representing the call. */
5334static bool
5335spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5336{
5337 return decl && !TARGET_LARGE_MEM;
5338}
5339
5340/* We need to correctly update the back chain pointer and the Available
 5341	   Stack Size (which is in the second slot of the sp register). */
5342void
5343spu_allocate_stack (rtx op0, rtx op1)
5344{
5345 HOST_WIDE_INT v;
5346 rtx chain = gen_reg_rtx (V4SImode);
5347 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5348 rtx sp = gen_reg_rtx (V4SImode);
5349 rtx splatted = gen_reg_rtx (V4SImode);
5350 rtx pat = gen_reg_rtx (TImode);
5351
5352 /* copy the back chain so we can save it back again. */
5353 emit_move_insn (chain, stack_bot);
5354
5355 op1 = force_reg (SImode, op1);
5356
5357 v = 0x1020300010203ll;
5358 emit_move_insn (pat, immed_double_const (v, v, TImode));
5359 emit_insn (gen_shufb (splatted, op1, op1, pat));
5360
5361 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5362 emit_insn (gen_subv4si3 (sp, sp, splatted));
5363
5364 if (flag_stack_check)
5365 {
 5366	      rtx avail = gen_reg_rtx (SImode);
 5367	      rtx result = gen_reg_rtx (SImode);
 5368	      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
 5369	      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
 5370	      emit_insn (gen_spu_heq (result, GEN_INT (0)));
5371 }
5372
5373 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5374
5375 emit_move_insn (stack_bot, chain);
5376
5377 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5378}
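/* Note on the shuffle above: the pattern 0x0001020300010203... copies
   bytes 0-3 of op1 (its preferred slot) into every word slot, so the
   subsequent subv4si3 decrements slot 0 (the stack pointer itself) and
   slot 1 (the Available Stack Size) by the allocation size in a single
   vector operation.  */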
5379
5380void
5381spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5382{
5383 static unsigned char arr[16] =
5384 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5385 rtx temp = gen_reg_rtx (SImode);
5386 rtx temp2 = gen_reg_rtx (SImode);
5387 rtx temp3 = gen_reg_rtx (V4SImode);
5388 rtx temp4 = gen_reg_rtx (V4SImode);
5389 rtx pat = gen_reg_rtx (TImode);
5390 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5391
5392 /* Restore the backchain from the first word, sp from the second. */
5393 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5394 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5395
5396 emit_move_insn (pat, array_to_constant (TImode, arr));
5397
5398 /* Compute Available Stack Size for sp */
5399 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5400 emit_insn (gen_shufb (temp3, temp, temp, pat));
5401
5402 /* Compute Available Stack Size for back chain */
5403 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5404 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5405 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5406
5407 emit_insn (gen_addv4si3 (sp, sp, temp3));
5408 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5409}
5410
5411static void
5412spu_init_libfuncs (void)
5413{
5414 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5415 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5416 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5417 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5418 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5419 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5420 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5421 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5422 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4dfe3ad5 5423 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
85d9c13c
TS
5424 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5425 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5426
5427 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5428 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
9bf85028 5429
b46ae6da
UW
5430 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5431 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5432 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5433 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5434 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5435 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5436 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5437 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5438 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5439 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5440 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5441 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5442
9bf85028
TS
5443 set_optab_libfunc (smul_optab, TImode, "__multi3");
5444 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5445 set_optab_libfunc (smod_optab, TImode, "__modti3");
5446 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5447 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5448 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
85d9c13c
TS
5449}
5450
5451/* Make a subreg, stripping any existing subreg. We could possibly just
5452 call simplify_subreg, but in this case we know what we want. */
5453rtx
ef4bddc2 5454spu_gen_subreg (machine_mode mode, rtx x)
85d9c13c
TS
5455{
5456 if (GET_CODE (x) == SUBREG)
5457 x = SUBREG_REG (x);
5458 if (GET_MODE (x) == mode)
5459 return x;
5460 return gen_rtx_SUBREG (mode, x, 0);
5461}
5462
5463static bool
586de218 5464spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
85d9c13c
TS
5465{
5466 return (TYPE_MODE (type) == BLKmode
5467 && ((type) == 0
5468 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5469 || int_size_in_bytes (type) >
5470 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5471}
5472\f
5473/* Create the built-in types and functions */
5474
4a3a2376
UW
5475enum spu_function_code
5476{
5477#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5478#include "spu-builtins.def"
5479#undef DEF_BUILTIN
5480 NUM_SPU_BUILTINS
5481};
5482
5483extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5484
85d9c13c
TS
5485struct spu_builtin_description spu_builtins[] = {
5486#define DEF_BUILTIN(fcode, icode, name, type, params) \
8dc9f5bd 5487 {fcode, icode, name, type, params},
85d9c13c
TS
5488#include "spu-builtins.def"
5489#undef DEF_BUILTIN
5490};
5491
8dc9f5bd
UW
5492static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5493
5494/* Returns the spu builtin decl for CODE. */
2c93399f
AP
5495
5496static tree
5497spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5498{
5499 if (code >= NUM_SPU_BUILTINS)
5500 return error_mark_node;
5501
8dc9f5bd 5502 return spu_builtin_decls[code];
2c93399f
AP
5503}
5504
5505
85d9c13c
TS
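/* Implement TARGET_INIT_BUILTINS: create the SPU vector types and declare
   a function for each entry in spu_builtins.  */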
5506static void
5507spu_init_builtins (void)
5508{
5509 struct spu_builtin_description *d;
5510 unsigned int i;
5511
5512 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5513 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5514 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5515 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5516 V4SF_type_node = build_vector_type (float_type_node, 4);
5517 V2DF_type_node = build_vector_type (double_type_node, 2);
5518
5519 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5520 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5521 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5522 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5523
60c9cf8d 5524 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
85d9c13c
TS
5525
5526 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5527 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5528 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5529 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5530 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5531 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5532 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5533 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5534 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5535 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5536 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5538
5539 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5540 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5541 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5542 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5543 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5544 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5545 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5547
5548 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5549 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5550
5551 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5552
5553 spu_builtin_types[SPU_BTI_PTR] =
5554 build_pointer_type (build_qualified_type
5555 (void_type_node,
5556 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5557
5558 /* For each builtin we build a new prototype. The tree code will make
5559 sure nodes are shared. */
5560 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5561 {
5562 tree p;
5563 char name[64]; /* add_builtin_function will make a copy. */
5564 int parm;
5565
5566 if (d->name == 0)
5567 continue;
5568
e47f8bba 5569 /* Find last parm. */
85d9c13c 5570 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
e47f8bba 5571 ;
85d9c13c
TS
5572
5573 p = void_list_node;
5574 while (parm > 1)
5575 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5576
5577 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5578
5579 sprintf (name, "__builtin_%s", d->name);
8dc9f5bd 5580 spu_builtin_decls[i] =
fec6e65b 5581 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
bbea461b 5582 if (d->fcode == SPU_MASK_FOR_LOAD)
8dc9f5bd 5583 TREE_READONLY (spu_builtin_decls[i]) = 1;
e47f8bba
BE
5584
5585 /* These builtins don't throw. */
8dc9f5bd 5586 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
85d9c13c
TS
5587 }
5588}
5589
e1f1d97f
SL
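/* Restore the stack pointer from OP1, adjusting the Available Stack Size
   word by the same amount and copying the back chain quadword to the new
   stack bottom.  */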
5590void
5591spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5592{
5593 static unsigned char arr[16] =
5594 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5595
5596 rtx temp = gen_reg_rtx (Pmode);
5597 rtx temp2 = gen_reg_rtx (V4SImode);
5598 rtx temp3 = gen_reg_rtx (V4SImode);
5599 rtx pat = gen_reg_rtx (TImode);
5600 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5601
5602 emit_move_insn (pat, array_to_constant (TImode, arr));
5603
5604 /* Restore the sp. */
5605 emit_move_insn (temp, op1);
5606 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5607
5608 /* Compute available stack size for sp. */
5609 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5610 emit_insn (gen_shufb (temp3, temp, temp, pat));
5611
5612 emit_insn (gen_addv4si3 (sp, sp, temp3));
5613 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5614}
5615
85d9c13c
TS
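/* Return nonzero when TARGET_SAFE_DMA is set and CHANNEL is in the
   range 21-27.  */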
5616int
5617spu_safe_dma (HOST_WIDE_INT channel)
5618{
4230d0fe 5619 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
85d9c13c
TS
5620}
5621
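/* Expand a splat: replicate the scalar OPS[1] into every element of the
   vector OPS[0], either as a constant vector or with a shufb pattern.  */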
5622void
5623spu_builtin_splats (rtx ops[])
5624{
ef4bddc2 5625 machine_mode mode = GET_MODE (ops[0]);
85d9c13c
TS
5626 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5627 {
5628 unsigned char arr[16];
5629 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5630 emit_move_insn (ops[0], array_to_constant (mode, arr));
5631 }
85d9c13c
TS
5632 else
5633 {
5634 rtx reg = gen_reg_rtx (TImode);
5635 rtx shuf;
5636 if (GET_CODE (ops[1]) != REG
5637 && GET_CODE (ops[1]) != SUBREG)
5638 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5639 switch (mode)
5640 {
5641 case V2DImode:
5642 case V2DFmode:
5643 shuf =
5644 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5645 TImode);
5646 break;
5647 case V4SImode:
5648 case V4SFmode:
5649 shuf =
5650 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5651 TImode);
5652 break;
5653 case V8HImode:
5654 shuf =
5655 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5656 TImode);
5657 break;
5658 case V16QImode:
5659 shuf =
5660 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5661 TImode);
5662 break;
5663 default:
5664 abort ();
5665 }
5666 emit_move_insn (reg, shuf);
5667 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5668 }
5669}
5670
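/* Extract element OPS[2] of vector OPS[1] into OPS[0], using vec_extract
   for a constant index and a quadword rotate otherwise.  */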
5671void
5672spu_builtin_extract (rtx ops[])
5673{
ef4bddc2 5674 machine_mode mode;
85d9c13c
TS
5675 rtx rot, from, tmp;
5676
5677 mode = GET_MODE (ops[1]);
5678
5679 if (GET_CODE (ops[2]) == CONST_INT)
5680 {
5681 switch (mode)
5682 {
5683 case V16QImode:
5684 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5685 break;
5686 case V8HImode:
5687 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5688 break;
5689 case V4SFmode:
5690 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5691 break;
5692 case V4SImode:
5693 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5694 break;
5695 case V2DImode:
5696 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5697 break;
5698 case V2DFmode:
5699 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5700 break;
5701 default:
5702 abort ();
5703 }
5704 return;
5705 }
5706
5707 from = spu_gen_subreg (TImode, ops[1]);
5708 rot = gen_reg_rtx (TImode);
5709 tmp = gen_reg_rtx (SImode);
5710
5711 switch (mode)
5712 {
5713 case V16QImode:
5714 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5715 break;
5716 case V8HImode:
5717 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5718 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5719 break;
5720 case V4SFmode:
5721 case V4SImode:
5722 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5723 break;
5724 case V2DImode:
5725 case V2DFmode:
5726 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5727 break;
5728 default:
5729 abort ();
5730 }
5731 emit_insn (gen_rotqby_ti (rot, from, tmp));
5732
5733 emit_insn (gen_spu_convert (ops[0], rot));
5734}
5735
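/* Insert OPS[1] into element OPS[3] of vector OPS[2], placing the result
   in OPS[0].  A cpat mask selects which bytes shufb replaces.  */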
5736void
5737spu_builtin_insert (rtx ops[])
5738{
ef4bddc2
RS
5739 machine_mode mode = GET_MODE (ops[0]);
5740 machine_mode imode = GET_MODE_INNER (mode);
85d9c13c
TS
5741 rtx mask = gen_reg_rtx (TImode);
5742 rtx offset;
5743
5744 if (GET_CODE (ops[3]) == CONST_INT)
5745 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5746 else
5747 {
5748 offset = gen_reg_rtx (SImode);
5749 emit_insn (gen_mulsi3
5750 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5751 }
5752 emit_insn (gen_cpat
5753 (mask, stack_pointer_rtx, offset,
5754 GEN_INT (GET_MODE_SIZE (imode))));
5755 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5756}
5757
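/* Expand a promote: rotate the scalar OPS[1] so that it occupies element
   OPS[2] of the vector OPS[0].  */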
5758void
5759spu_builtin_promote (rtx ops[])
5760{
ef4bddc2 5761 machine_mode mode, imode;
85d9c13c
TS
5762 rtx rot, from, offset;
5763 HOST_WIDE_INT pos;
5764
5765 mode = GET_MODE (ops[0]);
5766 imode = GET_MODE_INNER (mode);
5767
5768 from = gen_reg_rtx (TImode);
5769 rot = spu_gen_subreg (TImode, ops[0]);
5770
5771 emit_insn (gen_spu_convert (from, ops[1]));
5772
5773 if (GET_CODE (ops[2]) == CONST_INT)
5774 {
5775 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5776 if (GET_MODE_SIZE (imode) < 4)
5777 pos += 4 - GET_MODE_SIZE (imode);
5778 offset = GEN_INT (pos & 15);
5779 }
5780 else
5781 {
5782 offset = gen_reg_rtx (SImode);
5783 switch (mode)
5784 {
5785 case V16QImode:
5786 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5787 break;
5788 case V8HImode:
5789 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5790 emit_insn (gen_addsi3 (offset, offset, offset));
5791 break;
5792 case V4SFmode:
5793 case V4SImode:
5794 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5795 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5796 break;
5797 case V2DImode:
5798 case V2DFmode:
5799 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5800 break;
5801 default:
5802 abort ();
5803 }
5804 }
5805 emit_insn (gen_rotqby_ti (rot, from, offset));
5806}
5807
a85b4c91
RH
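/* Implement TARGET_TRAMPOLINE_INIT: emit RTL to initialize the trampoline
   M_TRAMP with code that loads the static chain CXT and jumps to FNDECL.  */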
5808static void
5809spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
85d9c13c 5810{
a85b4c91 5811 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
85d9c13c
TS
5812 rtx shuf = gen_reg_rtx (V4SImode);
5813 rtx insn = gen_reg_rtx (V4SImode);
5814 rtx shufc;
5815 rtx insnc;
5816 rtx mem;
5817
5818 fnaddr = force_reg (SImode, fnaddr);
5819 cxt = force_reg (SImode, cxt);
5820
5821 if (TARGET_LARGE_MEM)
5822 {
5823 rtx rotl = gen_reg_rtx (V4SImode);
5824 rtx mask = gen_reg_rtx (V4SImode);
5825 rtx bi = gen_reg_rtx (SImode);
a85b4c91 5826 static unsigned char const shufa[16] = {
85d9c13c
TS
5827 2, 3, 0, 1, 18, 19, 16, 17,
5828 0, 1, 2, 3, 16, 17, 18, 19
5829 };
a85b4c91 5830 static unsigned char const insna[16] = {
85d9c13c
TS
5831 0x41, 0, 0, 79,
5832 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5833 0x60, 0x80, 0, 79,
5834 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5835 };
5836
5837 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5838 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5839
5840 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
71d46ca5 5841 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
85d9c13c
TS
5842 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5843 emit_insn (gen_selb (insn, insnc, rotl, mask));
5844
a85b4c91
RH
5845 mem = adjust_address (m_tramp, V4SImode, 0);
5846 emit_move_insn (mem, insn);
85d9c13c
TS
5847
5848 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
a85b4c91
RH
5849 mem = adjust_address (m_tramp, Pmode, 16);
5850 emit_move_insn (mem, bi);
85d9c13c
TS
5851 }
5852 else
5853 {
5854 rtx scxt = gen_reg_rtx (SImode);
5855 rtx sfnaddr = gen_reg_rtx (SImode);
a85b4c91 5856 static unsigned char const insna[16] = {
85d9c13c
TS
5857 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5858 0x30, 0, 0, 0,
5859 0, 0, 0, 0,
5860 0, 0, 0, 0
5861 };
5862
5863 shufc = gen_reg_rtx (TImode);
5864 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5865
5866 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5867 fits in 18 bits and that its last 4 bits are zero.  This will be
5868 true if the stack pointer is initialized to 0x3fff0 at program
5869 start, otherwise the ila instruction will be garbage. */
5870
5871 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5872 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5873 emit_insn (gen_cpat
5874 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5875 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5876 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5877
a85b4c91
RH
5878 mem = adjust_address (m_tramp, V4SImode, 0);
5879 emit_move_insn (mem, insn);
85d9c13c
TS
5880 }
5881 emit_insn (gen_sync ());
5882}
5883
d45eae79
SL
5884static bool
5885spu_warn_func_return (tree decl)
5886{
5887 /* Naked functions are implemented entirely in assembly, including the
5888 return sequence, so suppress warnings about this. */
5889 return !spu_naked_function_p (decl);
5890}
5891
85d9c13c
TS
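/* Sign-extend OPS[1] into the wider mode of OPS[0] by shuffling in copies
   of the sign computed from the source's most significant bit.  */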
5892void
5893spu_expand_sign_extend (rtx ops[])
5894{
5895 unsigned char arr[16];
5896 rtx pat = gen_reg_rtx (TImode);
5897 rtx sign, c;
5898 int i, last;
5899 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5900 if (GET_MODE (ops[1]) == QImode)
5901 {
5902 sign = gen_reg_rtx (HImode);
5903 emit_insn (gen_extendqihi2 (sign, ops[1]));
5904 for (i = 0; i < 16; i++)
5905 arr[i] = 0x12;
5906 arr[last] = 0x13;
5907 }
5908 else
5909 {
5910 for (i = 0; i < 16; i++)
5911 arr[i] = 0x10;
5912 switch (GET_MODE (ops[1]))
5913 {
5914 case HImode:
5915 sign = gen_reg_rtx (SImode);
5916 emit_insn (gen_extendhisi2 (sign, ops[1]));
5917 arr[last] = 0x03;
5918 arr[last - 1] = 0x02;
5919 break;
5920 case SImode:
5921 sign = gen_reg_rtx (SImode);
5922 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5923 for (i = 0; i < 4; i++)
5924 arr[last - i] = 3 - i;
5925 break;
5926 case DImode:
5927 sign = gen_reg_rtx (SImode);
5928 c = gen_reg_rtx (SImode);
5929 emit_insn (gen_spu_convert (c, ops[1]));
5930 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5931 for (i = 0; i < 8; i++)
5932 arr[last - i] = 7 - i;
5933 break;
5934 default:
5935 abort ();
5936 }
5937 }
5938 emit_move_insn (pat, array_to_constant (TImode, arr));
5939 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5940}
5941
5942/* Expand vector initialization.  If there are any constant parts, load
5943 the constant parts first, then load any non-constant parts. */
5944void
5945spu_expand_vector_init (rtx target, rtx vals)
5946{
ef4bddc2 5947 machine_mode mode = GET_MODE (target);
85d9c13c
TS
5948 int n_elts = GET_MODE_NUNITS (mode);
5949 int n_var = 0;
5950 bool all_same = true;
b509487e 5951 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
85d9c13c
TS
5952 int i;
5953
5954 first = XVECEXP (vals, 0, 0);
5955 for (i = 0; i < n_elts; ++i)
5956 {
5957 x = XVECEXP (vals, 0, i);
d74032d9
UW
5958 if (!(CONST_INT_P (x)
5959 || GET_CODE (x) == CONST_DOUBLE
5960 || GET_CODE (x) == CONST_FIXED))
85d9c13c
TS
5961 ++n_var;
5962 else
5963 {
5964 if (first_constant == NULL_RTX)
5965 first_constant = x;
5966 }
5967 if (i > 0 && !rtx_equal_p (x, first))
5968 all_same = false;
5969 }
5970
5971 /* if all elements are the same, use splats to repeat elements */
5972 if (all_same)
5973 {
5974 if (!CONSTANT_P (first)
5975 && !register_operand (first, GET_MODE (x)))
5976 first = force_reg (GET_MODE (first), first);
5977 emit_insn (gen_spu_splats (target, first));
5978 return;
5979 }
5980
5981 /* load constant parts */
5982 if (n_var != n_elts)
5983 {
5984 if (n_var == 0)
5985 {
5986 emit_move_insn (target,
5987 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5988 }
5989 else
5990 {
5991 rtx constant_parts_rtx = copy_rtx (vals);
5992
5993 gcc_assert (first_constant != NULL_RTX);
5994 /* Fill empty slots with the first constant; this increases
5995 our chance of using splats in the recursive call below. */
5996 for (i = 0; i < n_elts; ++i)
d74032d9
UW
5997 {
5998 x = XVECEXP (constant_parts_rtx, 0, i);
5999 if (!(CONST_INT_P (x)
6000 || GET_CODE (x) == CONST_DOUBLE
6001 || GET_CODE (x) == CONST_FIXED))
6002 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6003 }
85d9c13c
TS
6004
6005 spu_expand_vector_init (target, constant_parts_rtx);
6006 }
6007 }
6008
6009 /* load variable parts */
6010 if (n_var != 0)
6011 {
6012 rtx insert_operands[4];
6013
6014 insert_operands[0] = target;
6015 insert_operands[2] = target;
6016 for (i = 0; i < n_elts; ++i)
6017 {
6018 x = XVECEXP (vals, 0, i);
d74032d9
UW
6019 if (!(CONST_INT_P (x)
6020 || GET_CODE (x) == CONST_DOUBLE
6021 || GET_CODE (x) == CONST_FIXED))
85d9c13c
TS
6022 {
6023 if (!register_operand (x, GET_MODE (x)))
6024 x = force_reg (GET_MODE (x), x);
6025 insert_operands[1] = x;
6026 insert_operands[3] = GEN_INT (i);
6027 spu_builtin_insert (insert_operands);
6028 }
6029 }
6030 }
6031}
b66b813d 6032
39aeae85
SL
6033/* Return the insn code for the vector compare instruction for the given
6034 CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available. */
6035
6036static int
6037get_vec_cmp_insn (enum rtx_code code,
ef4bddc2
RS
6038 machine_mode dest_mode,
6039 machine_mode op_mode)
39aeae85
SL
6040
6041{
6042 switch (code)
6043 {
6044 case EQ:
6045 if (dest_mode == V16QImode && op_mode == V16QImode)
6046 return CODE_FOR_ceq_v16qi;
6047 if (dest_mode == V8HImode && op_mode == V8HImode)
6048 return CODE_FOR_ceq_v8hi;
6049 if (dest_mode == V4SImode && op_mode == V4SImode)
6050 return CODE_FOR_ceq_v4si;
6051 if (dest_mode == V4SImode && op_mode == V4SFmode)
6052 return CODE_FOR_ceq_v4sf;
6053 if (dest_mode == V2DImode && op_mode == V2DFmode)
6054 return CODE_FOR_ceq_v2df;
6055 break;
6056 case GT:
6057 if (dest_mode == V16QImode && op_mode == V16QImode)
6058 return CODE_FOR_cgt_v16qi;
6059 if (dest_mode == V8HImode && op_mode == V8HImode)
6060 return CODE_FOR_cgt_v8hi;
6061 if (dest_mode == V4SImode && op_mode == V4SImode)
6062 return CODE_FOR_cgt_v4si;
6063 if (dest_mode == V4SImode && op_mode == V4SFmode)
6064 return CODE_FOR_cgt_v4sf;
6065 if (dest_mode == V2DImode && op_mode == V2DFmode)
6066 return CODE_FOR_cgt_v2df;
6067 break;
6068 case GTU:
6069 if (dest_mode == V16QImode && op_mode == V16QImode)
6070 return CODE_FOR_clgt_v16qi;
6071 if (dest_mode == V8HImode && op_mode == V8HImode)
6072 return CODE_FOR_clgt_v8hi;
6073 if (dest_mode == V4SImode && op_mode == V4SImode)
6074 return CODE_FOR_clgt_v4si;
6075 break;
6076 default:
6077 break;
6078 }
6079 return -1;
6080}
6081
6082/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6083 DMODE is expected destination mode. This is a recursive function. */
6084
6085static rtx
6086spu_emit_vector_compare (enum rtx_code rcode,
6087 rtx op0, rtx op1,
ef4bddc2 6088 machine_mode dmode)
39aeae85
SL
6089{
6090 int vec_cmp_insn;
6091 rtx mask;
ef4bddc2
RS
6092 machine_mode dest_mode;
6093 machine_mode op_mode = GET_MODE (op1);
39aeae85
SL
6094
6095 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6096
6097 /* Floating point vector compare instructions use a V4SImode destination.
6098 Double floating point vector compare instructions use a V2DImode
6099 destination.  Move the result to the appropriate mode later. */
6100 if (dmode == V4SFmode)
6101 dest_mode = V4SImode;
6102 else if (dmode == V2DFmode)
6103 dest_mode = V2DImode;
6104 else
6105 dest_mode = dmode;
6106
6107 mask = gen_reg_rtx (dest_mode);
6108 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6109
6110 if (vec_cmp_insn == -1)
6111 {
6112 bool swap_operands = false;
6113 bool try_again = false;
6114 switch (rcode)
6115 {
6116 case LT:
6117 rcode = GT;
6118 swap_operands = true;
6119 try_again = true;
6120 break;
6121 case LTU:
6122 rcode = GTU;
6123 swap_operands = true;
6124 try_again = true;
6125 break;
6126 case NE:
7f9a3dcd
UW
6127 case UNEQ:
6128 case UNLE:
6129 case UNLT:
6130 case UNGE:
6131 case UNGT:
6132 case UNORDERED:
39aeae85
SL
6133 /* Treat A != B as ~(A==B). */
6134 {
7f9a3dcd 6135 enum rtx_code rev_code;
39aeae85 6136 enum insn_code nor_code;
7f9a3dcd
UW
6137 rtx rev_mask;
6138
6139 rev_code = reverse_condition_maybe_unordered (rcode);
6140 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6141
947131ba 6142 nor_code = optab_handler (one_cmpl_optab, dest_mode);
39aeae85 6143 gcc_assert (nor_code != CODE_FOR_nothing);
7f9a3dcd 6144 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
39aeae85
SL
6145 if (dmode != dest_mode)
6146 {
6147 rtx temp = gen_reg_rtx (dest_mode);
6148 convert_move (temp, mask, 0);
6149 return temp;
6150 }
6151 return mask;
6152 }
6153 break;
6154 case GE:
6155 case GEU:
6156 case LE:
6157 case LEU:
6158 /* Try GT/GTU/LT/LTU OR EQ */
6159 {
6160 rtx c_rtx, eq_rtx;
6161 enum insn_code ior_code;
6162 enum rtx_code new_code;
6163
6164 switch (rcode)
6165 {
6166 case GE: new_code = GT; break;
6167 case GEU: new_code = GTU; break;
6168 case LE: new_code = LT; break;
6169 case LEU: new_code = LTU; break;
6170 default:
6171 gcc_unreachable ();
6172 }
6173
6174 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6175 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6176
947131ba 6177 ior_code = optab_handler (ior_optab, dest_mode);
39aeae85
SL
6178 gcc_assert (ior_code != CODE_FOR_nothing);
6179 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6180 if (dmode != dest_mode)
6181 {
6182 rtx temp = gen_reg_rtx (dest_mode);
6183 convert_move (temp, mask, 0);
6184 return temp;
6185 }
6186 return mask;
6187 }
6188 break;
7f9a3dcd
UW
6189 case LTGT:
6190 /* Try LT OR GT */
6191 {
6192 rtx lt_rtx, gt_rtx;
6193 enum insn_code ior_code;
6194
6195 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6196 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6197
6198 ior_code = optab_handler (ior_optab, dest_mode);
6199 gcc_assert (ior_code != CODE_FOR_nothing);
6200 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6201 if (dmode != dest_mode)
6202 {
6203 rtx temp = gen_reg_rtx (dest_mode);
6204 convert_move (temp, mask, 0);
6205 return temp;
6206 }
6207 return mask;
6208 }
6209 break;
6210 case ORDERED:
6211 /* Implement as (A==A) & (B==B) */
6212 {
6213 rtx a_rtx, b_rtx;
6214 enum insn_code and_code;
6215
6216 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6217 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6218
6219 and_code = optab_handler (and_optab, dest_mode);
6220 gcc_assert (and_code != CODE_FOR_nothing);
6221 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6222 if (dmode != dest_mode)
6223 {
6224 rtx temp = gen_reg_rtx (dest_mode);
6225 convert_move (temp, mask, 0);
6226 return temp;
6227 }
6228 return mask;
6229 }
6230 break;
39aeae85
SL
6231 default:
6232 gcc_unreachable ();
6233 }
6234
6235 /* You only get two chances. */
6236 if (try_again)
6237 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6238
6239 gcc_assert (vec_cmp_insn != -1);
6240
6241 if (swap_operands)
6242 {
6243 rtx tmp;
6244 tmp = op0;
6245 op0 = op1;
6246 op1 = tmp;
6247 }
6248 }
6249
6250 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6251 if (dmode != dest_mode)
6252 {
6253 rtx temp = gen_reg_rtx (dest_mode);
6254 convert_move (temp, mask, 0);
6255 return temp;
6256 }
6257 return mask;
6258}
6259
6260
6261/* Emit vector conditional expression.
6262 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6263 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6264
6265int
6266spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6267 rtx cond, rtx cc_op0, rtx cc_op1)
6268{
ef4bddc2 6269 machine_mode dest_mode = GET_MODE (dest);
39aeae85
SL
6270 enum rtx_code rcode = GET_CODE (cond);
6271 rtx mask;
6272
6273 /* Get the vector mask for the given relational operations. */
6274 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6275
6276 emit_insn(gen_selb (dest, op2, op1, mask));
6277
6278 return 1;
6279}
6280
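/* Force OP into a register of mode MODE, using a subreg when the sizes
   match and a spu_convert otherwise.  */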
b66b813d 6281static rtx
ef4bddc2 6282spu_force_reg (machine_mode mode, rtx op)
b66b813d
AP
6283{
6284 rtx x, r;
6285 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6286 {
6287 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6288 || GET_MODE (op) == BLKmode)
6289 return force_reg (mode, convert_to_mode (mode, op, 0));
6290 abort ();
6291 }
6292
6293 r = force_reg (GET_MODE (op), op);
6294 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6295 {
6296 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6297 if (x)
6298 return x;
6299 }
6300
6301 x = gen_reg_rtx (mode);
6302 emit_insn (gen_spu_convert (x, r));
6303 return x;
6304}
6305
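/* Check that OP is a valid value for parameter type P of builtin D,
   diagnosing out-of-range immediates and ignored low-order bits.  */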
6306static void
6307spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6308{
6309 HOST_WIDE_INT v = 0;
6310 int lsbits;
6311 /* Check the range of immediate operands. */
6312 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6313 {
6314 int range = p - SPU_BTI_7;
73701e27
TS
6315
6316 if (!CONSTANT_P (op))
d8a07487 6317 error ("%s expects an integer literal in the range [%d, %d]",
b66b813d
AP
6318 d->name,
6319 spu_builtin_range[range].low, spu_builtin_range[range].high);
6320
6321 if (GET_CODE (op) == CONST
6322 && (GET_CODE (XEXP (op, 0)) == PLUS
6323 || GET_CODE (XEXP (op, 0)) == MINUS))
6324 {
6325 v = INTVAL (XEXP (XEXP (op, 0), 1));
6326 op = XEXP (XEXP (op, 0), 0);
6327 }
6328 else if (GET_CODE (op) == CONST_INT)
6329 v = INTVAL (op);
73701e27
TS
6330 else if (GET_CODE (op) == CONST_VECTOR
6331 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6332 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6333
6334 /* The default for v is 0 which is valid in every range. */
6335 if (v < spu_builtin_range[range].low
6336 || v > spu_builtin_range[range].high)
d8a07487 6337 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
73701e27
TS
6338 d->name,
6339 spu_builtin_range[range].low, spu_builtin_range[range].high,
6340 v);
b66b813d
AP
6341
6342 switch (p)
6343 {
6344 case SPU_BTI_S10_4:
6345 lsbits = 4;
6346 break;
6347 case SPU_BTI_U16_2:
6348 /* This is only used in lqa, and stqa. Even though the insns
6349 encode 16 bits of the address (all but the 2 least
6350 significant), only 14 bits are used because it is masked to
6351 be 16 byte aligned. */
6352 lsbits = 4;
6353 break;
6354 case SPU_BTI_S16_2:
6355 /* This is used for lqr and stqr. */
6356 lsbits = 2;
6357 break;
6358 default:
6359 lsbits = 0;
6360 }
6361
6362 if (GET_CODE (op) == LABEL_REF
6363 || (GET_CODE (op) == SYMBOL_REF
6364 && SYMBOL_REF_FUNCTION_P (op))
73701e27 6365 || (v & ((1 << lsbits) - 1)) != 0)
d8a07487 6366 warning (0, "%d least significant bits of %s are ignored", lsbits,
b66b813d
AP
6367 d->name);
6368 }
6369}
6370
6371
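/* Expand the arguments of call EXP into OPS, with TARGET as operand 0
   when the builtin returns a value.  Return the number of operands.  */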
d7815554 6372static int
73701e27 6373expand_builtin_args (struct spu_builtin_description *d, tree exp,
b66b813d
AP
6374 rtx target, rtx ops[])
6375{
81f40b79 6376 enum insn_code icode = (enum insn_code) d->icode;
73701e27 6377 int i = 0, a;
b66b813d
AP
6378
6379 /* Expand the arguments into rtl. */
6380
6381 if (d->parm[0] != SPU_BTI_VOID)
6382 ops[i++] = target;
6383
d7815554 6384 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
b66b813d 6385 {
73701e27 6386 tree arg = CALL_EXPR_ARG (exp, a);
b66b813d
AP
6387 if (arg == 0)
6388 abort ();
bbbbb16a 6389 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
b66b813d 6390 }
d7815554 6391
f04713ee 6392 gcc_assert (i == insn_data[icode].n_generator_args);
d7815554 6393 return i;
b66b813d
AP
6394}
6395
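/* Expand the call EXP to the builtin described by D, emitting the insn
   and returning the result.  */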
6396static rtx
6397spu_expand_builtin_1 (struct spu_builtin_description *d,
73701e27 6398 tree exp, rtx target)
b66b813d
AP
6399{
6400 rtx pat;
6401 rtx ops[8];
81f40b79 6402 enum insn_code icode = (enum insn_code) d->icode;
ef4bddc2 6403 machine_mode mode, tmode;
b66b813d 6404 int i, p;
d7815554 6405 int n_operands;
b66b813d
AP
6406 tree return_type;
6407
6408 /* Set up ops[] with values from arglist. */
d7815554 6409 n_operands = expand_builtin_args (d, exp, target, ops);
b66b813d
AP
6410
6411 /* Handle the target operand which must be operand 0. */
6412 i = 0;
6413 if (d->parm[0] != SPU_BTI_VOID)
6414 {
6415
6416 /* We prefer the mode specified for the match_operand otherwise
6417 use the mode from the builtin function prototype. */
6418 tmode = insn_data[d->icode].operand[0].mode;
6419 if (tmode == VOIDmode)
6420 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6421
6422 /* Try to use target because not using it can lead to extra copies
6423 and when we are using all of the registers extra copies leads
6424 to extra spills. */
6425 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6426 ops[0] = target;
6427 else
6428 target = ops[0] = gen_reg_rtx (tmode);
6429
6430 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6431 abort ();
6432
6433 i++;
6434 }
6435
bbea461b
DN
6436 if (d->fcode == SPU_MASK_FOR_LOAD)
6437 {
ef4bddc2 6438 machine_mode mode = insn_data[icode].operand[1].mode;
bbea461b
DN
6439 tree arg;
6440 rtx addr, op, pat;
6441
6442 /* get addr */
73701e27 6443 arg = CALL_EXPR_ARG (exp, 0);
643afedb 6444 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
bbea461b
DN
6445 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6446 addr = memory_address (mode, op);
6447
6448 /* negate addr */
6449 op = gen_reg_rtx (GET_MODE (addr));
f7df4a84 6450 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
bbea461b
DN
6451 op = gen_rtx_MEM (mode, op);
6452
6453 pat = GEN_FCN (icode) (target, op);
6454 if (!pat)
6455 return 0;
6456 emit_insn (pat);
6457 return target;
6458 }
6459
b66b813d
AP
6460 /* Ignore align_hint, but still expand its args in case they have
6461 side effects. */
6462 if (icode == CODE_FOR_spu_align_hint)
6463 return 0;
6464
6465 /* Handle the rest of the operands. */
d7815554 6466 for (p = 1; i < n_operands; i++, p++)
b66b813d
AP
6467 {
6468 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6469 mode = insn_data[d->icode].operand[i].mode;
6470 else
6471 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6472
6473 /* mode can be VOIDmode here for labels */
6474
6475 /* For specific intrinsics with an immediate operand, e.g.,
6476 si_ai(), we sometimes need to convert the scalar argument to a
6477 vector argument by splatting the scalar. */
6478 if (VECTOR_MODE_P (mode)
6479 && (GET_CODE (ops[i]) == CONST_INT
6480 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6717c544 6481 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
b66b813d
AP
6482 {
6483 if (GET_CODE (ops[i]) == CONST_INT)
6484 ops[i] = spu_const (mode, INTVAL (ops[i]));
6485 else
6486 {
6487 rtx reg = gen_reg_rtx (mode);
ef4bddc2 6488 machine_mode imode = GET_MODE_INNER (mode);
b66b813d
AP
6489 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6490 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6491 if (imode != GET_MODE (ops[i]))
6492 ops[i] = convert_to_mode (imode, ops[i],
6493 TYPE_UNSIGNED (spu_builtin_types
6494 [d->parm[i]]));
6495 emit_insn (gen_spu_splats (reg, ops[i]));
6496 ops[i] = reg;
6497 }
6498 }
6499
73701e27
TS
6500 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6501
b66b813d
AP
6502 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6503 ops[i] = spu_force_reg (mode, ops[i]);
b66b813d
AP
6504 }
6505
d7815554 6506 switch (n_operands)
b66b813d
AP
6507 {
6508 case 0:
6509 pat = GEN_FCN (icode) (0);
6510 break;
6511 case 1:
6512 pat = GEN_FCN (icode) (ops[0]);
6513 break;
6514 case 2:
6515 pat = GEN_FCN (icode) (ops[0], ops[1]);
6516 break;
6517 case 3:
6518 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6519 break;
6520 case 4:
6521 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6522 break;
6523 case 5:
6524 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6525 break;
6526 case 6:
6527 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6528 break;
6529 default:
6530 abort ();
6531 }
6532
6533 if (!pat)
6534 abort ();
6535
6536 if (d->type == B_CALL || d->type == B_BISLED)
6537 emit_call_insn (pat);
6538 else if (d->type == B_JUMP)
6539 {
6540 emit_jump_insn (pat);
6541 emit_barrier ();
6542 }
6543 else
6544 emit_insn (pat);
6545
6546 return_type = spu_builtin_types[d->parm[0]];
6547 if (d->parm[0] != SPU_BTI_VOID
6548 && GET_MODE (target) != TYPE_MODE (return_type))
6549 {
6550 /* target is the return value.  It should always have the mode of
6551 the builtin function prototype. */
6552 target = spu_force_reg (TYPE_MODE (return_type), target);
6553 }
6554
6555 return target;
6556}
6557
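/* Implement TARGET_EXPAND_BUILTIN: dispatch EXP to the matching entry in
   spu_builtins.  */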
6558rtx
6559spu_expand_builtin (tree exp,
6560 rtx target,
6561 rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 6562 machine_mode mode ATTRIBUTE_UNUSED,
b66b813d
AP
6563 int ignore ATTRIBUTE_UNUSED)
6564{
73701e27 6565 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
fec6e65b 6566 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
b66b813d
AP
6567 struct spu_builtin_description *d;
6568
6569 if (fcode < NUM_SPU_BUILTINS)
6570 {
6571 d = &spu_builtins[fcode];
6572
73701e27 6573 return spu_expand_builtin_1 (d, exp, target);
b66b813d
AP
6574 }
6575 abort ();
6576}
6577
bbea461b
DN
6578/* Implement targetm.vectorize.builtin_mask_for_load. */
6579static tree
6580spu_builtin_mask_for_load (void)
6581{
8dc9f5bd 6582 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
bbea461b 6583}
73701e27 6584
e95b59d2
DN
6585/* Implement targetm.vectorize.builtin_vectorization_cost. */
6586static int
720f5239 6587spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
a21892ad 6588 tree vectype,
720f5239 6589 int misalign ATTRIBUTE_UNUSED)
35e1a5e7 6590{
a21892ad
BS
6591 unsigned elements;
6592
35e1a5e7
IR
6593 switch (type_of_cost)
6594 {
6595 case scalar_stmt:
6596 case vector_stmt:
6597 case vector_load:
6598 case vector_store:
6599 case vec_to_scalar:
6600 case scalar_to_vec:
6601 case cond_branch_not_taken:
6602 case vec_perm:
8bd37302 6603 case vec_promote_demote:
35e1a5e7
IR
6604 return 1;
6605
6606 case scalar_store:
6607 return 10;
6608
6609 case scalar_load:
6610 /* Load + rotate. */
6611 return 2;
6612
6613 case unaligned_load:
6614 return 2;
6615
6616 case cond_branch_taken:
6617 return 6;
6618
a21892ad
BS
6619 case vec_construct:
6620 elements = TYPE_VECTOR_SUBPARTS (vectype);
6621 return elements / 2 + 1;
6622
35e1a5e7
IR
6623 default:
6624 gcc_unreachable ();
6625 }
e95b59d2
DN
6626}
6627
c3e7ee41
BS
6628/* Implement targetm.vectorize.init_cost. */
6629
cf0d4e15 6630static void *
c3e7ee41
BS
6631spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6632{
92345349
BS
6633 unsigned *cost = XNEWVEC (unsigned, 3);
6634 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
c3e7ee41
BS
6635 return cost;
6636}
6637
6638/* Implement targetm.vectorize.add_stmt_cost. */
6639
cf0d4e15 6640static unsigned
c3e7ee41 6641spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
92345349
BS
6642 struct _stmt_vec_info *stmt_info, int misalign,
6643 enum vect_cost_model_location where)
c3e7ee41
BS
6644{
6645 unsigned *cost = (unsigned *) data;
6646 unsigned retval = 0;
6647
6648 if (flag_vect_cost_model)
6649 {
92345349 6650 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
c3e7ee41
BS
6651 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6652
6653 /* Statements in an inner loop relative to the loop being
6654 vectorized are weighted more heavily. The value here is
6655 arbitrary and could potentially be improved with analysis. */
92345349 6656 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
c3e7ee41
BS
6657 count *= 50; /* FIXME. */
6658
6659 retval = (unsigned) (count * stmt_cost);
92345349 6660 cost[where] += retval;
c3e7ee41
BS
6661 }
6662
6663 return retval;
6664}
6665
6666/* Implement targetm.vectorize.finish_cost. */
6667
92345349
BS
6668static void
6669spu_finish_cost (void *data, unsigned *prologue_cost,
6670 unsigned *body_cost, unsigned *epilogue_cost)
c3e7ee41 6671{
92345349
BS
6672 unsigned *cost = (unsigned *) data;
6673 *prologue_cost = cost[vect_prologue];
6674 *body_cost = cost[vect_body];
6675 *epilogue_cost = cost[vect_epilogue];
c3e7ee41
BS
6676}
6677
6678/* Implement targetm.vectorize.destroy_cost_data. */
6679
cf0d4e15 6680static void
c3e7ee41
BS
6681spu_destroy_cost_data (void *data)
6682{
6683 free (data);
6684}
6685
99c9c69a
DN
6686/* Return true iff a data reference of TYPE can reach vector alignment (16)
6687 after applying some number of iterations.  This routine does not determine
6688 how many iterations are required to reach the desired alignment. */
6689
6690static bool
3101faab 6691spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
99c9c69a
DN
6692{
6693 if (is_packed)
6694 return false;
6695
6696 /* All other types are naturally aligned. */
6697 return true;
6698}
6699
299456f3 6700/* Return the appropriate mode for a named address pointer. */
ef4bddc2 6701static machine_mode
299456f3
BE
6702spu_addr_space_pointer_mode (addr_space_t addrspace)
6703{
6704 switch (addrspace)
6705 {
6706 case ADDR_SPACE_GENERIC:
6707 return ptr_mode;
6708 case ADDR_SPACE_EA:
6709 return EAmode;
6710 default:
6711 gcc_unreachable ();
6712 }
6713}
6714
6715/* Return the appropriate mode for a named address address. */
ef4bddc2 6716static machine_mode
299456f3
BE
6717spu_addr_space_address_mode (addr_space_t addrspace)
6718{
6719 switch (addrspace)
6720 {
6721 case ADDR_SPACE_GENERIC:
6722 return Pmode;
6723 case ADDR_SPACE_EA:
6724 return EAmode;
6725 default:
6726 gcc_unreachable ();
6727 }
6728}
6729
6730/* Determine if one named address space is a subset of another. */
6731
6732static bool
6733spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6734{
6735 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6736 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6737
6738 if (subset == superset)
6739 return true;
6740
6741 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6742 being subsets but instead as disjoint address spaces. */
6743 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6744 return false;
6745
6746 else
6747 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6748}
6749
6750/* Convert from one address space to another. */
6751static rtx
6752spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6753{
6754 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6755 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6756
6757 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6758 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6759
6760 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6761 {
6762 rtx result, ls;
6763
6764 ls = gen_const_mem (DImode,
6765 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6766 set_mem_align (ls, 128);
6767
6768 result = gen_reg_rtx (Pmode);
6769 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6770 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6771 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6772 ls, const0_rtx, Pmode, 1);
6773
6774 emit_insn (gen_subsi3 (result, op, ls));
6775
6776 return result;
6777 }
6778
6779 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6780 {
6781 rtx result, ls;
6782
6783 ls = gen_const_mem (DImode,
6784 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6785 set_mem_align (ls, 128);
6786
6787 result = gen_reg_rtx (EAmode);
6788 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6789 op = force_reg (Pmode, op);
6790 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6791 ls, const0_rtx, EAmode, 1);
6792 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6793
6794 if (EAmode == SImode)
6795 emit_insn (gen_addsi3 (result, op, ls));
6796 else
6797 emit_insn (gen_adddi3 (result, op, ls));
6798
6799 return result;
6800 }
6801
6802 else
6803 gcc_unreachable ();
6804}
6805
6806
67186a97
TS
6807/* Count the total number of instructions in each pipe and return the
6808 maximum, which is used as the Minimum Iteration Interval (MII)
6809 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6810 A value of -2 means the instruction can go in either pipe0 or pipe1. */
6811static int
6812spu_sms_res_mii (struct ddg *g)
6813{
6814 int i;
6815 unsigned t[4] = {0, 0, 0, 0};
6816
6817 for (i = 0; i < g->num_nodes; i++)
6818 {
23c39aaa 6819 rtx_insn *insn = g->nodes[i].insn;
67186a97
TS
6820 int p = get_pipe (insn) + 2;
6821
819bfe0e
JM
6822 gcc_assert (p >= 0);
6823 gcc_assert (p < 4);
67186a97
TS
6824
6825 t[p]++;
6826 if (dump_file && INSN_P (insn))
6827 fprintf (dump_file, "i%d %s %d %d\n",
6828 INSN_UID (insn),
6829 insn_data[INSN_CODE(insn)].name,
6830 p, t[p]);
6831 }
6832 if (dump_file)
6833 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6834
6835 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6836}
6837
6838
73701e27
TS
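/* Per-function initialization of the RTL expanders: record the possible
   misalignment of the hard frame pointer and create two pseudos that the
   back-end expects to be pointer aligned.  */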
6839void
6840spu_init_expanders (void)
eec9405e 6841{
73701e27 6842 if (cfun)
eec9405e
TS
6843 {
6844 rtx r0, r1;
6845 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6846 frame_pointer_needed is true. We don't know that until we're
6847 expanding the prologue. */
6848 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6849
6850 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6851 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6852 to be treated as aligned, so generate them here. */
6853 r0 = gen_reg_rtx (SImode);
6854 r1 = gen_reg_rtx (SImode);
6855 mark_reg_pointer (r0, 128);
6856 mark_reg_pointer (r1, 128);
6857 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6858 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6859 }
7fecf2c7
AP
6860}
6861
ef4bddc2 6862static machine_mode
7fecf2c7
AP
6863spu_libgcc_cmp_return_mode (void)
6864{
6865
6866/* For SPU, word mode is TImode, so it is better to use SImode
6867 for compare returns. */
6868 return SImode;
6869}
6870
ef4bddc2 6871static machine_mode
7fecf2c7
AP
6872spu_libgcc_shift_count_mode (void)
6873{
6874/* For SPU, word mode is TImode, so it is better to use SImode
6875 for shift counts. */
6876 return SImode;
6877}
9dcc2e87 6878
500a1f85
UW
6879/* Implement targetm.section_type_flags. */
6880static unsigned int
6881spu_section_type_flags (tree decl, const char *name, int reloc)
6882{
6883 /* .toe needs to have type @nobits. */
6884 if (strcmp (name, ".toe") == 0)
6885 return SECTION_BSS;
299456f3
BE
6886 /* Don't load _ea into the current address space. */
6887 if (strcmp (name, "._ea") == 0)
6888 return SECTION_WRITE | SECTION_DEBUG;
500a1f85
UW
6889 return default_section_type_flags (decl, name, reloc);
6890}
4a3a2376 6891
299456f3
BE
6892/* Implement targetm.select_section. */
6893static section *
6894spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6895{
6896 /* Variables and constants defined in the __ea address space
6897 go into a special section named "._ea". */
6898 if (TREE_TYPE (decl) != error_mark_node
6899 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6900 {
6901 /* We might get called with string constants, but get_named_section
6902 doesn't like them as they are not DECLs. Also, we need to set
6903 flags in that case. */
6904 if (!DECL_P (decl))
6905 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6906
6907 return get_named_section (decl, "._ea", reloc);
6908 }
6909
6910 return default_elf_select_section (decl, reloc, align);
6911}
6912
6913/* Implement targetm.unique_section. */
6914static void
6915spu_unique_section (tree decl, int reloc)
6916{
6917 /* We don't support unique section names in the __ea address
6918 space for now. */
6919 if (TREE_TYPE (decl) != error_mark_node
6920 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6921 return;
6922
6923 default_unique_section (decl, reloc);
6924}
6925
5345cf68
TS
6926/* Generate a constant or register which contains 2^SCALE. We assume
6927 the result is valid for MODE. Currently, MODE must be V4SFmode and
6928 SCALE must be SImode. */
6929rtx
ef4bddc2 6930spu_gen_exp2 (machine_mode mode, rtx scale)
5345cf68
TS
6931{
6932 gcc_assert (mode == V4SFmode);
6933 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6934 if (GET_CODE (scale) != CONST_INT)
6935 {
6936 /* unsigned int exp = (127 + scale) << 23;
6937 __vector float m = (__vector float) spu_splats (exp); */
6938 rtx reg = force_reg (SImode, scale);
6939 rtx exp = gen_reg_rtx (SImode);
6940 rtx mul = gen_reg_rtx (mode);
6941 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6942 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6943 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6944 return mul;
6945 }
6946 else
6947 {
6948 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6949 unsigned char arr[16];
6950 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6951 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6952 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6953 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6954 return array_to_constant (mode, arr);
6955 }
6956}
6957
eec9405e
TS
6958/* After reload, just change the convert into a move instruction
6959 or a dead instruction. */
6960void
6961spu_split_convert (rtx ops[])
6962{
6963 if (REGNO (ops[0]) == REGNO (ops[1]))
6964 emit_note (NOTE_INSN_DELETED);
6965 else
6966 {
6967 /* Use TImode always as this might help hard reg copyprop. */
6968 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6969 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6970 emit_insn (gen_move_insn (op0, op1));
6971 }
6972}
6973
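/* Emit the profiling code used when compiling with -pg: a branch and
   set link to _mcount.  */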
75741fed 6974void
d707fc77 6975spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
75741fed
KW
6976{
6977 fprintf (file, "# profile\n");
6978 fprintf (file, "brsl $75, _mcount\n");
6979}
6980
d4f2460a
UW
6981/* Implement targetm.ref_may_alias_errno. */
6982static bool
6983spu_ref_may_alias_errno (ao_ref *ref)
6984{
6985 tree base = ao_ref_base (ref);
6986
6987 /* With SPU newlib, errno is defined as something like
6988 _impure_data._errno
6989 The default implementation of this target macro does not
6990 recognize such expressions, so special-case it here. */
6991
6992 if (TREE_CODE (base) == VAR_DECL
6993 && !TREE_STATIC (base)
6994 && DECL_EXTERNAL (base)
6995 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
6996 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
6997 "_impure_data") == 0
6998 /* _errno is the first member of _impure_data. */
6999 && ref->offset == 0)
7000 return true;
7001
7002 return default_ref_may_alias_errno (ref);
7003}
7004
07ea0048
UW
7005/* Output thunk to FILE that implements a C++ virtual function call (with
7006 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7007 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7008 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7009 relative to the resulting this pointer. */
7010
7011static void
7012spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7013 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7014 tree function)
7015{
7016 rtx op[8];
7017
7018 /* Make sure unwind info is emitted for the thunk if needed. */
7019 final_start_function (emit_barrier (), file, 1);
7020
7021 /* Operand 0 is the target function. */
7022 op[0] = XEXP (DECL_RTL (function), 0);
7023
7024 /* Operand 1 is the 'this' pointer. */
7025 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7026 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7027 else
7028 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7029
7030 /* Operands 2/3 are the low/high halfwords of delta. */
7031 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7032 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7033
7034 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7035 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7036 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7037
7038 /* Operands 6/7 are temporary registers. */
7039 op[6] = gen_rtx_REG (Pmode, 79);
7040 op[7] = gen_rtx_REG (Pmode, 78);
7041
7042 /* Add DELTA to this pointer. */
7043 if (delta)
7044 {
7045 if (delta >= -0x200 && delta < 0x200)
7046 output_asm_insn ("ai\t%1,%1,%2", op);
7047 else if (delta >= -0x8000 && delta < 0x8000)
7048 {
7049 output_asm_insn ("il\t%6,%2", op);
7050 output_asm_insn ("a\t%1,%1,%6", op);
7051 }
7052 else
7053 {
7054 output_asm_insn ("ilhu\t%6,%3", op);
7055 output_asm_insn ("iohl\t%6,%2", op);
7056 output_asm_insn ("a\t%1,%1,%6", op);
7057 }
7058 }
7059
7060 /* Perform vcall adjustment. */
7061 if (vcall_offset)
7062 {
7063 output_asm_insn ("lqd\t%7,0(%1)", op);
7064 output_asm_insn ("rotqby\t%7,%7,%1", op);
7065
7066 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7067 output_asm_insn ("ai\t%7,%7,%4", op);
7068 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7069 {
7070 output_asm_insn ("il\t%6,%4", op);
7071 output_asm_insn ("a\t%7,%7,%6", op);
7072 }
7073 else
7074 {
7075 output_asm_insn ("ilhu\t%6,%5", op);
7076 output_asm_insn ("iohl\t%6,%4", op);
7077 output_asm_insn ("a\t%7,%7,%6", op);
7078 }
7079
7080 output_asm_insn ("lqd\t%6,0(%7)", op);
7081 output_asm_insn ("rotqby\t%6,%6,%7", op);
7082 output_asm_insn ("a\t%1,%1,%6", op);
7083 }
7084
7085 /* Jump to target. */
7086 output_asm_insn ("br\t%0", op);
7087
7088 final_end_function ();
7089}
7090
c354951b
AK
7091/* Canonicalize a comparison from one we don't have to one we do have. */
7092static void
7093spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7094 bool op0_preserve_value)
7095{
7096 if (!op0_preserve_value
7097 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7098 {
7099 rtx tem = *op0;
7100 *op0 = *op1;
7101 *op1 = tem;
7102 *code = (int)swap_condition ((enum rtx_code)*code);
7103 }
7104}
99400eed
UW
7105
7106/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7107 to perform. MEM is the memory on which to operate. VAL is the second
7108 operand of the binary operator. BEFORE and AFTER are optional locations to
7109 return the value of MEM either before of after the operation. */
7110void
7111spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7112 rtx orig_before, rtx orig_after)
7113{
7114 machine_mode mode = GET_MODE (mem);
7115 rtx before = orig_before, after = orig_after;
7116
7117 if (before == NULL_RTX)
7118 before = gen_reg_rtx (mode);
7119
7120 emit_move_insn (before, mem);
7121
7122 if (code == MULT) /* NAND operation */
7123 {
7124 rtx x = expand_simple_binop (mode, AND, before, val,
7125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7126 after = expand_simple_unop (mode, NOT, x, after, 1);
7127 }
7128 else
7129 {
7130 after = expand_simple_binop (mode, code, before, val,
7131 after, 1, OPTAB_LIB_WIDEN);
7132 }
7133
7134 emit_move_insn (mem, after);
7135
7136 if (orig_after && after != orig_after)
7137 emit_move_insn (orig_after, after);
7138}
7139
c9c72699
UW
7140\f
7141/* Table of machine attributes. */
7142static const struct attribute_spec spu_attribute_table[] =
7143{
7144 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7145 affects_type_identity } */
7146 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7147 false },
7148 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7149 false },
7150 { NULL, 0, 0, false, false, false, NULL, false }
7151};
7152
7153/* TARGET overrides. */
7154
7155#undef TARGET_ADDR_SPACE_POINTER_MODE
7156#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7157
7158#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7159#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7160
7161#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7162#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7163 spu_addr_space_legitimate_address_p
7164
7165#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7166#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7167
7168#undef TARGET_ADDR_SPACE_SUBSET_P
7169#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7170
7171#undef TARGET_ADDR_SPACE_CONVERT
7172#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7173
7174#undef TARGET_INIT_BUILTINS
7175#define TARGET_INIT_BUILTINS spu_init_builtins
7176#undef TARGET_BUILTIN_DECL
7177#define TARGET_BUILTIN_DECL spu_builtin_decl
7178
7179#undef TARGET_EXPAND_BUILTIN
7180#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7181
7182#undef TARGET_UNWIND_WORD_MODE
7183#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7184
7185#undef TARGET_LEGITIMIZE_ADDRESS
7186#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7187
7188/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7189 and .quad for the debugger. When it is known that the assembler is fixed,
7190 these can be removed. */
7191#undef TARGET_ASM_UNALIGNED_SI_OP
7192#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7193
7194#undef TARGET_ASM_ALIGNED_DI_OP
7195#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7196
7197/* The .8byte directive doesn't seem to work well for a 32 bit
7198 architecture. */
7199#undef TARGET_ASM_UNALIGNED_DI_OP
7200#define TARGET_ASM_UNALIGNED_DI_OP NULL
7201
7202#undef TARGET_RTX_COSTS
7203#define TARGET_RTX_COSTS spu_rtx_costs
7204
7205#undef TARGET_ADDRESS_COST
b413068c 7206#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
c9c72699
UW
7207
7208#undef TARGET_SCHED_ISSUE_RATE
7209#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7210
7211#undef TARGET_SCHED_INIT_GLOBAL
7212#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7213
7214#undef TARGET_SCHED_INIT
7215#define TARGET_SCHED_INIT spu_sched_init
7216
7217#undef TARGET_SCHED_VARIABLE_ISSUE
7218#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7219
7220#undef TARGET_SCHED_REORDER
7221#define TARGET_SCHED_REORDER spu_sched_reorder
7222
7223#undef TARGET_SCHED_REORDER2
7224#define TARGET_SCHED_REORDER2 spu_sched_reorder
7225
7226#undef TARGET_SCHED_ADJUST_COST
7227#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7228
7229#undef TARGET_ATTRIBUTE_TABLE
7230#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7231
7232#undef TARGET_ASM_INTEGER
7233#define TARGET_ASM_INTEGER spu_assemble_integer
7234
7235#undef TARGET_SCALAR_MODE_SUPPORTED_P
7236#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7237
7238#undef TARGET_VECTOR_MODE_SUPPORTED_P
7239#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7240
7241#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7242#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7243
7244#undef TARGET_ASM_GLOBALIZE_LABEL
7245#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7246
7247#undef TARGET_PASS_BY_REFERENCE
7248#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7249
7250#undef TARGET_FUNCTION_ARG
7251#define TARGET_FUNCTION_ARG spu_function_arg
7252
7253#undef TARGET_FUNCTION_ARG_ADVANCE
7254#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7255
7256#undef TARGET_MUST_PASS_IN_STACK
7257#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7258
7259#undef TARGET_BUILD_BUILTIN_VA_LIST
7260#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7261
7262#undef TARGET_EXPAND_BUILTIN_VA_START
7263#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7264
7265#undef TARGET_SETUP_INCOMING_VARARGS
7266#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7267
7268#undef TARGET_MACHINE_DEPENDENT_REORG
7269#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7270
7271#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7272#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7273
7274#undef TARGET_INIT_LIBFUNCS
7275#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7276
7277#undef TARGET_RETURN_IN_MEMORY
7278#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7279
7280#undef TARGET_ENCODE_SECTION_INFO
7281#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7282
7283#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7284#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7285
7286#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7287#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7288
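/* Vectorizer cost model.  These four hooks follow GCC's standard protocol:
   the init hook allocates per-loop cost counters, the add-stmt hook
   accumulates a cost for each statement considered for vectorization, the
   finish hook reports the prologue/body/epilogue totals, and the destroy
   hook releases the data.  */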
7289#undef TARGET_VECTORIZE_INIT_COST
7290#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7291
7292#undef TARGET_VECTORIZE_ADD_STMT_COST
7293#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7294
7295#undef TARGET_VECTORIZE_FINISH_COST
7296#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7297
7298#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7299#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7300
7301#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7302#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7303
7304#undef TARGET_LIBGCC_CMP_RETURN_MODE
7305#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7306
7307#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7308#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7309
7310#undef TARGET_SCHED_SMS_RES_MII
7311#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7312
7313#undef TARGET_SECTION_TYPE_FLAGS
7314#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7315
7316#undef TARGET_ASM_SELECT_SECTION
7317#define TARGET_ASM_SELECT_SECTION spu_select_section
7318
7319#undef TARGET_ASM_UNIQUE_SECTION
7320#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7321
7322#undef TARGET_LEGITIMATE_ADDRESS_P
7323#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7324
7325#undef TARGET_LEGITIMATE_CONSTANT_P
7326#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7327
7328#undef TARGET_TRAMPOLINE_INIT
7329#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7330
7331#undef TARGET_WARN_FUNC_RETURN
7332#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7333
7334#undef TARGET_OPTION_OVERRIDE
7335#define TARGET_OPTION_OVERRIDE spu_option_override
7336
7337#undef TARGET_CONDITIONAL_REGISTER_USAGE
7338#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7339
7340#undef TARGET_REF_MAY_ALIAS_ERRNO
7341#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7342
7343#undef TARGET_ASM_OUTPUT_MI_THUNK
7344#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7345#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7346#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7347
7348/* Variable tracking should be run after all optimizations that change
7349   the order of insns.  It also needs a valid CFG.  */
7350#undef TARGET_DELAY_VARTRACK
7351#define TARGET_DELAY_VARTRACK true
7352
7353#undef TARGET_CANONICALIZE_COMPARISON
7354#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7355
7356#undef TARGET_CAN_USE_DOLOOP_P
7357#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
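/* can_use_doloop_if_innermost is a generic helper that allows doloop-based
   loop optimizations only for innermost loops.  */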
7358
7359struct gcc_target targetm = TARGET_INITIALIZER;
7360
7361#include "gt-spu.h"