]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/spu/spu.c
Turn TRULY_NOOP_TRUNCATION into a hook
[thirdparty/gcc.git] / gcc / config / spu / spu.c
CommitLineData
cbe34bb5 1/* Copyright (C) 2006-2017 Free Software Foundation, Inc.
85d9c13c
TS
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
2f83c7d6 5 Software Foundation; either version 3 of the License, or (at your option)
85d9c13c
TS
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
2f83c7d6
NC
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
85d9c13c
TS
16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
c7131fb2 20#include "backend.h"
e11c4407
AM
21#include "target.h"
22#include "rtl.h"
c7131fb2
AM
23#include "tree.h"
24#include "gimple.h"
e11c4407
AM
25#include "cfghooks.h"
26#include "cfgloop.h"
c7131fb2 27#include "df.h"
4d0cdd0c 28#include "memmodel.h"
e11c4407
AM
29#include "tm_p.h"
30#include "stringpool.h"
314e6352 31#include "attribs.h"
e11c4407
AM
32#include "expmed.h"
33#include "optabs.h"
85d9c13c 34#include "regs.h"
e11c4407 35#include "emit-rtl.h"
85d9c13c 36#include "recog.h"
e11c4407
AM
37#include "diagnostic-core.h"
38#include "insn-attr.h"
40e23961 39#include "alias.h"
40e23961 40#include "fold-const.h"
d8a2d370
DN
41#include "stor-layout.h"
42#include "calls.h"
43#include "varasm.h"
36566b39 44#include "explow.h"
85d9c13c 45#include "expr.h"
85d9c13c 46#include "output.h"
60393bbc 47#include "cfgrtl.h"
60393bbc 48#include "cfgbuild.h"
85d9c13c
TS
49#include "langhooks.h"
50#include "reload.h"
85d9c13c
TS
51#include "sched-int.h"
52#include "params.h"
45b0be94 53#include "gimplify.h"
85d9c13c 54#include "tm-constrs.h"
60393bbc 55#include "ddg.h"
7ee2468b 56#include "dumpfile.h"
9b2b7279 57#include "builtins.h"
3dfc96ea 58#include "rtl-iter.h"
b66b813d 59
994c5d85 60/* This file should be included last. */
d58627a0
RS
61#include "target-def.h"
62
b66b813d 63/* Builtin types, data and prototypes. */
4a3a2376
UW
64
65enum spu_builtin_type_index
66{
67 SPU_BTI_END_OF_PARAMS,
68
69 /* We create new type nodes for these. */
70 SPU_BTI_V16QI,
71 SPU_BTI_V8HI,
72 SPU_BTI_V4SI,
73 SPU_BTI_V2DI,
74 SPU_BTI_V4SF,
75 SPU_BTI_V2DF,
76 SPU_BTI_UV16QI,
77 SPU_BTI_UV8HI,
78 SPU_BTI_UV4SI,
79 SPU_BTI_UV2DI,
80
81 /* A 16-byte type. (Implemented with V16QI_type_node) */
82 SPU_BTI_QUADWORD,
83
84 /* These all correspond to intSI_type_node */
85 SPU_BTI_7,
86 SPU_BTI_S7,
87 SPU_BTI_U7,
88 SPU_BTI_S10,
89 SPU_BTI_S10_4,
90 SPU_BTI_U14,
91 SPU_BTI_16,
92 SPU_BTI_S16,
93 SPU_BTI_S16_2,
94 SPU_BTI_U16,
95 SPU_BTI_U16_2,
96 SPU_BTI_U18,
97
98 /* These correspond to the standard types */
99 SPU_BTI_INTQI,
100 SPU_BTI_INTHI,
101 SPU_BTI_INTSI,
102 SPU_BTI_INTDI,
103
104 SPU_BTI_UINTQI,
105 SPU_BTI_UINTHI,
106 SPU_BTI_UINTSI,
107 SPU_BTI_UINTDI,
108
109 SPU_BTI_FLOAT,
110 SPU_BTI_DOUBLE,
111
112 SPU_BTI_VOID,
113 SPU_BTI_PTR,
114
115 SPU_BTI_MAX
116};
117
118#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
119#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
120#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
121#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
122#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
123#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
124#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
125#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
126#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
127#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
128
129static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
130
b66b813d
AP
131struct spu_builtin_range
132{
133 int low, high;
134};
135
136static struct spu_builtin_range spu_builtin_range[] = {
137 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
138 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
139 {0ll, 0x7fll}, /* SPU_BTI_U7 */
140 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
141 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
142 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
143 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
144 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
145 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
146 {0ll, 0xffffll}, /* SPU_BTI_U16 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
148 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
149};
150
85d9c13c
TS
151\f
152/* Target specific attribute specifications. */
153char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
154
155/* Prototypes and external defs. */
23c39aaa 156static int get_pipe (rtx_insn *insn);
85d9c13c 157static int spu_naked_function_p (tree func);
85d9c13c 158static int mem_is_padded_component_ref (rtx x);
32fb22af 159static void fix_range (const char *);
eec9405e 160static rtx spu_expand_load (rtx, rtx, rtx, int);
85d9c13c 161
39aeae85
SL
162/* Which instruction set architecture to use. */
163int spu_arch;
164/* Which cpu are we tuning for. */
165int spu_tune;
166
9dcc2e87
TS
167/* The hardware requires 8 insns between a hint and the branch it
168 effects. This variable describes how many rtl instructions the
169 compiler needs to see before inserting a hint, and then the compiler
170 will insert enough nops to make it at least 8 insns. The default is
171 for the compiler to allow up to 2 nops be emitted. The nops are
172 inserted in pairs, so we round down. */
173int spu_hint_dist = (8*4) - (2*4);
174
85d9c13c
TS
175enum spu_immediate {
176 SPU_NONE,
177 SPU_IL,
178 SPU_ILA,
179 SPU_ILH,
180 SPU_ILHU,
181 SPU_ORI,
182 SPU_ORHI,
183 SPU_ORBI,
01975fc7 184 SPU_IOHL
85d9c13c 185};
a1c6e4b8
TS
186enum immediate_class
187{
188 IC_POOL, /* constant pool */
189 IC_IL1, /* one il* instruction */
190 IC_IL2, /* both ilhu and iohl instructions */
191 IC_IL1s, /* one il* instruction */
192 IC_IL2s, /* both ilhu and iohl instructions */
193 IC_FSMBI, /* the fsmbi instruction */
194 IC_CPAT, /* one of the c*d instructions */
73701e27 195 IC_FSMBI2 /* fsmbi plus 1 other instruction */
a1c6e4b8 196};
85d9c13c
TS
197
198static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
199static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
a1c6e4b8
TS
200static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
201static enum immediate_class classify_immediate (rtx op,
ef4bddc2 202 machine_mode mode);
85d9c13c 203
299456f3
BE
204/* Pointer mode for __ea references. */
205#define EAmode (spu_ea_model != 32 ? DImode : SImode)
206
5a82ecd9 207\f
c5d94218
UW
208/* Define the structure for the machine field in struct function. */
209struct GTY(()) machine_function
210{
211 /* Register to use for PIC accesses. */
212 rtx pic_reg;
213};
214
215/* How to allocate a 'struct machine_function'. */
216static struct machine_function *
217spu_init_machine_status (void)
218{
766090c2 219 return ggc_cleared_alloc<machine_function> ();
c5d94218
UW
220}
221
c5387660
JM
222/* Implement TARGET_OPTION_OVERRIDE. */
223static void
224spu_option_override (void)
85d9c13c 225{
c5d94218
UW
226 /* Set up function hooks. */
227 init_machine_status = spu_init_machine_status;
228
039cb258
UW
229 /* Small loops will be unpeeled at -O3. For SPU it is more important
230 to keep code small by default. */
128dc8e2 231 if (!flag_unroll_loops && !flag_peel_loops)
dc242c4a 232 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
48476d13
JM
233 global_options.x_param_values,
234 global_options_set.x_param_values);
039cb258 235
85d9c13c
TS
236 flag_omit_frame_pointer = 1;
237
9dcc2e87 238 /* Functions must be 8 byte aligned so we correctly handle dual issue */
85d9c13c
TS
239 if (align_functions < 8)
240 align_functions = 8;
32fb22af 241
9dcc2e87
TS
242 spu_hint_dist = 8*4 - spu_max_nops*4;
243 if (spu_hint_dist < 0)
244 spu_hint_dist = 0;
245
32fb22af
SL
246 if (spu_fixed_range_string)
247 fix_range (spu_fixed_range_string);
39aeae85
SL
248
249 /* Determine processor architectural level. */
250 if (spu_arch_string)
251 {
252 if (strcmp (&spu_arch_string[0], "cell") == 0)
253 spu_arch = PROCESSOR_CELL;
254 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
255 spu_arch = PROCESSOR_CELLEDP;
256 else
9c1732c4 257 error ("bad value (%s) for -march= switch", spu_arch_string);
39aeae85
SL
258 }
259
260 /* Determine processor to tune for. */
261 if (spu_tune_string)
262 {
263 if (strcmp (&spu_tune_string[0], "cell") == 0)
264 spu_tune = PROCESSOR_CELL;
265 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
266 spu_tune = PROCESSOR_CELLEDP;
267 else
9c1732c4 268 error ("bad value (%s) for -mtune= switch", spu_tune_string);
39aeae85 269 }
88f091f5 270
59dbe4fe
UW
271 /* Change defaults according to the processor architecture. */
272 if (spu_arch == PROCESSOR_CELLEDP)
273 {
274 /* If no command line option has been otherwise specified, change
275 the default to -mno-safe-hints on celledp -- only the original
276 Cell/B.E. processors require this workaround. */
277 if (!(target_flags_explicit & MASK_SAFE_HINTS))
278 target_flags &= ~MASK_SAFE_HINTS;
279 }
280
88f091f5 281 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
85d9c13c
TS
282}
283\f
c43f4279
RS
284/* Implement TARGET_HARD_REGNO_NREGS. */
285
286static unsigned int
287spu_hard_regno_nregs (unsigned int, machine_mode mode)
288{
289 return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
290}
291
85d9c13c
TS
292/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
293 struct attribute_spec.handler. */
294
85d9c13c
TS
295/* True if MODE is valid for the target. By "valid", we mean able to
296 be manipulated in non-trivial ways. In particular, this means all
297 the arithmetic is supported. */
298static bool
18e2a8b8 299spu_scalar_mode_supported_p (scalar_mode mode)
85d9c13c
TS
300{
301 switch (mode)
302 {
4e10a5a7
RS
303 case E_QImode:
304 case E_HImode:
305 case E_SImode:
306 case E_SFmode:
307 case E_DImode:
308 case E_TImode:
309 case E_DFmode:
85d9c13c
TS
310 return true;
311
312 default:
313 return false;
314 }
315}
316
317/* Similarly for vector modes. "Supported" here is less strict. At
318 least some operations are supported; need to check optabs or builtins
319 for further details. */
320static bool
ef4bddc2 321spu_vector_mode_supported_p (machine_mode mode)
85d9c13c
TS
322{
323 switch (mode)
324 {
4e10a5a7
RS
325 case E_V16QImode:
326 case E_V8HImode:
327 case E_V4SImode:
328 case E_V2DImode:
329 case E_V4SFmode:
330 case E_V2DFmode:
85d9c13c
TS
331 return true;
332
333 default:
334 return false;
335 }
336}
337
338/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
339 least significant bytes of the outer mode. This function returns
340 TRUE for the SUBREG's where this is correct. */
341int
342valid_subreg (rtx op)
343{
ef4bddc2
RS
344 machine_mode om = GET_MODE (op);
345 machine_mode im = GET_MODE (SUBREG_REG (op));
85d9c13c
TS
346 return om != VOIDmode && im != VOIDmode
347 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
9e071d06
UW
348 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
349 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
85d9c13c
TS
350}
351
352/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
2f8e468b 353 and adjust the start offset. */
85d9c13c
TS
354static rtx
355adjust_operand (rtx op, HOST_WIDE_INT * start)
356{
ef4bddc2 357 machine_mode mode;
85d9c13c 358 int op_size;
9e071d06
UW
359 /* Strip any paradoxical SUBREG. */
360 if (GET_CODE (op) == SUBREG
361 && (GET_MODE_BITSIZE (GET_MODE (op))
362 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
85d9c13c
TS
363 {
364 if (start)
365 *start -=
366 GET_MODE_BITSIZE (GET_MODE (op)) -
367 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
368 op = SUBREG_REG (op);
369 }
370 /* If it is smaller than SI, assure a SUBREG */
371 op_size = GET_MODE_BITSIZE (GET_MODE (op));
372 if (op_size < 32)
373 {
374 if (start)
375 *start += 32 - op_size;
376 op_size = 32;
377 }
378 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
f4b31647 379 mode = int_mode_for_size (op_size, 0).require ();
85d9c13c
TS
380 if (mode != GET_MODE (op))
381 op = gen_rtx_SUBREG (mode, op, 0);
382 return op;
383}
384
385void
386spu_expand_extv (rtx ops[], int unsignedp)
387{
eec9405e 388 rtx dst = ops[0], src = ops[1];
85d9c13c
TS
389 HOST_WIDE_INT width = INTVAL (ops[2]);
390 HOST_WIDE_INT start = INTVAL (ops[3]);
eec9405e
TS
391 HOST_WIDE_INT align_mask;
392 rtx s0, s1, mask, r0;
85d9c13c 393
eec9405e 394 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
85d9c13c 395
eec9405e 396 if (MEM_P (src))
85d9c13c 397 {
eec9405e
TS
398 /* First, determine if we need 1 TImode load or 2. We need only 1
399 if the bits being extracted do not cross the alignment boundary
400 as determined by the MEM and its address. */
401
402 align_mask = -MEM_ALIGN (src);
403 if ((start & align_mask) == ((start + width - 1) & align_mask))
85d9c13c 404 {
eec9405e
TS
405 /* Alignment is sufficient for 1 load. */
406 s0 = gen_reg_rtx (TImode);
407 r0 = spu_expand_load (s0, 0, src, start / 8);
408 start &= 7;
409 if (r0)
410 emit_insn (gen_rotqby_ti (s0, s0, r0));
85d9c13c 411 }
eec9405e
TS
412 else
413 {
414 /* Need 2 loads. */
415 s0 = gen_reg_rtx (TImode);
416 s1 = gen_reg_rtx (TImode);
417 r0 = spu_expand_load (s0, s1, src, start / 8);
418 start &= 7;
419
420 gcc_assert (start + width <= 128);
421 if (r0)
422 {
423 rtx r1 = gen_reg_rtx (SImode);
424 mask = gen_reg_rtx (TImode);
425 emit_move_insn (mask, GEN_INT (-1));
426 emit_insn (gen_rotqby_ti (s0, s0, r0));
427 emit_insn (gen_rotqby_ti (s1, s1, r0));
428 if (GET_CODE (r0) == CONST_INT)
429 r1 = GEN_INT (INTVAL (r0) & 15);
430 else
431 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
432 emit_insn (gen_shlqby_ti (mask, mask, r1));
433 emit_insn (gen_selb (s0, s1, s0, mask));
434 }
435 }
436
437 }
438 else if (GET_CODE (src) == SUBREG)
439 {
440 rtx r = SUBREG_REG (src);
441 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
442 s0 = gen_reg_rtx (TImode);
443 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
f7df4a84 444 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
eec9405e
TS
445 else
446 emit_move_insn (s0, src);
447 }
448 else
449 {
450 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
451 s0 = gen_reg_rtx (TImode);
452 emit_move_insn (s0, src);
85d9c13c
TS
453 }
454
eec9405e
TS
455 /* Now s0 is TImode and contains the bits to extract at start. */
456
457 if (start)
458 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
459
460 if (128 - width)
eb6c3df1 461 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
85d9c13c 462
eec9405e 463 emit_move_insn (dst, s0);
85d9c13c
TS
464}
465
466void
467spu_expand_insv (rtx ops[])
468{
469 HOST_WIDE_INT width = INTVAL (ops[1]);
470 HOST_WIDE_INT start = INTVAL (ops[2]);
a4944851 471 unsigned HOST_WIDE_INT maskbits;
ef4bddc2 472 machine_mode dst_mode;
85d9c13c 473 rtx dst = ops[0], src = ops[3];
d707fc77 474 int dst_size;
85d9c13c
TS
475 rtx mask;
476 rtx shift_reg;
477 int shift;
478
479
480 if (GET_CODE (ops[0]) == MEM)
481 dst = gen_reg_rtx (TImode);
482 else
483 dst = adjust_operand (dst, &start);
484 dst_mode = GET_MODE (dst);
485 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
486
487 if (CONSTANT_P (src))
488 {
ef4bddc2 489 machine_mode m =
85d9c13c
TS
490 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
491 src = force_reg (m, convert_to_mode (m, src, 0));
492 }
493 src = adjust_operand (src, 0);
85d9c13c
TS
494
495 mask = gen_reg_rtx (dst_mode);
496 shift_reg = gen_reg_rtx (dst_mode);
497 shift = dst_size - start - width;
498
499 /* It's not safe to use subreg here because the compiler assumes
500 that the SUBREG_REG is right justified in the SUBREG. */
501 convert_move (shift_reg, src, 1);
502
503 if (shift > 0)
504 {
505 switch (dst_mode)
506 {
4e10a5a7 507 case E_SImode:
85d9c13c
TS
508 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
509 break;
4e10a5a7 510 case E_DImode:
85d9c13c
TS
511 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
512 break;
4e10a5a7 513 case E_TImode:
85d9c13c
TS
514 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
515 break;
516 default:
517 abort ();
518 }
519 }
520 else if (shift < 0)
521 abort ();
522
523 switch (dst_size)
524 {
525 case 32:
a4944851 526 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
85d9c13c 527 if (start)
a4944851 528 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
85d9c13c
TS
529 emit_move_insn (mask, GEN_INT (maskbits));
530 break;
531 case 64:
a4944851 532 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
85d9c13c 533 if (start)
a4944851 534 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
85d9c13c
TS
535 emit_move_insn (mask, GEN_INT (maskbits));
536 break;
537 case 128:
538 {
539 unsigned char arr[16];
540 int i = start / 8;
541 memset (arr, 0, sizeof (arr));
542 arr[i] = 0xff >> (start & 7);
543 for (i++; i <= (start + width - 1) / 8; i++)
544 arr[i] = 0xff;
545 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
546 emit_move_insn (mask, array_to_constant (TImode, arr));
547 }
548 break;
549 default:
550 abort ();
551 }
552 if (GET_CODE (ops[0]) == MEM)
553 {
85d9c13c 554 rtx low = gen_reg_rtx (SImode);
85d9c13c
TS
555 rtx rotl = gen_reg_rtx (SImode);
556 rtx mask0 = gen_reg_rtx (TImode);
eec9405e
TS
557 rtx addr;
558 rtx addr0;
559 rtx addr1;
85d9c13c
TS
560 rtx mem;
561
eec9405e
TS
562 addr = force_reg (Pmode, XEXP (ops[0], 0));
563 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
85d9c13c
TS
564 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
565 emit_insn (gen_negsi2 (rotl, low));
566 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
567 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
eec9405e 568 mem = change_address (ops[0], TImode, addr0);
85d9c13c
TS
569 set_mem_alias_set (mem, 0);
570 emit_move_insn (dst, mem);
571 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
85d9c13c
TS
572 if (start + width > MEM_ALIGN (ops[0]))
573 {
574 rtx shl = gen_reg_rtx (SImode);
575 rtx mask1 = gen_reg_rtx (TImode);
576 rtx dst1 = gen_reg_rtx (TImode);
577 rtx mem1;
0a81f074 578 addr1 = plus_constant (Pmode, addr, 16);
eec9405e 579 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
85d9c13c
TS
580 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
581 emit_insn (gen_shlqby_ti (mask1, mask, shl));
eec9405e 582 mem1 = change_address (ops[0], TImode, addr1);
85d9c13c
TS
583 set_mem_alias_set (mem1, 0);
584 emit_move_insn (dst1, mem1);
585 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
586 emit_move_insn (mem1, dst1);
587 }
eec9405e 588 emit_move_insn (mem, dst);
85d9c13c
TS
589 }
590 else
9c1f1e55 591 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
85d9c13c
TS
592}
593
594
595int
596spu_expand_block_move (rtx ops[])
597{
598 HOST_WIDE_INT bytes, align, offset;
599 rtx src, dst, sreg, dreg, target;
600 int i;
601 if (GET_CODE (ops[2]) != CONST_INT
602 || GET_CODE (ops[3]) != CONST_INT
f69bbb46 603 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
85d9c13c
TS
604 return 0;
605
606 bytes = INTVAL (ops[2]);
607 align = INTVAL (ops[3]);
608
609 if (bytes <= 0)
610 return 1;
611
612 dst = ops[0];
613 src = ops[1];
614
615 if (align == 16)
616 {
617 for (offset = 0; offset + 16 <= bytes; offset += 16)
618 {
619 dst = adjust_address (ops[0], V16QImode, offset);
620 src = adjust_address (ops[1], V16QImode, offset);
621 emit_move_insn (dst, src);
622 }
623 if (offset < bytes)
624 {
625 rtx mask;
626 unsigned char arr[16] = { 0 };
627 for (i = 0; i < bytes - offset; i++)
628 arr[i] = 0xff;
629 dst = adjust_address (ops[0], V16QImode, offset);
630 src = adjust_address (ops[1], V16QImode, offset);
631 mask = gen_reg_rtx (V16QImode);
632 sreg = gen_reg_rtx (V16QImode);
633 dreg = gen_reg_rtx (V16QImode);
634 target = gen_reg_rtx (V16QImode);
635 emit_move_insn (mask, array_to_constant (V16QImode, arr));
636 emit_move_insn (dreg, dst);
637 emit_move_insn (sreg, src);
638 emit_insn (gen_selb (target, dreg, sreg, mask));
639 emit_move_insn (dst, target);
640 }
641 return 1;
642 }
643 return 0;
644}
645
646enum spu_comp_code
647{ SPU_EQ, SPU_GT, SPU_GTU };
648
39aeae85
SL
649int spu_comp_icode[12][3] = {
650 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
651 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
652 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
653 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
654 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
655 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
656 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
657 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
658 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
659 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
660 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
661 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
85d9c13c
TS
662};
663
664/* Generate a compare for CODE. Return a brand-new rtx that represents
665 the result of the compare. GCC can figure this out too if we don't
666 provide all variations of compares, but GCC always wants to use
667 WORD_MODE, we can generate better code in most cases if we do it
668 ourselves. */
669void
f90b7a5a 670spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
85d9c13c
TS
671{
672 int reverse_compare = 0;
673 int reverse_test = 0;
9943eb0b
BE
674 rtx compare_result, eq_result;
675 rtx comp_rtx, eq_rtx;
ef4bddc2
RS
676 machine_mode comp_mode;
677 machine_mode op_mode;
bbbbb16a
ILT
678 enum spu_comp_code scode, eq_code;
679 enum insn_code ior_code;
f90b7a5a
PB
680 enum rtx_code code = GET_CODE (cmp);
681 rtx op0 = XEXP (cmp, 0);
682 rtx op1 = XEXP (cmp, 1);
85d9c13c 683 int index;
9943eb0b 684 int eq_test = 0;
85d9c13c 685
f90b7a5a 686 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
85d9c13c 687 and so on, to keep the constant in operand 1. */
f90b7a5a 688 if (GET_CODE (op1) == CONST_INT)
85d9c13c 689 {
f90b7a5a
PB
690 HOST_WIDE_INT val = INTVAL (op1) - 1;
691 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
85d9c13c
TS
692 switch (code)
693 {
694 case GE:
f90b7a5a 695 op1 = GEN_INT (val);
85d9c13c
TS
696 code = GT;
697 break;
698 case LT:
f90b7a5a 699 op1 = GEN_INT (val);
85d9c13c
TS
700 code = LE;
701 break;
702 case GEU:
f90b7a5a 703 op1 = GEN_INT (val);
85d9c13c
TS
704 code = GTU;
705 break;
706 case LTU:
f90b7a5a 707 op1 = GEN_INT (val);
85d9c13c
TS
708 code = LEU;
709 break;
710 default:
711 break;
712 }
713 }
714
0b01f619
UW
715 /* However, if we generate an integer result, performing a reverse test
716 would require an extra negation, so avoid that where possible. */
717 if (GET_CODE (op1) == CONST_INT && is_set == 1)
718 {
719 HOST_WIDE_INT val = INTVAL (op1) + 1;
720 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
721 switch (code)
722 {
723 case LE:
724 op1 = GEN_INT (val);
725 code = LT;
726 break;
727 case LEU:
728 op1 = GEN_INT (val);
729 code = LTU;
730 break;
731 default:
732 break;
733 }
734 }
735
9943eb0b 736 comp_mode = SImode;
f90b7a5a 737 op_mode = GET_MODE (op0);
9943eb0b 738
85d9c13c
TS
739 switch (code)
740 {
741 case GE:
85d9c13c 742 scode = SPU_GT;
dd4ba939 743 if (HONOR_NANS (op_mode))
9943eb0b
BE
744 {
745 reverse_compare = 0;
746 reverse_test = 0;
747 eq_test = 1;
748 eq_code = SPU_EQ;
749 }
750 else
751 {
752 reverse_compare = 1;
753 reverse_test = 1;
754 }
85d9c13c
TS
755 break;
756 case LE:
85d9c13c 757 scode = SPU_GT;
dd4ba939 758 if (HONOR_NANS (op_mode))
9943eb0b
BE
759 {
760 reverse_compare = 1;
761 reverse_test = 0;
762 eq_test = 1;
763 eq_code = SPU_EQ;
764 }
765 else
766 {
767 reverse_compare = 0;
768 reverse_test = 1;
769 }
85d9c13c
TS
770 break;
771 case LT:
772 reverse_compare = 1;
773 reverse_test = 0;
774 scode = SPU_GT;
775 break;
776 case GEU:
777 reverse_compare = 1;
778 reverse_test = 1;
779 scode = SPU_GTU;
780 break;
781 case LEU:
782 reverse_compare = 0;
783 reverse_test = 1;
784 scode = SPU_GTU;
785 break;
786 case LTU:
787 reverse_compare = 1;
788 reverse_test = 0;
789 scode = SPU_GTU;
790 break;
791 case NE:
792 reverse_compare = 0;
793 reverse_test = 1;
794 scode = SPU_EQ;
795 break;
796
797 case EQ:
798 scode = SPU_EQ;
799 break;
800 case GT:
801 scode = SPU_GT;
802 break;
803 case GTU:
804 scode = SPU_GTU;
805 break;
806 default:
807 scode = SPU_EQ;
808 break;
809 }
810
85d9c13c
TS
811 switch (op_mode)
812 {
4e10a5a7 813 case E_QImode:
85d9c13c
TS
814 index = 0;
815 comp_mode = QImode;
816 break;
4e10a5a7 817 case E_HImode:
85d9c13c
TS
818 index = 1;
819 comp_mode = HImode;
820 break;
4e10a5a7 821 case E_SImode:
85d9c13c
TS
822 index = 2;
823 break;
4e10a5a7 824 case E_DImode:
85d9c13c
TS
825 index = 3;
826 break;
4e10a5a7 827 case E_TImode:
85d9c13c
TS
828 index = 4;
829 break;
4e10a5a7 830 case E_SFmode:
85d9c13c
TS
831 index = 5;
832 break;
4e10a5a7 833 case E_DFmode:
85d9c13c
TS
834 index = 6;
835 break;
4e10a5a7 836 case E_V16QImode:
39aeae85
SL
837 index = 7;
838 comp_mode = op_mode;
839 break;
4e10a5a7 840 case E_V8HImode:
39aeae85
SL
841 index = 8;
842 comp_mode = op_mode;
843 break;
4e10a5a7 844 case E_V4SImode:
39aeae85
SL
845 index = 9;
846 comp_mode = op_mode;
847 break;
4e10a5a7 848 case E_V4SFmode:
39aeae85
SL
849 index = 10;
850 comp_mode = V4SImode;
851 break;
4e10a5a7 852 case E_V2DFmode:
39aeae85
SL
853 index = 11;
854 comp_mode = V2DImode;
85d9c13c 855 break;
4e10a5a7 856 case E_V2DImode:
85d9c13c
TS
857 default:
858 abort ();
859 }
860
f90b7a5a 861 if (GET_MODE (op1) == DFmode
dd4ba939
BE
862 && (scode != SPU_GT && scode != SPU_EQ))
863 abort ();
85d9c13c 864
f90b7a5a
PB
865 if (is_set == 0 && op1 == const0_rtx
866 && (GET_MODE (op0) == SImode
0b01f619
UW
867 || GET_MODE (op0) == HImode
868 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
85d9c13c
TS
869 {
870 /* Don't need to set a register with the result when we are
871 comparing against zero and branching. */
872 reverse_test = !reverse_test;
f90b7a5a 873 compare_result = op0;
85d9c13c
TS
874 }
875 else
876 {
877 compare_result = gen_reg_rtx (comp_mode);
878
879 if (reverse_compare)
880 {
f90b7a5a
PB
881 rtx t = op1;
882 op1 = op0;
883 op0 = t;
85d9c13c
TS
884 }
885
886 if (spu_comp_icode[index][scode] == 0)
887 abort ();
888
889 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
f90b7a5a
PB
890 (op0, op_mode))
891 op0 = force_reg (op_mode, op0);
85d9c13c 892 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
f90b7a5a
PB
893 (op1, op_mode))
894 op1 = force_reg (op_mode, op1);
85d9c13c 895 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
f90b7a5a 896 op0, op1);
85d9c13c
TS
897 if (comp_rtx == 0)
898 abort ();
899 emit_insn (comp_rtx);
900
9943eb0b
BE
901 if (eq_test)
902 {
903 eq_result = gen_reg_rtx (comp_mode);
904 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
f90b7a5a 905 op0, op1);
9943eb0b
BE
906 if (eq_rtx == 0)
907 abort ();
908 emit_insn (eq_rtx);
947131ba 909 ior_code = optab_handler (ior_optab, comp_mode);
9943eb0b
BE
910 gcc_assert (ior_code != CODE_FOR_nothing);
911 emit_insn (GEN_FCN (ior_code)
912 (compare_result, compare_result, eq_result));
913 }
85d9c13c
TS
914 }
915
916 if (is_set == 0)
917 {
918 rtx bcomp;
919 rtx loc_ref;
920
921 /* We don't have branch on QI compare insns, so we convert the
922 QI compare result to a HI result. */
923 if (comp_mode == QImode)
924 {
925 rtx old_res = compare_result;
926 compare_result = gen_reg_rtx (HImode);
927 comp_mode = HImode;
928 emit_insn (gen_extendqihi2 (compare_result, old_res));
929 }
930
931 if (reverse_test)
932 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
933 else
934 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
935
f90b7a5a 936 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
f7df4a84 937 emit_jump_insn (gen_rtx_SET (pc_rtx,
85d9c13c
TS
938 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
939 loc_ref, pc_rtx)));
940 }
941 else if (is_set == 2)
942 {
f90b7a5a 943 rtx target = operands[0];
85d9c13c
TS
944 int compare_size = GET_MODE_BITSIZE (comp_mode);
945 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
f4b31647 946 machine_mode mode = int_mode_for_size (target_size, 0).require ();
85d9c13c
TS
947 rtx select_mask;
948 rtx op_t = operands[2];
949 rtx op_f = operands[3];
950
951 /* The result of the comparison can be SI, HI or QI mode. Create a
952 mask based on that result. */
953 if (target_size > compare_size)
954 {
955 select_mask = gen_reg_rtx (mode);
956 emit_insn (gen_extend_compare (select_mask, compare_result));
957 }
958 else if (target_size < compare_size)
959 select_mask =
960 gen_rtx_SUBREG (mode, compare_result,
961 (compare_size - target_size) / BITS_PER_UNIT);
962 else if (comp_mode != mode)
963 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
964 else
965 select_mask = compare_result;
966
967 if (GET_MODE (target) != GET_MODE (op_t)
968 || GET_MODE (target) != GET_MODE (op_f))
969 abort ();
970
971 if (reverse_test)
972 emit_insn (gen_selb (target, op_t, op_f, select_mask));
973 else
974 emit_insn (gen_selb (target, op_f, op_t, select_mask));
975 }
976 else
977 {
f90b7a5a 978 rtx target = operands[0];
85d9c13c 979 if (reverse_test)
f7df4a84 980 emit_insn (gen_rtx_SET (compare_result,
85d9c13c
TS
981 gen_rtx_NOT (comp_mode, compare_result)));
982 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
983 emit_insn (gen_extendhisi2 (target, compare_result));
984 else if (GET_MODE (target) == SImode
985 && GET_MODE (compare_result) == QImode)
986 emit_insn (gen_extend_compare (target, compare_result));
987 else
988 emit_move_insn (target, compare_result);
989 }
990}
991
992HOST_WIDE_INT
993const_double_to_hwint (rtx x)
994{
995 HOST_WIDE_INT val;
85d9c13c 996 if (GET_MODE (x) == SFmode)
34a72c33 997 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
85d9c13c
TS
998 else if (GET_MODE (x) == DFmode)
999 {
1000 long l[2];
34a72c33 1001 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
85d9c13c
TS
1002 val = l[0];
1003 val = (val << 32) | (l[1] & 0xffffffff);
1004 }
1005 else
1006 abort ();
1007 return val;
1008}
1009
1010rtx
ef4bddc2 1011hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
85d9c13c
TS
1012{
1013 long tv[2];
1014 REAL_VALUE_TYPE rv;
1015 gcc_assert (mode == SFmode || mode == DFmode);
1016
1017 if (mode == SFmode)
1018 tv[0] = (v << 32) >> 32;
1019 else if (mode == DFmode)
1020 {
1021 tv[1] = (v << 32) >> 32;
1022 tv[0] = v >> 32;
1023 }
1024 real_from_target (&rv, tv, mode);
555affd7 1025 return const_double_from_real_value (rv, mode);
85d9c13c
TS
1026}
1027
1028void
1029print_operand_address (FILE * file, register rtx addr)
1030{
1031 rtx reg;
1032 rtx offset;
1033
09aad82b
TS
1034 if (GET_CODE (addr) == AND
1035 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1036 && INTVAL (XEXP (addr, 1)) == -16)
1037 addr = XEXP (addr, 0);
1038
85d9c13c
TS
1039 switch (GET_CODE (addr))
1040 {
1041 case REG:
1042 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1043 break;
1044
1045 case PLUS:
1046 reg = XEXP (addr, 0);
1047 offset = XEXP (addr, 1);
1048 if (GET_CODE (offset) == REG)
1049 {
1050 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1051 reg_names[REGNO (offset)]);
1052 }
1053 else if (GET_CODE (offset) == CONST_INT)
1054 {
1055 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1056 INTVAL (offset), reg_names[REGNO (reg)]);
1057 }
1058 else
1059 abort ();
1060 break;
1061
1062 case CONST:
1063 case LABEL_REF:
1064 case SYMBOL_REF:
1065 case CONST_INT:
1066 output_addr_const (file, addr);
1067 break;
1068
1069 default:
1070 debug_rtx (addr);
1071 abort ();
1072 }
1073}
1074
1075void
1076print_operand (FILE * file, rtx x, int code)
1077{
ef4bddc2 1078 machine_mode mode = GET_MODE (x);
85d9c13c
TS
1079 HOST_WIDE_INT val;
1080 unsigned char arr[16];
1081 int xcode = GET_CODE (x);
a1c6e4b8 1082 int i, info;
85d9c13c
TS
1083 if (GET_MODE (x) == VOIDmode)
1084 switch (code)
1085 {
85d9c13c
TS
1086 case 'L': /* 128 bits, signed */
1087 case 'm': /* 128 bits, signed */
1088 case 'T': /* 128 bits, signed */
1089 case 't': /* 128 bits, signed */
1090 mode = TImode;
1091 break;
85d9c13c
TS
1092 case 'K': /* 64 bits, signed */
1093 case 'k': /* 64 bits, signed */
1094 case 'D': /* 64 bits, signed */
1095 case 'd': /* 64 bits, signed */
1096 mode = DImode;
1097 break;
85d9c13c
TS
1098 case 'J': /* 32 bits, signed */
1099 case 'j': /* 32 bits, signed */
1100 case 's': /* 32 bits, signed */
1101 case 'S': /* 32 bits, signed */
1102 mode = SImode;
1103 break;
1104 }
1105 switch (code)
1106 {
1107
1108 case 'j': /* 32 bits, signed */
1109 case 'k': /* 64 bits, signed */
1110 case 'm': /* 128 bits, signed */
1111 if (xcode == CONST_INT
1112 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1113 {
1114 gcc_assert (logical_immediate_p (x, mode));
1115 constant_to_array (mode, x, arr);
1116 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1117 val = trunc_int_for_mode (val, SImode);
1118 switch (which_logical_immediate (val))
1119 {
1120 case SPU_ORI:
1121 break;
1122 case SPU_ORHI:
1123 fprintf (file, "h");
1124 break;
1125 case SPU_ORBI:
1126 fprintf (file, "b");
1127 break;
1128 default:
1129 gcc_unreachable();
1130 }
1131 }
1132 else
1133 gcc_unreachable();
1134 return;
1135
1136 case 'J': /* 32 bits, signed */
1137 case 'K': /* 64 bits, signed */
1138 case 'L': /* 128 bits, signed */
1139 if (xcode == CONST_INT
1140 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1141 {
1142 gcc_assert (logical_immediate_p (x, mode)
1143 || iohl_immediate_p (x, mode));
1144 constant_to_array (mode, x, arr);
1145 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1146 val = trunc_int_for_mode (val, SImode);
1147 switch (which_logical_immediate (val))
1148 {
1149 case SPU_ORI:
1150 case SPU_IOHL:
1151 break;
1152 case SPU_ORHI:
1153 val = trunc_int_for_mode (val, HImode);
1154 break;
1155 case SPU_ORBI:
1156 val = trunc_int_for_mode (val, QImode);
1157 break;
1158 default:
1159 gcc_unreachable();
1160 }
1161 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1162 }
1163 else
1164 gcc_unreachable();
1165 return;
1166
1167 case 't': /* 128 bits, signed */
1168 case 'd': /* 64 bits, signed */
1169 case 's': /* 32 bits, signed */
a1c6e4b8 1170 if (CONSTANT_P (x))
85d9c13c 1171 {
a1c6e4b8
TS
1172 enum immediate_class c = classify_immediate (x, mode);
1173 switch (c)
1174 {
1175 case IC_IL1:
1176 constant_to_array (mode, x, arr);
1177 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1178 val = trunc_int_for_mode (val, SImode);
1179 switch (which_immediate_load (val))
1180 {
1181 case SPU_IL:
1182 break;
1183 case SPU_ILA:
1184 fprintf (file, "a");
1185 break;
1186 case SPU_ILH:
1187 fprintf (file, "h");
1188 break;
1189 case SPU_ILHU:
1190 fprintf (file, "hu");
1191 break;
1192 default:
1193 gcc_unreachable ();
1194 }
1195 break;
1196 case IC_CPAT:
1197 constant_to_array (mode, x, arr);
1198 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1199 if (info == 1)
1200 fprintf (file, "b");
1201 else if (info == 2)
1202 fprintf (file, "h");
1203 else if (info == 4)
1204 fprintf (file, "w");
1205 else if (info == 8)
1206 fprintf (file, "d");
1207 break;
1208 case IC_IL1s:
1209 if (xcode == CONST_VECTOR)
1210 {
1211 x = CONST_VECTOR_ELT (x, 0);
1212 xcode = GET_CODE (x);
1213 }
1214 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1215 fprintf (file, "a");
1216 else if (xcode == HIGH)
1217 fprintf (file, "hu");
1218 break;
1219 case IC_FSMBI:
73701e27 1220 case IC_FSMBI2:
a1c6e4b8
TS
1221 case IC_IL2:
1222 case IC_IL2s:
1223 case IC_POOL:
1224 abort ();
1225 }
85d9c13c 1226 }
85d9c13c
TS
1227 else
1228 gcc_unreachable ();
1229 return;
1230
1231 case 'T': /* 128 bits, signed */
1232 case 'D': /* 64 bits, signed */
1233 case 'S': /* 32 bits, signed */
a1c6e4b8 1234 if (CONSTANT_P (x))
85d9c13c 1235 {
a1c6e4b8
TS
1236 enum immediate_class c = classify_immediate (x, mode);
1237 switch (c)
85d9c13c 1238 {
a1c6e4b8
TS
1239 case IC_IL1:
1240 constant_to_array (mode, x, arr);
1241 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1242 val = trunc_int_for_mode (val, SImode);
1243 switch (which_immediate_load (val))
1244 {
1245 case SPU_IL:
1246 case SPU_ILA:
1247 break;
1248 case SPU_ILH:
1249 case SPU_ILHU:
1250 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1251 break;
1252 default:
1253 gcc_unreachable ();
1254 }
1255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1256 break;
1257 case IC_FSMBI:
1258 constant_to_array (mode, x, arr);
1259 val = 0;
1260 for (i = 0; i < 16; i++)
1261 {
1262 val <<= 1;
1263 val |= arr[i] & 1;
1264 }
1265 print_operand (file, GEN_INT (val), 0);
1266 break;
1267 case IC_CPAT:
1268 constant_to_array (mode, x, arr);
1269 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
85d9c13c 1271 break;
a1c6e4b8 1272 case IC_IL1s:
a1c6e4b8 1273 if (xcode == HIGH)
73701e27
TS
1274 x = XEXP (x, 0);
1275 if (GET_CODE (x) == CONST_VECTOR)
1276 x = CONST_VECTOR_ELT (x, 0);
1277 output_addr_const (file, x);
1278 if (xcode == HIGH)
1279 fprintf (file, "@h");
85d9c13c 1280 break;
a1c6e4b8
TS
1281 case IC_IL2:
1282 case IC_IL2s:
73701e27 1283 case IC_FSMBI2:
a1c6e4b8
TS
1284 case IC_POOL:
1285 abort ();
85d9c13c 1286 }
20e9e759 1287 }
85d9c13c
TS
1288 else
1289 gcc_unreachable ();
1290 return;
1291
85d9c13c
TS
1292 case 'C':
1293 if (xcode == CONST_INT)
1294 {
1295 /* Only 4 least significant bits are relevant for generate
1296 control word instructions. */
1297 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1298 return;
1299 }
1300 break;
1301
1302 case 'M': /* print code for c*d */
1303 if (GET_CODE (x) == CONST_INT)
1304 switch (INTVAL (x))
1305 {
1306 case 1:
1307 fprintf (file, "b");
1308 break;
1309 case 2:
1310 fprintf (file, "h");
1311 break;
1312 case 4:
1313 fprintf (file, "w");
1314 break;
1315 case 8:
1316 fprintf (file, "d");
1317 break;
1318 default:
1319 gcc_unreachable();
1320 }
1321 else
1322 gcc_unreachable();
1323 return;
1324
1325 case 'N': /* Negate the operand */
1326 if (xcode == CONST_INT)
1327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1328 else if (xcode == CONST_VECTOR)
1329 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1330 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1331 return;
1332
1333 case 'I': /* enable/disable interrupts */
1334 if (xcode == CONST_INT)
1335 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1336 return;
1337
1338 case 'b': /* branch modifiers */
1339 if (xcode == REG)
1340 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1341 else if (COMPARISON_P (x))
1342 fprintf (file, "%s", xcode == NE ? "n" : "");
1343 return;
1344
1345 case 'i': /* indirect call */
1346 if (xcode == MEM)
1347 {
1348 if (GET_CODE (XEXP (x, 0)) == REG)
1349 /* Used in indirect function calls. */
1350 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1351 else
cc8ca59e 1352 output_address (GET_MODE (x), XEXP (x, 0));
85d9c13c
TS
1353 }
1354 return;
1355
1356 case 'p': /* load/store */
1357 if (xcode == MEM)
1358 {
1359 x = XEXP (x, 0);
1360 xcode = GET_CODE (x);
1361 }
09aad82b
TS
1362 if (xcode == AND)
1363 {
1364 x = XEXP (x, 0);
1365 xcode = GET_CODE (x);
1366 }
85d9c13c
TS
1367 if (xcode == REG)
1368 fprintf (file, "d");
1369 else if (xcode == CONST_INT)
1370 fprintf (file, "a");
1371 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1372 fprintf (file, "r");
1373 else if (xcode == PLUS || xcode == LO_SUM)
1374 {
1375 if (GET_CODE (XEXP (x, 1)) == REG)
1376 fprintf (file, "x");
1377 else
1378 fprintf (file, "d");
1379 }
1380 return;
1381
73701e27
TS
1382 case 'e':
1383 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1384 val &= 0x7;
1385 output_addr_const (file, GEN_INT (val));
1386 return;
1387
1388 case 'f':
1389 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1390 val &= 0x1f;
1391 output_addr_const (file, GEN_INT (val));
1392 return;
1393
1394 case 'g':
1395 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1396 val &= 0x3f;
1397 output_addr_const (file, GEN_INT (val));
1398 return;
1399
1400 case 'h':
1401 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1402 val = (val >> 3) & 0x1f;
1403 output_addr_const (file, GEN_INT (val));
1404 return;
1405
1406 case 'E':
1407 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1408 val = -val;
1409 val &= 0x7;
1410 output_addr_const (file, GEN_INT (val));
1411 return;
1412
1413 case 'F':
1414 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1415 val = -val;
1416 val &= 0x1f;
1417 output_addr_const (file, GEN_INT (val));
1418 return;
1419
1420 case 'G':
1421 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1422 val = -val;
1423 val &= 0x3f;
1424 output_addr_const (file, GEN_INT (val));
1425 return;
1426
1427 case 'H':
1428 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1429 val = -(val & -8ll);
1430 val = (val >> 3) & 0x1f;
1431 output_addr_const (file, GEN_INT (val));
1432 return;
1433
5345cf68
TS
1434 case 'v':
1435 case 'w':
1436 constant_to_array (mode, x, arr);
1437 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1438 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1439 return;
1440
85d9c13c
TS
1441 case 0:
1442 if (xcode == REG)
1443 fprintf (file, "%s", reg_names[REGNO (x)]);
1444 else if (xcode == MEM)
cc8ca59e 1445 output_address (GET_MODE (x), XEXP (x, 0));
85d9c13c 1446 else if (xcode == CONST_VECTOR)
a1c6e4b8 1447 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
85d9c13c
TS
1448 else
1449 output_addr_const (file, x);
1450 return;
1451
44c7bd63 1452 /* unused letters
5345cf68 1453 o qr u yz
73701e27 1454 AB OPQR UVWXYZ */
85d9c13c
TS
1455 default:
1456 output_operand_lossage ("invalid %%xn code");
1457 }
1458 gcc_unreachable ();
1459}
1460
85d9c13c
TS
1461/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1462 caller saved register. For leaf functions it is more efficient to
1463 use a volatile register because we won't need to save and restore the
1464 pic register. This routine is only valid after register allocation
1465 is completed, so we can pick an unused register. */
1466static rtx
1467get_pic_reg (void)
1468{
85d9c13c
TS
1469 if (!reload_completed && !reload_in_progress)
1470 abort ();
c5d94218
UW
1471
1472 /* If we've already made the decision, we need to keep with it. Once we've
1473 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1474 return true since the register is now live; this should not cause us to
1475 "switch back" to using pic_offset_table_rtx. */
1476 if (!cfun->machine->pic_reg)
1477 {
416ff32e 1478 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
c5d94218
UW
1479 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1480 else
1481 cfun->machine->pic_reg = pic_offset_table_rtx;
1482 }
1483
1484 return cfun->machine->pic_reg;
85d9c13c
TS
1485}
1486
73701e27
TS
1487/* Split constant addresses to handle cases that are too large.
1488 Add in the pic register when in PIC mode.
1489 Split immediates that require more than 1 instruction. */
a1c6e4b8
TS
1490int
1491spu_split_immediate (rtx * ops)
20e9e759 1492{
ef4bddc2 1493 machine_mode mode = GET_MODE (ops[0]);
a1c6e4b8
TS
1494 enum immediate_class c = classify_immediate (ops[1], mode);
1495
1496 switch (c)
20e9e759 1497 {
a1c6e4b8
TS
1498 case IC_IL2:
1499 {
1500 unsigned char arrhi[16];
1501 unsigned char arrlo[16];
88f091f5 1502 rtx to, temp, hi, lo;
a1c6e4b8 1503 int i;
88f091f5
UW
1504 /* We need to do reals as ints because the constant used in the
1505 IOR might not be a legitimate real constant. */
304b9962 1506 scalar_int_mode imode = int_mode_for_mode (mode).require ();
a1c6e4b8 1507 constant_to_array (mode, ops[1], arrhi);
88f091f5
UW
1508 if (imode != mode)
1509 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1510 else
1511 to = ops[0];
1512 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
a1c6e4b8
TS
1513 for (i = 0; i < 16; i += 4)
1514 {
1515 arrlo[i + 2] = arrhi[i + 2];
1516 arrlo[i + 3] = arrhi[i + 3];
1517 arrlo[i + 0] = arrlo[i + 1] = 0;
1518 arrhi[i + 2] = arrhi[i + 3] = 0;
1519 }
88f091f5
UW
1520 hi = array_to_constant (imode, arrhi);
1521 lo = array_to_constant (imode, arrlo);
1522 emit_move_insn (temp, hi);
f7df4a84 1523 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
a1c6e4b8
TS
1524 return 1;
1525 }
73701e27
TS
1526 case IC_FSMBI2:
1527 {
1528 unsigned char arr_fsmbi[16];
1529 unsigned char arr_andbi[16];
1530 rtx to, reg_fsmbi, reg_and;
1531 int i;
73701e27
TS
1532 /* We need to do reals as ints because the constant used in the
1533 * AND might not be a legitimate real constant. */
304b9962 1534 scalar_int_mode imode = int_mode_for_mode (mode).require ();
73701e27
TS
1535 constant_to_array (mode, ops[1], arr_fsmbi);
1536 if (imode != mode)
1537 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1538 else
1539 to = ops[0];
1540 for (i = 0; i < 16; i++)
1541 if (arr_fsmbi[i] != 0)
1542 {
1543 arr_andbi[0] = arr_fsmbi[i];
1544 arr_fsmbi[i] = 0xff;
1545 }
1546 for (i = 1; i < 16; i++)
1547 arr_andbi[i] = arr_andbi[0];
1548 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1549 reg_and = array_to_constant (imode, arr_andbi);
1550 emit_move_insn (to, reg_fsmbi);
f7df4a84 1551 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
73701e27
TS
1552 return 1;
1553 }
a1c6e4b8
TS
1554 case IC_POOL:
1555 if (reload_in_progress || reload_completed)
1556 {
1557 rtx mem = force_const_mem (mode, ops[1]);
1558 if (TARGET_LARGE_MEM)
1559 {
1560 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1561 emit_move_insn (addr, XEXP (mem, 0));
1562 mem = replace_equiv_address (mem, addr);
1563 }
1564 emit_move_insn (ops[0], mem);
1565 return 1;
1566 }
1567 break;
1568 case IC_IL1s:
1569 case IC_IL2s:
1570 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1571 {
1572 if (c == IC_IL2s)
1573 {
73701e27
TS
1574 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1575 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
a1c6e4b8
TS
1576 }
1577 else if (flag_pic)
1578 emit_insn (gen_pic (ops[0], ops[1]));
1579 if (flag_pic)
1580 {
1581 rtx pic_reg = get_pic_reg ();
1582 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
a1c6e4b8
TS
1583 }
1584 return flag_pic || c == IC_IL2s;
1585 }
1586 break;
1587 case IC_IL1:
1588 case IC_FSMBI:
1589 case IC_CPAT:
1590 break;
20e9e759 1591 }
a1c6e4b8 1592 return 0;
20e9e759
TS
1593}
1594
85d9c13c
TS
1595/* SAVING is TRUE when we are generating the actual load and store
1596 instructions for REGNO. When determining the size of the stack
1597 needed for saving register we must allocate enough space for the
1598 worst case, because we don't always have the information early enough
1599 to not allocate it. But we can at least eliminate the actual loads
1600 and stores during the prologue/epilogue. */
1601static int
1602need_to_save_reg (int regno, int saving)
1603{
6fb5fa3c 1604 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
85d9c13c
TS
1605 return 1;
1606 if (flag_pic
1607 && regno == PIC_OFFSET_TABLE_REGNUM
c5d94218 1608 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
85d9c13c
TS
1609 return 1;
1610 return 0;
1611}
1612
1613/* This function is only correct starting with local register
1614 allocation */
1615int
1616spu_saved_regs_size (void)
1617{
1618 int reg_save_size = 0;
1619 int regno;
1620
1621 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1622 if (need_to_save_reg (regno, 0))
1623 reg_save_size += 0x10;
1624 return reg_save_size;
1625}
1626
23c39aaa 1627static rtx_insn *
85d9c13c
TS
1628frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1629{
1630 rtx reg = gen_rtx_REG (V4SImode, regno);
1631 rtx mem =
1632 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1633 return emit_insn (gen_movv4si (mem, reg));
1634}
1635
23c39aaa 1636static rtx_insn *
85d9c13c
TS
1637frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1638{
1639 rtx reg = gen_rtx_REG (V4SImode, regno);
1640 rtx mem =
1641 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1642 return emit_insn (gen_movv4si (reg, mem));
1643}
1644
1645/* This happens after reload, so we need to expand it. */
23c39aaa 1646static rtx_insn *
85d9c13c
TS
1647frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1648{
23c39aaa 1649 rtx_insn *insn;
85d9c13c
TS
1650 if (satisfies_constraint_K (GEN_INT (imm)))
1651 {
1652 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1653 }
1654 else
1655 {
6fb5fa3c 1656 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
85d9c13c
TS
1657 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1658 if (REGNO (src) == REGNO (scratch))
1659 abort ();
1660 }
85d9c13c
TS
1661 return insn;
1662}
1663
1664/* Return nonzero if this function is known to have a null epilogue. */
1665
1666int
1667direct_return (void)
1668{
1669 if (reload_completed)
1670 {
1671 if (cfun->static_chain_decl == 0
1672 && (spu_saved_regs_size ()
1673 + get_frame_size ()
38173d38
JH
1674 + crtl->outgoing_args_size
1675 + crtl->args.pretend_args_size == 0)
416ff32e 1676 && crtl->is_leaf)
85d9c13c
TS
1677 return 1;
1678 }
1679 return 0;
1680}
1681
1682/*
1683 The stack frame looks like this:
1684 +-------------+
1685 | incoming |
7310a2da
SSF
1686 | args |
1687 AP -> +-------------+
85d9c13c
TS
1688 | $lr save |
1689 +-------------+
1690 prev SP | back chain |
1691 +-------------+
1692 | var args |
38173d38 1693 | reg save | crtl->args.pretend_args_size bytes
85d9c13c
TS
1694 +-------------+
1695 | ... |
1696 | saved regs | spu_saved_regs_size() bytes
7310a2da 1697 FP -> +-------------+
85d9c13c 1698 | ... |
7310a2da
SSF
1699 | vars | get_frame_size() bytes
1700 HFP -> +-------------+
85d9c13c
TS
1701 | ... |
1702 | outgoing |
38173d38 1703 | args | crtl->outgoing_args_size bytes
85d9c13c
TS
1704 +-------------+
1705 | $lr of next |
1706 | frame |
1707 +-------------+
7310a2da
SSF
1708 | back chain |
1709 SP -> +-------------+
85d9c13c
TS
1710
1711*/
1712void
1713spu_expand_prologue (void)
1714{
1715 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1716 HOST_WIDE_INT total_size;
1717 HOST_WIDE_INT saved_regs_size;
1718 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1719 rtx scratch_reg_0, scratch_reg_1;
23c39aaa
DM
1720 rtx_insn *insn;
1721 rtx real;
85d9c13c 1722
c5d94218
UW
1723 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1724 cfun->machine->pic_reg = pic_offset_table_rtx;
85d9c13c
TS
1725
1726 if (spu_naked_function_p (current_function_decl))
1727 return;
1728
1729 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1730 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1731
1732 saved_regs_size = spu_saved_regs_size ();
1733 total_size = size + saved_regs_size
38173d38
JH
1734 + crtl->outgoing_args_size
1735 + crtl->args.pretend_args_size;
85d9c13c 1736
416ff32e 1737 if (!crtl->is_leaf
e3b5732b 1738 || cfun->calls_alloca || total_size > 0)
85d9c13c
TS
1739 total_size += STACK_POINTER_OFFSET;
1740
1741 /* Save this first because code after this might use the link
1742 register as a scratch register. */
416ff32e 1743 if (!crtl->is_leaf)
85d9c13c
TS
1744 {
1745 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1746 RTX_FRAME_RELATED_P (insn) = 1;
1747 }
1748
1749 if (total_size > 0)
1750 {
38173d38 1751 offset = -crtl->args.pretend_args_size;
85d9c13c
TS
1752 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1753 if (need_to_save_reg (regno, 1))
1754 {
1755 offset -= 16;
1756 insn = frame_emit_store (regno, sp_reg, offset);
1757 RTX_FRAME_RELATED_P (insn) = 1;
1758 }
1759 }
1760
c5d94218 1761 if (flag_pic && cfun->machine->pic_reg)
85d9c13c 1762 {
c5d94218 1763 rtx pic_reg = cfun->machine->pic_reg;
85d9c13c 1764 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
85d9c13c 1765 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
85d9c13c
TS
1766 }
1767
1768 if (total_size > 0)
1769 {
1770 if (flag_stack_check)
1771 {
24fc18b9 1772 /* We compare against total_size-1 because
85d9c13c
TS
1773 ($sp >= total_size) <=> ($sp > total_size-1) */
1774 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1775 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1776 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1777 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1778 {
1779 emit_move_insn (scratch_v4si, size_v4si);
1780 size_v4si = scratch_v4si;
1781 }
1782 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
ff03930a 1783 emit_insn (gen_vec_extractv4sisi
85d9c13c
TS
1784 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1785 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1786 }
1787
1788 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1789 the value of the previous $sp because we save it as the back
1790 chain. */
1791 if (total_size <= 2000)
1792 {
1793 /* In this case we save the back chain first. */
1794 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
85d9c13c
TS
1795 insn =
1796 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1797 }
85d9c13c
TS
1798 else
1799 {
1800 insn = emit_move_insn (scratch_reg_0, sp_reg);
85d9c13c
TS
1801 insn =
1802 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1803 }
1804 RTX_FRAME_RELATED_P (insn) = 1;
1805 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
bbbbb16a 1806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
85d9c13c
TS
1807
1808 if (total_size > 2000)
1809 {
1810 /* Save the back chain ptr */
1811 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
85d9c13c
TS
1812 }
1813
1814 if (frame_pointer_needed)
1815 {
1816 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1817 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
38173d38 1818 + crtl->outgoing_args_size;
85d9c13c 1819 /* Set the new frame_pointer */
10d55907
UW
1820 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1821 RTX_FRAME_RELATED_P (insn) = 1;
1822 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
bbbbb16a 1823 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
73701e27 1824 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
85d9c13c
TS
1825 }
1826 }
1827
a11e0df4 1828 if (flag_stack_usage_info)
4c825c02 1829 current_function_static_stack_size = total_size;
85d9c13c
TS
1830}
1831
1832void
1833spu_expand_epilogue (bool sibcall_p)
1834{
1835 int size = get_frame_size (), offset, regno;
1836 HOST_WIDE_INT saved_regs_size, total_size;
1837 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
c187d33c 1838 rtx scratch_reg_0;
85d9c13c 1839
85d9c13c
TS
1840 if (spu_naked_function_p (current_function_decl))
1841 return;
1842
1843 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1844
1845 saved_regs_size = spu_saved_regs_size ();
1846 total_size = size + saved_regs_size
38173d38
JH
1847 + crtl->outgoing_args_size
1848 + crtl->args.pretend_args_size;
85d9c13c 1849
416ff32e 1850 if (!crtl->is_leaf
e3b5732b 1851 || cfun->calls_alloca || total_size > 0)
85d9c13c
TS
1852 total_size += STACK_POINTER_OFFSET;
1853
1854 if (total_size > 0)
1855 {
e3b5732b 1856 if (cfun->calls_alloca)
85d9c13c
TS
1857 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1858 else
1859 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1860
1861
1862 if (saved_regs_size > 0)
1863 {
38173d38 1864 offset = -crtl->args.pretend_args_size;
85d9c13c
TS
1865 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1866 if (need_to_save_reg (regno, 1))
1867 {
1868 offset -= 0x10;
1869 frame_emit_load (regno, sp_reg, offset);
1870 }
1871 }
1872 }
1873
416ff32e 1874 if (!crtl->is_leaf)
85d9c13c
TS
1875 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1876
1877 if (!sibcall_p)
1878 {
c41c1387 1879 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
c187d33c 1880 emit_jump_insn (gen__return ());
85d9c13c 1881 }
85d9c13c
TS
1882}
1883
1884rtx
1885spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1886{
1887 if (count != 0)
1888 return 0;
1889 /* This is inefficient because it ends up copying to a save-register
1890 which then gets saved even though $lr has already been saved. But
1891 it does generate better code for leaf functions and we don't need
1892 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1893 used for __builtin_return_address anyway, so maybe we don't care if
1894 it's inefficient. */
1895 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1896}
1897\f
1898
1899/* Given VAL, generate a constant appropriate for MODE.
1900 If MODE is a vector mode, every element will be VAL.
1901 For TImode, VAL will be zero extended to 128 bits. */
1902rtx
ef4bddc2 1903spu_const (machine_mode mode, HOST_WIDE_INT val)
85d9c13c
TS
1904{
1905 rtx inner;
1906 rtvec v;
1907 int units, i;
1908
1909 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1910 || GET_MODE_CLASS (mode) == MODE_FLOAT
1911 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1912 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1913
1914 if (GET_MODE_CLASS (mode) == MODE_INT)
1915 return immed_double_const (val, 0, mode);
1916
1917 /* val is the bit representation of the float */
1918 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1919 return hwint_to_const_double (mode, val);
1920
1921 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1922 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1923 else
1924 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1925
1926 units = GET_MODE_NUNITS (mode);
1927
1928 v = rtvec_alloc (units);
1929
1930 for (i = 0; i < units; ++i)
1931 RTVEC_ELT (v, i) = inner;
1932
1933 return gen_rtx_CONST_VECTOR (mode, v);
1934}
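/* For illustration: spu_const (SImode, 5) is just (const_int 5);
   spu_const (V4SImode, 1) builds (const_vector:V4SI [1 1 1 1]); and
   spu_const (SFmode, 0x3f800000) yields the CONST_DOUBLE whose bit
   image is 1.0f, since VAL is taken as the bit pattern for float
   modes.  */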
85d9c13c 1935
39aeae85
SL
1936/* Create a MODE vector constant from 4 ints. */
1937rtx
ef4bddc2 1938spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
39aeae85
SL
1939{
1940 unsigned char arr[16];
1941 arr[0] = (a >> 24) & 0xff;
1942 arr[1] = (a >> 16) & 0xff;
1943 arr[2] = (a >> 8) & 0xff;
1944 arr[3] = (a >> 0) & 0xff;
1945 arr[4] = (b >> 24) & 0xff;
1946 arr[5] = (b >> 16) & 0xff;
1947 arr[6] = (b >> 8) & 0xff;
1948 arr[7] = (b >> 0) & 0xff;
1949 arr[8] = (c >> 24) & 0xff;
1950 arr[9] = (c >> 16) & 0xff;
1951 arr[10] = (c >> 8) & 0xff;
1952 arr[11] = (c >> 0) & 0xff;
1953 arr[12] = (d >> 24) & 0xff;
1954 arr[13] = (d >> 16) & 0xff;
1955 arr[14] = (d >> 8) & 0xff;
1956 arr[15] = (d >> 0) & 0xff;
1957 return array_to_constant(mode, arr);
1958}
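/* The packing above is big-endian within each 32-bit word, i.e.
   arr[i] = (word >> (24 - 8 * (i & 3))) & 0xff for the word holding
   byte i.  For example, spu_const_from_ints (V16QImode, 0x00010203,
   0x04050607, 0x08090a0b, 0x0c0d0e0f) produces the sixteen bytes
   0, 1, 2, ..., 15 in order.  */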
9dcc2e87
TS
1959\f
1960/* branch hint stuff */
39aeae85 1961
85d9c13c
TS
1962/* An array of these is used to propagate hints to predecessor blocks. */
1963struct spu_bb_info
1964{
23c39aaa 1965 rtx_insn *prop_jump; /* propagated from another block */
9dcc2e87 1966 int bb_index; /* the original block. */
85d9c13c 1967};
9dcc2e87 1968static struct spu_bb_info *spu_bb_info;
85d9c13c 1969
9dcc2e87 1970#define STOP_HINT_P(INSN) \
b64925dc 1971 (CALL_P(INSN) \
9dcc2e87
TS
1972 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1973 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1974
1975/* 1 when RTX is a hinted branch or its target. We keep track of
1976 what has been hinted so the safe-hint code can test it easily. */
1977#define HINTED_P(RTX) \
1978 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1979
1980/* 1 when RTX is an insn that must be scheduled on an even boundary. */
1981#define SCHED_ON_EVEN_P(RTX) \
1982 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1983
1984/* Emit a nop for INSN such that the two will dual issue. This assumes
1985 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1986 We check for TImode to handle a MULTI1 insn which has dual issued its
b3d45ff0 1987 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
9dcc2e87 1988static void
23c39aaa 1989emit_nop_for_insn (rtx_insn *insn)
85d9c13c 1990{
9dcc2e87 1991 int p;
23c39aaa 1992 rtx_insn *new_insn;
b3d45ff0
UW
1993
1994 /* We need to handle JUMP_TABLE_DATA separately. */
1995 if (JUMP_TABLE_DATA_P (insn))
1996 {
1997 new_insn = emit_insn_after (gen_lnop(), insn);
1998 recog_memoized (new_insn);
1999 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2000 return;
2001 }
2002
9dcc2e87
TS
2003 p = get_pipe (insn);
2004 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2005 new_insn = emit_insn_after (gen_lnop (), insn);
2006 else if (p == 1 && GET_MODE (insn) == TImode)
85d9c13c 2007 {
9dcc2e87
TS
2008 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2009 PUT_MODE (new_insn, TImode);
2010 PUT_MODE (insn, VOIDmode);
2011 }
2012 else
2013 new_insn = emit_insn_after (gen_lnop (), insn);
2014 recog_memoized (new_insn);
9d12bc68 2015 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
9dcc2e87
TS
2016}
2017
2018/* Insert nops in basic blocks to meet dual issue alignment
2019 requirements. Also make sure hbrp and hint instructions are at least
2020 one cycle apart, possibly inserting a nop. */
2021static void
2022pad_bb(void)
2023{
23c39aaa 2024 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
9dcc2e87
TS
2025 int length;
2026 int addr;
2027
2028 /* This sets up INSN_ADDRESSES. */
2029 shorten_branches (get_insns ());
2030
2031 /* Keep track of length added by nops. */
2032 length = 0;
2033
2034 prev_insn = 0;
2035 insn = get_insns ();
2036 if (!active_insn_p (insn))
2037 insn = next_active_insn (insn);
2038 for (; insn; insn = next_insn)
2039 {
2040 next_insn = next_active_insn (insn);
2041 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2042 || INSN_CODE (insn) == CODE_FOR_hbr)
85d9c13c 2043 {
9dcc2e87
TS
2044 if (hbr_insn)
2045 {
2046 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2047 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2048 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2049 || (a1 - a0 == 4))
2050 {
2051 prev_insn = emit_insn_before (gen_lnop (), insn);
2052 PUT_MODE (prev_insn, GET_MODE (insn));
2053 PUT_MODE (insn, TImode);
9d12bc68 2054 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
9dcc2e87
TS
2055 length += 4;
2056 }
2057 }
2058 hbr_insn = insn;
2059 }
7c40228a 2060 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
9dcc2e87
TS
2061 {
2062 if (GET_MODE (insn) == TImode)
2063 PUT_MODE (next_insn, TImode);
2064 insn = next_insn;
2065 next_insn = next_active_insn (insn);
2066 }
2067 addr = INSN_ADDRESSES (INSN_UID (insn));
2068 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2069 {
2070 if (((addr + length) & 7) != 0)
2071 {
2072 emit_nop_for_insn (prev_insn);
2073 length += 4;
2074 }
85d9c13c 2075 }
9dcc2e87
TS
2076 else if (GET_MODE (insn) == TImode
2077 && ((next_insn && GET_MODE (next_insn) != TImode)
2078 || get_attr_type (insn) == TYPE_MULTI0)
2079 && ((addr + length) & 7) != 0)
2080 {
2081 /* prev_insn will always be set because the first insn is
2082 always 8-byte aligned. */
2083 emit_nop_for_insn (prev_insn);
2084 length += 4;
2085 }
2086 prev_insn = insn;
85d9c13c 2087 }
85d9c13c
TS
2088}
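/* For illustration: when a call or jump marked SCHED_ON_EVEN_P would
   otherwise fall at an address that is not 8-byte aligned, a 4-byte
   nop or lnop is emitted next to the previous insn so the branch moves
   onto an 8-byte boundary; the same is done for a misaligned TImode
   insn that starts a dual-issue pair.  */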
2089
9dcc2e87
TS
2090\f
2091/* Routines for branch hints. */
2092
85d9c13c 2093static void
23c39aaa 2094spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
9dcc2e87 2095 int distance, sbitmap blocks)
85d9c13c 2096{
23c39aaa
DM
2097 rtx_insn *hint;
2098 rtx_insn *insn;
8942ee0f 2099 rtx_jump_table_data *table;
85d9c13c
TS
2100
2101 if (before == 0 || branch == 0 || target == 0)
2102 return;
2103
9dcc2e87
TS
2104 /* While scheduling we require hints to be no further than 600 bytes, so
2105 we need to enforce that here too. */
85d9c13c
TS
2106 if (distance > 600)
2107 return;
2108
9dcc2e87 2109 /* If BEFORE is a basic block note, emit the hint after the note. */
051de0eb 2110 if (NOTE_INSN_BASIC_BLOCK_P (before))
9dcc2e87 2111 before = NEXT_INSN (before);
85d9c13c 2112
f370536c 2113 rtx_code_label *branch_label = gen_label_rtx ();
85d9c13c
TS
2114 LABEL_NUSES (branch_label)++;
2115 LABEL_PRESERVE_P (branch_label) = 1;
2116 insn = emit_label_before (branch_label, branch);
f370536c 2117 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
d7c028c0 2118 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
9dcc2e87 2119
f370536c 2120 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
9dcc2e87 2121 recog_memoized (hint);
9d12bc68 2122 INSN_LOCATION (hint) = INSN_LOCATION (branch);
9dcc2e87 2123 HINTED_P (branch) = 1;
85d9c13c 2124
9dcc2e87
TS
2125 if (GET_CODE (target) == LABEL_REF)
2126 HINTED_P (XEXP (target, 0)) = 1;
2127 else if (tablejump_p (branch, 0, &table))
85d9c13c 2128 {
9dcc2e87
TS
2129 rtvec vec;
2130 int j;
2131 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2132 vec = XVEC (PATTERN (table), 0);
2133 else
2134 vec = XVEC (PATTERN (table), 1);
2135 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2136 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
85d9c13c 2137 }
9dcc2e87
TS
2138
2139 if (distance >= 588)
85d9c13c 2140 {
9dcc2e87
TS
2141 /* Make sure the hint isn't scheduled any earlier than this point,
2142 which could make it too far for the branch offset to fit. */
f626b979
UW
2143 insn = emit_insn_before (gen_blockage (), hint);
2144 recog_memoized (insn);
9d12bc68 2145 INSN_LOCATION (insn) = INSN_LOCATION (hint);
9dcc2e87
TS
2146 }
2147 else if (distance <= 8 * 4)
2148 {
2149 /* To guarantee at least 8 insns between the hint and branch we
2150 insert nops. */
2151 int d;
2152 for (d = distance; d < 8 * 4; d += 4)
2153 {
2154 insn =
2155 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2156 recog_memoized (insn);
9d12bc68 2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
9dcc2e87
TS
2158 }
2159
2160 /* Make sure any nops inserted aren't scheduled before the hint. */
f626b979
UW
2161 insn = emit_insn_after (gen_blockage (), hint);
2162 recog_memoized (insn);
9d12bc68 2163 INSN_LOCATION (insn) = INSN_LOCATION (hint);
9dcc2e87
TS
2164
2165 /* Make sure any nops inserted aren't scheduled after the call. */
2166 if (CALL_P (branch) && distance < 8 * 4)
f626b979
UW
2167 {
2168 insn = emit_insn_before (gen_blockage (), branch);
2169 recog_memoized (insn);
9d12bc68 2170 INSN_LOCATION (insn) = INSN_LOCATION (branch);
f626b979 2171 }
85d9c13c 2172 }
85d9c13c
TS
2173}
2174
2175/* Returns 0 if we don't want a hint for this branch. Otherwise return
2176 the rtx for the branch target. */
2177static rtx
23c39aaa 2178get_branch_target (rtx_insn *branch)
85d9c13c 2179{
b64925dc 2180 if (JUMP_P (branch))
85d9c13c
TS
2181 {
2182 rtx set, src;
2183
2184 /* Return statements */
2185 if (GET_CODE (PATTERN (branch)) == RETURN)
2186 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2187
aa633255 2188 /* ASM GOTOs. */
3f254607 2189 if (extract_asm_operands (PATTERN (branch)) != NULL)
aa633255
AP
2190 return NULL;
2191
85d9c13c
TS
2192 set = single_set (branch);
2193 src = SET_SRC (set);
2194 if (GET_CODE (SET_DEST (set)) != PC)
2195 abort ();
2196
2197 if (GET_CODE (src) == IF_THEN_ELSE)
2198 {
2199 rtx lab = 0;
2200 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2201 if (note)
2202 {
2203 /* If the more probable case is not a fall through, then
2204 try a branch hint. */
5fa396ad
JH
2205 int prob = profile_probability::from_reg_br_prob_note
2206 (XINT (note, 0)).to_reg_br_prob_base ();
85d9c13c
TS
2207 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2208 && GET_CODE (XEXP (src, 1)) != PC)
2209 lab = XEXP (src, 1);
2210 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2211 && GET_CODE (XEXP (src, 2)) != PC)
2212 lab = XEXP (src, 2);
2213 }
2214 if (lab)
2215 {
2216 if (GET_CODE (lab) == RETURN)
2217 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2218 return lab;
2219 }
2220 return 0;
2221 }
2222
2223 return src;
2224 }
b64925dc 2225 else if (CALL_P (branch))
85d9c13c
TS
2226 {
2227 rtx call;
2228 /* All of our call patterns are in a PARALLEL and the CALL is
2229 the first pattern in the PARALLEL. */
2230 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2231 abort ();
2232 call = XVECEXP (PATTERN (branch), 0, 0);
2233 if (GET_CODE (call) == SET)
2234 call = SET_SRC (call);
2235 if (GET_CODE (call) != CALL)
2236 abort ();
2237 return XEXP (XEXP (call, 0), 0);
2238 }
2239 return 0;
2240}
2241
9dcc2e87
TS
2242/* The special $hbr register is used to prevent the insn scheduler from
2243 moving hbr insns across instructions which invalidate them. It
2244 should only be used in a clobber, and this function searches for
2245 insns which clobber it. */
2246static bool
23c39aaa 2247insn_clobbers_hbr (rtx_insn *insn)
9dcc2e87
TS
2248{
2249 if (INSN_P (insn)
2250 && GET_CODE (PATTERN (insn)) == PARALLEL)
2251 {
2252 rtx parallel = PATTERN (insn);
2253 rtx clobber;
2254 int j;
2255 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2256 {
2257 clobber = XVECEXP (parallel, 0, j);
2258 if (GET_CODE (clobber) == CLOBBER
2259 && GET_CODE (XEXP (clobber, 0)) == REG
2260 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2261 return 1;
2262 }
2263 }
2264 return 0;
2265}
2266
2267/* Search up to 32 insns starting at FIRST:
2268 - at any kind of hinted branch, just return
2269 - at any unconditional branch in the first 15 insns, just return
2270 - at a call or indirect branch, after the first 15 insns, force it to
2271 an even address and return
2272 - at any unconditional branch, after the first 15 insns, force it to
2273 an even address.
2274 At the end of the search, insert an hbrp within 4 insns of FIRST,
2275 and an hbrp within 16 instructions of FIRST.
2276 */
85d9c13c 2277static void
23c39aaa 2278insert_hbrp_for_ilb_runout (rtx_insn *first)
85d9c13c 2279{
23c39aaa 2280 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
9dcc2e87
TS
2281 int addr = 0, length, first_addr = -1;
2282 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2283 int insert_lnop_after = 0;
2284 for (insn = first; insn; insn = NEXT_INSN (insn))
2285 if (INSN_P (insn))
2286 {
2287 if (first_addr == -1)
2288 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2289 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2290 length = get_attr_length (insn);
2291
2292 if (before_4 == 0 && addr + length >= 4 * 4)
2293 before_4 = insn;
2294 /* We test for 14 instructions because the first hbrp will add
2295 up to 2 instructions. */
2296 if (before_16 == 0 && addr + length >= 14 * 4)
2297 before_16 = insn;
2298
2299 if (INSN_CODE (insn) == CODE_FOR_hbr)
2300 {
2301 /* Make sure an hbrp is at least 2 cycles away from a hint.
2302 Insert an lnop after the hbrp when necessary. */
2303 if (before_4 == 0 && addr > 0)
2304 {
2305 before_4 = insn;
2306 insert_lnop_after |= 1;
2307 }
2308 else if (before_4 && addr <= 4 * 4)
2309 insert_lnop_after |= 1;
2310 if (before_16 == 0 && addr > 10 * 4)
2311 {
2312 before_16 = insn;
2313 insert_lnop_after |= 2;
2314 }
2315 else if (before_16 && addr <= 14 * 4)
2316 insert_lnop_after |= 2;
2317 }
85d9c13c 2318
9dcc2e87
TS
2319 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2320 {
2321 if (addr < hbrp_addr0)
2322 hbrp_addr0 = addr;
2323 else if (addr < hbrp_addr1)
2324 hbrp_addr1 = addr;
2325 }
85d9c13c 2326
9dcc2e87
TS
2327 if (CALL_P (insn) || JUMP_P (insn))
2328 {
2329 if (HINTED_P (insn))
2330 return;
2331
2332 /* Any branch after the first 15 insns should be on an even
2333 address to avoid a special case branch. There might be
2334 some nops and/or hbrps inserted, so we test after 10
2335 insns. */
2336 if (addr > 10 * 4)
2337 SCHED_ON_EVEN_P (insn) = 1;
2338 }
85d9c13c 2339
9dcc2e87
TS
2340 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2341 return;
2342
2343
2344 if (addr + length >= 32 * 4)
85d9c13c 2345 {
9dcc2e87
TS
2346 gcc_assert (before_4 && before_16);
2347 if (hbrp_addr0 > 4 * 4)
85d9c13c 2348 {
9dcc2e87
TS
2349 insn =
2350 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2351 recog_memoized (insn);
9d12bc68 2352 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
9dcc2e87
TS
2353 INSN_ADDRESSES_NEW (insn,
2354 INSN_ADDRESSES (INSN_UID (before_4)));
2355 PUT_MODE (insn, GET_MODE (before_4));
2356 PUT_MODE (before_4, TImode);
2357 if (insert_lnop_after & 1)
85d9c13c 2358 {
9dcc2e87
TS
2359 insn = emit_insn_before (gen_lnop (), before_4);
2360 recog_memoized (insn);
9d12bc68 2361 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
9dcc2e87
TS
2362 INSN_ADDRESSES_NEW (insn,
2363 INSN_ADDRESSES (INSN_UID (before_4)));
2364 PUT_MODE (insn, TImode);
85d9c13c 2365 }
85d9c13c 2366 }
9dcc2e87
TS
2367 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2368 && hbrp_addr1 > 16 * 4)
85d9c13c 2369 {
9dcc2e87
TS
2370 insn =
2371 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2372 recog_memoized (insn);
9d12bc68 2373 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
9dcc2e87
TS
2374 INSN_ADDRESSES_NEW (insn,
2375 INSN_ADDRESSES (INSN_UID (before_16)));
2376 PUT_MODE (insn, GET_MODE (before_16));
2377 PUT_MODE (before_16, TImode);
2378 if (insert_lnop_after & 2)
85d9c13c 2379 {
9dcc2e87
TS
2380 insn = emit_insn_before (gen_lnop (), before_16);
2381 recog_memoized (insn);
9d12bc68 2382 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
9dcc2e87
TS
2383 INSN_ADDRESSES_NEW (insn,
2384 INSN_ADDRESSES (INSN_UID
2385 (before_16)));
2386 PUT_MODE (insn, TImode);
85d9c13c
TS
2387 }
2388 }
9dcc2e87 2389 return;
85d9c13c 2390 }
85d9c13c 2391 }
9dcc2e87
TS
2392 else if (BARRIER_P (insn))
2393 return;
85d9c13c 2394
85d9c13c 2395}
9dcc2e87
TS
2396
2397/* The SPU might hang when it executes 48 inline instructions after a
2398 hinted branch jumps to its hinted target. The beginning of a
dd5a833e
MS
2399 function and the return from a call might have been hinted, and
2400 must be handled as well. To prevent a hang we insert 2 hbrps. The
2401 first should be within 6 insns of the branch target. The second
2402 should be within 22 insns of the branch target. When determining
2403 if hbrps are necessary, we look for only 32 inline instructions,
2404 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2405 when inserting new hbrps, we insert them within 4 and 16 insns of
2406 the target. */
85d9c13c 2407static void
9dcc2e87 2408insert_hbrp (void)
85d9c13c 2409{
23c39aaa 2410 rtx_insn *insn;
9dcc2e87 2411 if (TARGET_SAFE_HINTS)
85d9c13c 2412 {
9dcc2e87
TS
2413 shorten_branches (get_insns ());
2414 /* Insert hbrp at beginning of function */
2415 insn = next_active_insn (get_insns ());
2416 if (insn)
2417 insert_hbrp_for_ilb_runout (insn);
2418 /* Insert hbrp after hinted targets. */
2419 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2420 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2421 insert_hbrp_for_ilb_runout (next_active_insn (insn));
85d9c13c 2422 }
85d9c13c
TS
2423}
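/* In terms of addresses, insert_hbrp_for_ilb_runout scans at most
   32 insns (128 bytes) from the hinted target; when hbrps are needed,
   the first goes in front of the insn roughly 4 insns (16 bytes) in
   and the second in front of the insn roughly 14 insns (56 bytes) in,
   leaving slack for the nops and extra hbrps that may still be
   inserted.  */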
2424
9dcc2e87
TS
2425static int in_spu_reorg;
2426
2ba42841
AO
2427static void
2428spu_var_tracking (void)
2429{
2430 if (flag_var_tracking)
2431 {
2432 df_analyze ();
2433 timevar_push (TV_VAR_TRACKING);
2434 variable_tracking_main ();
2435 timevar_pop (TV_VAR_TRACKING);
2436 df_finish_pass (false);
2437 }
2438}
2439
9dcc2e87
TS
2440/* Insert branch hints. There are no branch optimizations after this
2441 pass, so it's safe to set our branch hints now. */
85d9c13c 2442static void
9dcc2e87 2443spu_machine_dependent_reorg (void)
85d9c13c 2444{
9dcc2e87
TS
2445 sbitmap blocks;
2446 basic_block bb;
23c39aaa 2447 rtx_insn *branch, *insn;
9dcc2e87
TS
2448 rtx branch_target = 0;
2449 int branch_addr = 0, insn_addr, required_dist = 0;
2450 int i;
2451 unsigned int j;
85d9c13c 2452
9dcc2e87
TS
2453 if (!TARGET_BRANCH_HINTS || optimize == 0)
2454 {
2455 /* We still do it for unoptimized code because an external
2456 function might have hinted a call or return. */
b4d80e56 2457 compute_bb_for_insn ();
9dcc2e87
TS
2458 insert_hbrp ();
2459 pad_bb ();
2ba42841 2460 spu_var_tracking ();
b4d80e56 2461 free_bb_for_insn ();
9dcc2e87
TS
2462 return;
2463 }
85d9c13c 2464
8b1c6fd7 2465 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
f61e445a 2466 bitmap_clear (blocks);
85d9c13c 2467
9dcc2e87
TS
2468 in_spu_reorg = 1;
2469 compute_bb_for_insn ();
2470
66b038ce
UW
2471 /* (Re-)discover loops so that bb->loop_father can be used
2472 in the analysis below. */
2473 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2474
9dcc2e87
TS
2475 compact_blocks ();
2476
2477 spu_bb_info =
0cae8d31 2478 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
9dcc2e87
TS
2479 sizeof (struct spu_bb_info));
2480
2481 /* We need exact insn addresses and lengths. */
2482 shorten_branches (get_insns ());
2483
0cae8d31 2484 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
85d9c13c 2485 {
06e28de2 2486 bb = BASIC_BLOCK_FOR_FN (cfun, i);
9dcc2e87
TS
2487 branch = 0;
2488 if (spu_bb_info[i].prop_jump)
85d9c13c 2489 {
9dcc2e87
TS
2490 branch = spu_bb_info[i].prop_jump;
2491 branch_target = get_branch_target (branch);
2492 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2493 required_dist = spu_hint_dist;
2494 }
2495 /* Search from end of a block to beginning. In this loop, find
2496 jumps which need a branch hint and emit the hint only when:
2497 - it's an indirect branch and we're at the insn which sets
2498 the register
2499 - we're at an insn that will invalidate the hint. e.g., a
2500 call, another hint insn, inline asm that clobbers $hbr, and
2501 some inlined operations (divmodsi4). Don't consider jumps
2502 because they are only at the end of a block and are
2503 considered when we are deciding whether to propagate
2504 - we're getting too far away from the branch. The hbr insns
2505 only have a signed 10-bit offset.
2506 We go back as far as possible so the branch will be considered
2507 for propagation when we get to the beginning of the block. */
2508 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2509 {
2510 if (INSN_P (insn))
2511 {
2512 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2513 if (branch
2514 && ((GET_CODE (branch_target) == REG
2515 && set_of (branch_target, insn) != NULL_RTX)
2516 || insn_clobbers_hbr (insn)
2517 || branch_addr - insn_addr > 600))
2518 {
23c39aaa 2519 rtx_insn *next = NEXT_INSN (insn);
9dcc2e87
TS
2520 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2521 if (insn != BB_END (bb)
2522 && branch_addr - next_addr >= required_dist)
2523 {
2524 if (dump_file)
2525 fprintf (dump_file,
2526 "hint for %i in block %i before %i\n",
2527 INSN_UID (branch), bb->index,
2528 INSN_UID (next));
2529 spu_emit_branch_hint (next, branch, branch_target,
2530 branch_addr - next_addr, blocks);
2531 }
2532 branch = 0;
2533 }
2534
2535 /* JUMP_P will only be true at the end of a block. When
2536 branch is already set it means we've previously decided
2537 to propagate a hint for that branch into this block. */
2538 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2539 {
2540 branch = 0;
2541 if ((branch_target = get_branch_target (insn)))
2542 {
2543 branch = insn;
2544 branch_addr = insn_addr;
2545 required_dist = spu_hint_dist;
2546 }
2547 }
2548 }
2549 if (insn == BB_HEAD (bb))
2550 break;
2551 }
2552
2553 if (branch)
2554 {
2555 /* If we haven't emitted a hint for this branch yet, it might
2556 be profitable to emit it in one of the predecessor blocks,
2557 especially for loops. */
23c39aaa 2558 rtx_insn *bbend;
9dcc2e87
TS
2559 basic_block prev = 0, prop = 0, prev2 = 0;
2560 int loop_exit = 0, simple_loop = 0;
2561 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2562
2563 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2564 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2565 prev = EDGE_PRED (bb, j)->src;
2566 else
2567 prev2 = EDGE_PRED (bb, j)->src;
2568
2569 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2570 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2571 loop_exit = 1;
2572 else if (EDGE_SUCC (bb, j)->dest == bb)
2573 simple_loop = 1;
2574
2575 /* If this branch is a loop exit then propagate to previous
2576 fallthru block. This catches the cases when it is a simple
2577 loop or when there is an initial branch into the loop. */
2578 if (prev && (loop_exit || simple_loop)
66b038ce 2579 && bb_loop_depth (prev) <= bb_loop_depth (bb))
9dcc2e87
TS
2580 prop = prev;
2581
2582 /* If there is only one adjacent predecessor, don't propagate
66b038ce 2583 outside this loop. */
9dcc2e87 2584 else if (prev && single_pred_p (bb)
66b038ce 2585 && prev->loop_father == bb->loop_father)
9dcc2e87
TS
2586 prop = prev;
2587
2588 /* If this is the JOIN block of a simple IF-THEN then
073a8998 2589 propagate the hint to the HEADER block. */
9dcc2e87
TS
2590 else if (prev && prev2
2591 && EDGE_COUNT (bb->preds) == 2
2592 && EDGE_COUNT (prev->preds) == 1
2593 && EDGE_PRED (prev, 0)->src == prev2
66b038ce 2594 && prev2->loop_father == bb->loop_father
9dcc2e87
TS
2595 && GET_CODE (branch_target) != REG)
2596 prop = prev;
2597
2598 /* Don't propagate when:
2599 - this is a simple loop and the hint would be too far
2600 - this is not a simple loop and there are 16 insns in
2601 this block already
2602 - the predecessor block ends in a branch that will be
2603 hinted
2604 - the predecessor block ends in an insn that invalidates
2605 the hint */
2606 if (prop
2607 && prop->index >= 0
2608 && (bbend = BB_END (prop))
2609 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2610 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2611 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2612 {
2613 if (dump_file)
2614 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2615 "for %i (loop_exit %i simple_loop %i dist %i)\n",
66b038ce 2616 bb->index, prop->index, bb_loop_depth (bb),
9dcc2e87
TS
2617 INSN_UID (branch), loop_exit, simple_loop,
2618 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2619
2620 spu_bb_info[prop->index].prop_jump = branch;
2621 spu_bb_info[prop->index].bb_index = i;
2622 }
2623 else if (branch_addr - next_addr >= required_dist)
2624 {
2625 if (dump_file)
2626 fprintf (dump_file, "hint for %i in block %i before %i\n",
2627 INSN_UID (branch), bb->index,
2628 INSN_UID (NEXT_INSN (insn)));
2629 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2630 branch_addr - next_addr, blocks);
2631 }
2632 branch = 0;
85d9c13c 2633 }
85d9c13c 2634 }
9dcc2e87 2635 free (spu_bb_info);
85d9c13c 2636
f61e445a 2637 if (!bitmap_empty_p (blocks))
9dcc2e87
TS
2638 find_many_sub_basic_blocks (blocks);
2639
2640 /* We have to schedule to make sure alignment is ok. */
11cd3bed 2641 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
9dcc2e87
TS
2642
2643 /* The hints need to be scheduled, so call it again. */
2644 schedule_insns ();
f626b979 2645 df_finish_pass (true);
9dcc2e87
TS
2646
2647 insert_hbrp ();
2648
2649 pad_bb ();
2650
6e37f6d4
TS
2651 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2652 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2653 {
2654 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2655 between its branch label and the branch.  We don't move the
2656 label because GCC expects it at the beginning of the block. */
2657 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2658 rtx label_ref = XVECEXP (unspec, 0, 0);
dc01c3d1
DM
2659 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2660 rtx_insn *branch;
6e37f6d4
TS
2661 int offset = 0;
2662 for (branch = NEXT_INSN (label);
2663 !JUMP_P (branch) && !CALL_P (branch);
2664 branch = NEXT_INSN (branch))
2665 if (NONJUMP_INSN_P (branch))
2666 offset += get_attr_length (branch);
2667 if (offset > 0)
0a81f074 2668 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
6e37f6d4 2669 }
9dcc2e87 2670
2ba42841 2671 spu_var_tracking ();
9dcc2e87 2672
66b038ce
UW
2673 loop_optimizer_finalize ();
2674
9dcc2e87
TS
2675 free_bb_for_insn ();
2676
2677 in_spu_reorg = 0;
85d9c13c
TS
2678}
2679\f
2680
2681/* Insn scheduling routines, primarily for dual issue. */
2682static int
2683spu_sched_issue_rate (void)
2684{
2685 return 2;
2686}
2687
2688static int
23c39aaa 2689uses_ls_unit(rtx_insn *insn)
85d9c13c 2690{
9dcc2e87
TS
2691 rtx set = single_set (insn);
2692 if (set != 0
2693 && (GET_CODE (SET_DEST (set)) == MEM
2694 || GET_CODE (SET_SRC (set)) == MEM))
2695 return 1;
2696 return 0;
85d9c13c
TS
2697}
2698
2699static int
23c39aaa 2700get_pipe (rtx_insn *insn)
85d9c13c
TS
2701{
2702 enum attr_type t;
2703 /* Handle inline asm */
2704 if (INSN_CODE (insn) == -1)
2705 return -1;
2706 t = get_attr_type (insn);
2707 switch (t)
2708 {
2709 case TYPE_CONVERT:
2710 return -2;
2711 case TYPE_MULTI0:
2712 return -1;
2713
2714 case TYPE_FX2:
2715 case TYPE_FX3:
2716 case TYPE_SPR:
2717 case TYPE_NOP:
2718 case TYPE_FXB:
2719 case TYPE_FPD:
2720 case TYPE_FP6:
2721 case TYPE_FP7:
85d9c13c
TS
2722 return 0;
2723
2724 case TYPE_LNOP:
2725 case TYPE_SHUF:
2726 case TYPE_LOAD:
2727 case TYPE_STORE:
2728 case TYPE_BR:
2729 case TYPE_MULTI1:
2730 case TYPE_HBR:
9dcc2e87 2731 case TYPE_IPREFETCH:
85d9c13c
TS
2732 return 1;
2733 default:
2734 abort ();
2735 }
2736}
2737
9dcc2e87
TS
2738
2739/* haifa-sched.c has a static variable that keeps track of the current
2740 cycle. It is passed to spu_sched_reorder, and we record it here for
2741 use by spu_sched_variable_issue. It won't be accurate if the
2742 scheduler updates its clock_var between the two calls. */
2743static int clock_var;
2744
2745/* This is used to keep track of insn alignment. Set to 0 at the
2746 beginning of each block and increased by the "length" attr of each
2747 insn scheduled. */
2748static int spu_sched_length;
2749
2750/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2751 ready list appropriately in spu_sched_reorder(). */
2752static int pipe0_clock;
2753static int pipe1_clock;
2754
2755static int prev_clock_var;
2756
2757static int prev_priority;
2758
2759/* The SPU needs to load the next ilb sometime during the execution of
2760 the previous ilb. There is a potential conflict if every cycle has a
2761 load or store. To avoid the conflict we make sure the load/store
2762 unit is free for at least one cycle during the execution of insns in
2763 the previous ilb. */
2764static int spu_ls_first;
2765static int prev_ls_clock;
2766
2767static void
2768spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2769 int max_ready ATTRIBUTE_UNUSED)
2770{
2771 spu_sched_length = 0;
2772}
2773
2774static void
2775spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2776 int max_ready ATTRIBUTE_UNUSED)
2777{
2778 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2779 {
2780 /* When any block might be at least 8-byte aligned, assume they
2781 will all be at least 8-byte aligned to make sure dual issue
2782 works out correctly. */
2783 spu_sched_length = 0;
2784 }
2785 spu_ls_first = INT_MAX;
2786 clock_var = -1;
2787 prev_ls_clock = -1;
2788 pipe0_clock = -1;
2789 pipe1_clock = -1;
2790 prev_clock_var = -1;
2791 prev_priority = -1;
2792}
2793
85d9c13c 2794static int
9dcc2e87 2795spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
23c39aaa 2796 int verbose ATTRIBUTE_UNUSED,
ac44248e 2797 rtx_insn *insn, int more)
85d9c13c 2798{
9dcc2e87
TS
2799 int len;
2800 int p;
85d9c13c
TS
2801 if (GET_CODE (PATTERN (insn)) == USE
2802 || GET_CODE (PATTERN (insn)) == CLOBBER
9dcc2e87
TS
2803 || (len = get_attr_length (insn)) == 0)
2804 return more;
2805
2806 spu_sched_length += len;
2807
2808 /* Reset on inline asm */
2809 if (INSN_CODE (insn) == -1)
2810 {
2811 spu_ls_first = INT_MAX;
2812 pipe0_clock = -1;
2813 pipe1_clock = -1;
2814 return 0;
2815 }
2816 p = get_pipe (insn);
2817 if (p == 0)
2818 pipe0_clock = clock_var;
2819 else
2820 pipe1_clock = clock_var;
2821
2822 if (in_spu_reorg)
2823 {
2824 if (clock_var - prev_ls_clock > 1
2825 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2826 spu_ls_first = INT_MAX;
2827 if (uses_ls_unit (insn))
2828 {
2829 if (spu_ls_first == INT_MAX)
2830 spu_ls_first = spu_sched_length;
2831 prev_ls_clock = clock_var;
2832 }
2833
2834 /* The scheduler hasn't inserted the nop, but we will later on.
2835 Include those nops in spu_sched_length. */
2836 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2837 spu_sched_length += 4;
2838 prev_clock_var = clock_var;
2839
2840 /* more is -1 when called from spu_sched_reorder for new insns
2841 that don't have INSN_PRIORITY */
2842 if (more >= 0)
2843 prev_priority = INSN_PRIORITY (insn);
2844 }
2845
073a8998 2846 /* Always try issuing more insns. spu_sched_reorder will decide
9dcc2e87
TS
2847 when the cycle should be advanced. */
2848 return 1;
2849}
2850
2851/* This function is called for both TARGET_SCHED_REORDER and
2852 TARGET_SCHED_REORDER2. */
2853static int
2854spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
ce1ce33a 2855 rtx_insn **ready, int *nreadyp, int clock)
9dcc2e87
TS
2856{
2857 int i, nready = *nreadyp;
2858 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
ce1ce33a 2859 rtx_insn *insn;
9dcc2e87
TS
2860
2861 clock_var = clock;
2862
2863 if (nready <= 0 || pipe1_clock >= clock)
2864 return 0;
2865
2866 /* Find any rtl insns that don't generate assembly insns and schedule
2867 them first. */
2868 for (i = nready - 1; i >= 0; i--)
2869 {
2870 insn = ready[i];
2871 if (INSN_CODE (insn) == -1
2872 || INSN_CODE (insn) == CODE_FOR_blockage
eec9405e 2873 || (INSN_P (insn) && get_attr_length (insn) == 0))
9dcc2e87
TS
2874 {
2875 ready[i] = ready[nready - 1];
2876 ready[nready - 1] = insn;
2877 return 1;
2878 }
2879 }
2880
2881 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2882 for (i = 0; i < nready; i++)
2883 if (INSN_CODE (ready[i]) != -1)
2884 {
2885 insn = ready[i];
2886 switch (get_attr_type (insn))
2887 {
2888 default:
2889 case TYPE_MULTI0:
2890 case TYPE_CONVERT:
2891 case TYPE_FX2:
2892 case TYPE_FX3:
2893 case TYPE_SPR:
2894 case TYPE_NOP:
2895 case TYPE_FXB:
2896 case TYPE_FPD:
2897 case TYPE_FP6:
2898 case TYPE_FP7:
2899 pipe_0 = i;
2900 break;
2901 case TYPE_LOAD:
2902 case TYPE_STORE:
2903 pipe_ls = i;
ae51afc5 2904 /* FALLTHRU */
9dcc2e87
TS
2905 case TYPE_LNOP:
2906 case TYPE_SHUF:
2907 case TYPE_BR:
2908 case TYPE_MULTI1:
2909 case TYPE_HBR:
2910 pipe_1 = i;
2911 break;
2912 case TYPE_IPREFETCH:
2913 pipe_hbrp = i;
2914 break;
2915 }
2916 }
2917
2918 /* In the first scheduling phase, schedule loads and stores together
2919 to increase the chance they will get merged during postreload CSE. */
2920 if (!reload_completed && pipe_ls >= 0)
2921 {
2922 insn = ready[pipe_ls];
2923 ready[pipe_ls] = ready[nready - 1];
2924 ready[nready - 1] = insn;
2925 return 1;
2926 }
2927
2928 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2929 if (pipe_hbrp >= 0)
2930 pipe_1 = pipe_hbrp;
2931
2932 /* When we have loads/stores in every cycle of the last 15 insns and
2933 we are about to schedule another load/store, emit an hbrp insn
2934 instead. */
2935 if (in_spu_reorg
2936 && spu_sched_length - spu_ls_first >= 4 * 15
2937 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2938 {
2939 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2940 recog_memoized (insn);
2941 if (pipe0_clock < clock)
2942 PUT_MODE (insn, TImode);
2943 spu_sched_variable_issue (file, verbose, insn, -1);
2944 return 0;
2945 }
2946
2947 /* In general, we want to emit nops to increase dual issue, but dual
2948 issue isn't faster when one of the insns could be scheduled later
2949 without affecting the critical path. We look at INSN_PRIORITY to
2950 make a good guess, but it isn't perfect so -mdual-nops=n can be
2951 used to adjust it. */
2952 if (in_spu_reorg && spu_dual_nops < 10)
2953 {
073a8998 2954 /* When we are at an even address and we are not issuing nops to
9dcc2e87
TS
2955 improve scheduling then we need to advance the cycle. */
2956 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2957 && (spu_dual_nops == 0
2958 || (pipe_1 != -1
2959 && prev_priority >
2960 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2961 return 0;
2962
2963 /* When at an odd address, schedule the highest priority insn
2964 without considering pipeline. */
2965 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2966 && (spu_dual_nops == 0
2967 || (prev_priority >
2968 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2969 return 1;
2970 }
2971
2972
2973 /* If we haven't issued a pipe0 insn yet this cycle and there is a
2974 pipe0 insn in the ready list, schedule it. */
2975 if (pipe0_clock < clock && pipe_0 >= 0)
2976 schedule_i = pipe_0;
2977
2978 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2979 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2980 else
2981 schedule_i = pipe_1;
2982
2983 if (schedule_i > -1)
2984 {
2985 insn = ready[schedule_i];
2986 ready[schedule_i] = ready[nready - 1];
2987 ready[nready - 1] = insn;
2988 return 1;
2989 }
2990 return 0;
85d9c13c
TS
2991}
2992
2993/* INSN is dependent on DEP_INSN. */
2994static int
b505225b
TS
2995spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2996 int cost, unsigned int)
85d9c13c 2997{
9dcc2e87
TS
2998 rtx set;
2999
3000 /* The blockage pattern is used to prevent instructions from being
3001 moved across it and has no cost. */
3002 if (INSN_CODE (insn) == CODE_FOR_blockage
3003 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3004 return 0;
3005
eec9405e
TS
3006 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3007 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
9dcc2e87
TS
3008 return 0;
3009
3010 /* Make sure hbrps are spread out. */
3011 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3012 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3013 return 8;
3014
3015 /* Make sure hints and hbrps are 2 cycles apart. */
3016 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3017 || INSN_CODE (insn) == CODE_FOR_hbr)
3018 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3019 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3020 return 2;
3021
3022 /* An hbrp has no real dependency on other insns. */
3023 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3024 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3025 return 0;
3026
3027 /* Assuming that it is unlikely an argument register will be used in
3028 the first cycle of the called function, we reduce the cost for
3029 slightly better scheduling of dep_insn. When not hinted, the
3030 mispredicted branch would hide the cost as well. */
3031 if (CALL_P (insn))
3032 {
3033 rtx target = get_branch_target (insn);
3034 if (GET_CODE (target) != REG || !set_of (target, insn))
3035 return cost - 2;
3036 return cost;
3037 }
3038
3039 /* And when returning from a function, let's assume the return values
3040 are completed sooner too. */
3041 if (CALL_P (dep_insn))
85d9c13c 3042 return cost - 2;
9dcc2e87
TS
3043
3044 /* Make sure an instruction that loads from the back chain is scheduled
3045 away from the return instruction so a hint is more likely to get
3046 issued. */
3047 if (INSN_CODE (insn) == CODE_FOR__return
3048 && (set = single_set (dep_insn))
3049 && GET_CODE (SET_DEST (set)) == REG
3050 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3051 return 20;
3052
85d9c13c
TS
3053 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3054 scheduler makes every insn in a block anti-dependent on the final
3055 jump_insn. We adjust here so higher cost insns will get scheduled
3056 earlier. */
b505225b 3057 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
b198261f 3058 return insn_cost (dep_insn) - 3;
9dcc2e87 3059
85d9c13c
TS
3060 return cost;
3061}
3062\f
3063/* Create a CONST_DOUBLE from a string. */
984514ac 3064rtx
ef4bddc2 3065spu_float_const (const char *string, machine_mode mode)
85d9c13c
TS
3066{
3067 REAL_VALUE_TYPE value;
3068 value = REAL_VALUE_ATOF (string, mode);
555affd7 3069 return const_double_from_real_value (value, mode);
85d9c13c
TS
3070}
3071
85d9c13c
TS
3072int
3073spu_constant_address_p (rtx x)
3074{
3075 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3076 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3077 || GET_CODE (x) == HIGH);
3078}
3079
3080static enum spu_immediate
3081which_immediate_load (HOST_WIDE_INT val)
3082{
3083 gcc_assert (val == trunc_int_for_mode (val, SImode));
3084
3085 if (val >= -0x8000 && val <= 0x7fff)
3086 return SPU_IL;
3087 if (val >= 0 && val <= 0x3ffff)
3088 return SPU_ILA;
3089 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3090 return SPU_ILH;
3091 if ((val & 0xffff) == 0)
3092 return SPU_ILHU;
3093
3094 return SPU_NONE;
3095}
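/* Worked examples for the classification above: 100 -> SPU_IL,
   0x20000 -> SPU_ILA, 0x00050005 -> SPU_ILH (both halfwords equal),
   0x12340000 -> SPU_ILHU (low halfword zero), and 0x12345678 ->
   SPU_NONE, since it needs an ilhu/iohl pair.  */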
3096
a1c6e4b8
TS
3097/* Return true when OP can be loaded by one of the il instructions, or
3098 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
85d9c13c 3099int
ef4bddc2 3100immediate_load_p (rtx op, machine_mode mode)
a1c6e4b8
TS
3101{
3102 if (CONSTANT_P (op))
3103 {
3104 enum immediate_class c = classify_immediate (op, mode);
73701e27 3105 return c == IC_IL1 || c == IC_IL1s
6fb5fa3c 3106 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
a1c6e4b8
TS
3107 }
3108 return 0;
3109}
3110
3111/* Return true if the first SIZE bytes of ARR are a constant that can be
3112 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3113 represent the size and offset of the instruction to use. */
3114static int
3115cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3116{
3117 int cpat, run, i, start;
3118 cpat = 1;
3119 run = 0;
3120 start = -1;
3121 for (i = 0; i < size && cpat; i++)
3122 if (arr[i] != i+16)
3123 {
3124 if (!run)
3125 {
3126 start = i;
3127 if (arr[i] == 3)
3128 run = 1;
3129 else if (arr[i] == 2 && arr[i+1] == 3)
3130 run = 2;
3131 else if (arr[i] == 0)
3132 {
3133 while (arr[i+run] == run && i+run < 16)
3134 run++;
3135 if (run != 4 && run != 8)
3136 cpat = 0;
3137 }
3138 else
3139 cpat = 0;
3140 if ((i & (run-1)) != 0)
3141 cpat = 0;
3142 i += run;
3143 }
3144 else
3145 cpat = 0;
3146 }
1f49ae6e 3147 if (cpat && (run || size < 16))
a1c6e4b8
TS
3148 {
3149 if (run == 0)
3150 run = 1;
3151 if (prun)
3152 *prun = run;
3153 if (pstart)
3154 *pstart = start == -1 ? 16-run : start;
3155 return 1;
3156 }
3157 return 0;
3158}
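/* For example, the byte pattern
     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03,
       0x18,0x19,0x1a,0x1b, 0x1c,0x1d,0x1e,0x1f }
   differs from the i+16 default only in bytes 4..7, which hold the
   run 0,1,2,3, so cpat_info returns 1 with *PRUN = 4 and *PSTART = 4
   (the pattern cwd would generate for a word insert at offset 4).  */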
3159
3160/* OP is a CONSTANT_P. Determine what instructions can be used to load
24fc18b9 3161 it into a register. MODE is only valid when OP is a CONST_INT. */
a1c6e4b8 3162static enum immediate_class
ef4bddc2 3163classify_immediate (rtx op, machine_mode mode)
85d9c13c
TS
3164{
3165 HOST_WIDE_INT val;
3166 unsigned char arr[16];
73701e27 3167 int i, j, repeated, fsmbi, repeat;
a1c6e4b8
TS
3168
3169 gcc_assert (CONSTANT_P (op));
3170
85d9c13c
TS
3171 if (GET_MODE (op) != VOIDmode)
3172 mode = GET_MODE (op);
3173
a1c6e4b8 3174 /* A V4SI const_vector with all identical symbols is ok. */
73701e27
TS
3175 if (!flag_pic
3176 && mode == V4SImode
a1c6e4b8
TS
3177 && GET_CODE (op) == CONST_VECTOR
3178 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
92695fbb
RS
3179 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3180 op = unwrap_const_vec_duplicate (op);
85d9c13c 3181
a1c6e4b8
TS
3182 switch (GET_CODE (op))
3183 {
3184 case SYMBOL_REF:
3185 case LABEL_REF:
3186 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
85d9c13c 3187
a1c6e4b8 3188 case CONST:
3f61b42f
UW
3189 /* We can never know if the resulting address fits in 18 bits and can be
3190 loaded with ila. For now, assume the address will not overflow if
3191 the displacement is "small" (fits 'K' constraint). */
3192 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3193 {
3194 rtx sym = XEXP (XEXP (op, 0), 0);
3195 rtx cst = XEXP (XEXP (op, 0), 1);
3196
3197 if (GET_CODE (sym) == SYMBOL_REF
3198 && GET_CODE (cst) == CONST_INT
3199 && satisfies_constraint_K (cst))
3200 return IC_IL1s;
3201 }
3202 return IC_IL2s;
85d9c13c 3203
a1c6e4b8
TS
3204 case HIGH:
3205 return IC_IL1s;
3206
3207 case CONST_VECTOR:
3208 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3209 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3210 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3211 return IC_POOL;
3212 /* Fall through. */
3213
3214 case CONST_INT:
3215 case CONST_DOUBLE:
3216 constant_to_array (mode, op, arr);
85d9c13c 3217
a1c6e4b8
TS
3218 /* Check that each 4-byte slot is identical. */
3219 repeated = 1;
3220 for (i = 4; i < 16; i += 4)
3221 for (j = 0; j < 4; j++)
3222 if (arr[j] != arr[i + j])
3223 repeated = 0;
3224
3225 if (repeated)
3226 {
3227 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3228 val = trunc_int_for_mode (val, SImode);
3229
3230 if (which_immediate_load (val) != SPU_NONE)
3231 return IC_IL1;
3232 }
3233
3234 /* Any mode of 2 bytes or smaller can be loaded with an il
3235 instruction. */
3236 gcc_assert (GET_MODE_SIZE (mode) > 2);
3237
3238 fsmbi = 1;
73701e27 3239 repeat = 0;
a1c6e4b8 3240 for (i = 0; i < 16 && fsmbi; i++)
73701e27
TS
3241 if (arr[i] != 0 && repeat == 0)
3242 repeat = arr[i];
3243 else if (arr[i] != 0 && arr[i] != repeat)
a1c6e4b8
TS
3244 fsmbi = 0;
3245 if (fsmbi)
73701e27 3246 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
a1c6e4b8
TS
3247
3248 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3249 return IC_CPAT;
3250
3251 if (repeated)
3252 return IC_IL2;
3253
3254 return IC_POOL;
3255 default:
3256 break;
3257 }
3258 gcc_unreachable ();
85d9c13c
TS
3259}
3260
3261static enum spu_immediate
3262which_logical_immediate (HOST_WIDE_INT val)
3263{
3264 gcc_assert (val == trunc_int_for_mode (val, SImode));
3265
3266 if (val >= -0x200 && val <= 0x1ff)
3267 return SPU_ORI;
3268 if (val >= 0 && val <= 0xffff)
3269 return SPU_IOHL;
3270 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3271 {
3272 val = trunc_int_for_mode (val, HImode);
3273 if (val >= -0x200 && val <= 0x1ff)
3274 return SPU_ORHI;
3275 if ((val & 0xff) == ((val >> 8) & 0xff))
3276 {
3277 val = trunc_int_for_mode (val, QImode);
3278 if (val >= -0x200 && val <= 0x1ff)
3279 return SPU_ORBI;
3280 }
3281 }
3282 return SPU_NONE;
3283}
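/* For example, 0x00050005 -> SPU_ORHI (each halfword is 5, which fits
   the 10-bit immediate), and 0xf0f0f0f0 -> SPU_ORBI (each byte is
   0xf0, i.e. -16).  A value like 0x00012345, which repeats in no
   smaller unit and does not fit ori, returns SPU_NONE.  */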
3284
73701e27
TS
3285/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3286 CONST_DOUBLEs. */
3287static int
3288const_vector_immediate_p (rtx x)
3289{
3290 int i;
3291 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3292 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3293 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3294 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3295 return 0;
3296 return 1;
3297}
3298
85d9c13c 3299int
ef4bddc2 3300logical_immediate_p (rtx op, machine_mode mode)
85d9c13c
TS
3301{
3302 HOST_WIDE_INT val;
3303 unsigned char arr[16];
3304 int i, j;
3305
3306 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3307 || GET_CODE (op) == CONST_VECTOR);
3308
73701e27
TS
3309 if (GET_CODE (op) == CONST_VECTOR
3310 && !const_vector_immediate_p (op))
3311 return 0;
3312
85d9c13c
TS
3313 if (GET_MODE (op) != VOIDmode)
3314 mode = GET_MODE (op);
3315
3316 constant_to_array (mode, op, arr);
3317
3318 /* Check that bytes are repeated. */
3319 for (i = 4; i < 16; i += 4)
3320 for (j = 0; j < 4; j++)
3321 if (arr[j] != arr[i + j])
3322 return 0;
3323
3324 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3325 val = trunc_int_for_mode (val, SImode);
3326
3327 i = which_logical_immediate (val);
3328 return i != SPU_NONE && i != SPU_IOHL;
3329}
3330
3331int
ef4bddc2 3332iohl_immediate_p (rtx op, machine_mode mode)
85d9c13c
TS
3333{
3334 HOST_WIDE_INT val;
3335 unsigned char arr[16];
3336 int i, j;
3337
3338 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3339 || GET_CODE (op) == CONST_VECTOR);
3340
73701e27
TS
3341 if (GET_CODE (op) == CONST_VECTOR
3342 && !const_vector_immediate_p (op))
3343 return 0;
3344
85d9c13c
TS
3345 if (GET_MODE (op) != VOIDmode)
3346 mode = GET_MODE (op);
3347
3348 constant_to_array (mode, op, arr);
3349
3350 /* Check that bytes are repeated. */
3351 for (i = 4; i < 16; i += 4)
3352 for (j = 0; j < 4; j++)
3353 if (arr[j] != arr[i + j])
3354 return 0;
3355
3356 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3357 val = trunc_int_for_mode (val, SImode);
3358
3359 return val >= 0 && val <= 0xffff;
3360}
3361
3362int
ef4bddc2 3363arith_immediate_p (rtx op, machine_mode mode,
85d9c13c
TS
3364 HOST_WIDE_INT low, HOST_WIDE_INT high)
3365{
3366 HOST_WIDE_INT val;
3367 unsigned char arr[16];
3368 int bytes, i, j;
3369
3370 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3371 || GET_CODE (op) == CONST_VECTOR);
3372
73701e27
TS
3373 if (GET_CODE (op) == CONST_VECTOR
3374 && !const_vector_immediate_p (op))
3375 return 0;
3376
85d9c13c
TS
3377 if (GET_MODE (op) != VOIDmode)
3378 mode = GET_MODE (op);
3379
3380 constant_to_array (mode, op, arr);
3381
cb5ca315 3382 bytes = GET_MODE_UNIT_SIZE (mode);
636bfdfa 3383 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
85d9c13c
TS
3384
3385 /* Check that bytes are repeated. */
3386 for (i = bytes; i < 16; i += bytes)
3387 for (j = 0; j < bytes; j++)
3388 if (arr[j] != arr[i + j])
3389 return 0;
3390
3391 val = arr[0];
3392 for (j = 1; j < bytes; j++)
3393 val = (val << 8) | arr[j];
3394
3395 val = trunc_int_for_mode (val, mode);
3396
3397 return val >= low && val <= high;
3398}
3399
5345cf68
TS
3400/* TRUE when op is an immediate and an exact power of 2, and given that
3401 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3402 all entries must be the same. */
3403bool
ef4bddc2 3404exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
5345cf68 3405{
ef4bddc2 3406 machine_mode int_mode;
5345cf68
TS
3407 HOST_WIDE_INT val;
3408 unsigned char arr[16];
3409 int bytes, i, j;
3410
3411 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3412 || GET_CODE (op) == CONST_VECTOR);
3413
3414 if (GET_CODE (op) == CONST_VECTOR
3415 && !const_vector_immediate_p (op))
3416 return 0;
3417
3418 if (GET_MODE (op) != VOIDmode)
3419 mode = GET_MODE (op);
3420
3421 constant_to_array (mode, op, arr);
3422
cb5ca315 3423 mode = GET_MODE_INNER (mode);
5345cf68
TS
3424
3425 bytes = GET_MODE_SIZE (mode);
636bfdfa 3426 int_mode = int_mode_for_mode (mode).require ();
5345cf68
TS
3427
3428 /* Check that bytes are repeated. */
3429 for (i = bytes; i < 16; i += bytes)
3430 for (j = 0; j < bytes; j++)
3431 if (arr[j] != arr[i + j])
3432 return 0;
3433
3434 val = arr[0];
3435 for (j = 1; j < bytes; j++)
3436 val = (val << 8) | arr[j];
3437
3438 val = trunc_int_for_mode (val, int_mode);
3439
3440 /* Currently, we only handle SFmode */
3441 gcc_assert (mode == SFmode);
3442 if (mode == SFmode)
3443 {
3444 int exp = (val >> 23) - 127;
3445 return val > 0 && (val & 0x007fffff) == 0
3446 && exp >= low && exp <= high;
3447 }
3448 return FALSE;
3449}
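/* Worked example for the SFmode test above: 2.0f has the bit pattern
   0x40000000, so exp = (0x40000000 >> 23) - 127 = 1 and the mantissa
   bits are all zero; the predicate holds whenever LOW <= 1 <= HIGH.
   3.0f (0x40400000) has nonzero mantissa bits and is rejected.  */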
3450
299456f3
BE
3451/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3452
3dfc96ea
RS
3453static bool
3454ea_symbol_ref_p (const_rtx x)
299456f3 3455{
299456f3
BE
3456 tree decl;
3457
3458 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3459 {
3460 rtx plus = XEXP (x, 0);
3461 rtx op0 = XEXP (plus, 0);
3462 rtx op1 = XEXP (plus, 1);
3463 if (GET_CODE (op1) == CONST_INT)
3464 x = op0;
3465 }
3466
3467 return (GET_CODE (x) == SYMBOL_REF
3468 && (decl = SYMBOL_REF_DECL (x)) != 0
3469 && TREE_CODE (decl) == VAR_DECL
3470 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3471}
3472
85d9c13c 3473/* We accept:
a7b376ee 3474 - any 32-bit constant (SImode, SFmode)
85d9c13c 3475 - any constant that can be generated with fsmbi (any mode)
a7b376ee 3476 - a 64-bit constant where the high and low 32-bit halves are identical
85d9c13c 3477 (DImode, DFmode)
a7b376ee 3478 - a 128-bit constant where the four 32-bit words match. */
1a627b35 3479bool
ef4bddc2 3480spu_legitimate_constant_p (machine_mode mode, rtx x)
85d9c13c 3481{
3dfc96ea 3482 subrtx_iterator::array_type array;
73701e27
TS
3483 if (GET_CODE (x) == HIGH)
3484 x = XEXP (x, 0);
299456f3
BE
3485
3486 /* Reject any __ea qualified reference. These can't appear in
3487 instructions but must be forced to the constant pool. */
3dfc96ea
RS
3488 FOR_EACH_SUBRTX (iter, array, x, ALL)
3489 if (ea_symbol_ref_p (*iter))
3490 return 0;
299456f3 3491
85d9c13c 3492 /* V4SI with all identical symbols is valid. */
73701e27 3493 if (!flag_pic
1a627b35 3494 && mode == V4SImode
85d9c13c
TS
3495 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3496 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
73701e27 3497 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
92695fbb 3498 return const_vec_duplicate_p (x);
85d9c13c 3499
73701e27
TS
3500 if (GET_CODE (x) == CONST_VECTOR
3501 && !const_vector_immediate_p (x))
3502 return 0;
85d9c13c
TS
3503 return 1;
3504}
3505
3506/* Valid addresses are:
3507 - symbol_ref, label_ref, const
3508 - reg
eec9405e 3509 - reg + const_int, where const_int is 16 byte aligned
85d9c13c
TS
3510 - reg + reg, alignment doesn't matter
3511 The alignment matters in the reg+const case because lqd and stqd
eec9405e
TS
3512 ignore the 4 least significant bits of the const. We only care about
3513 16 byte modes because the expand phase will change all smaller MEM
3514 references to TImode. */
3515static bool
ef4bddc2 3516spu_legitimate_address_p (machine_mode mode,
c6c3dba9 3517 rtx x, bool reg_ok_strict)
85d9c13c 3518{
eec9405e
TS
3519 int aligned = GET_MODE_SIZE (mode) >= 16;
3520 if (aligned
3521 && GET_CODE (x) == AND
85d9c13c 3522 && GET_CODE (XEXP (x, 1)) == CONST_INT
eec9405e 3523 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
85d9c13c
TS
3524 x = XEXP (x, 0);
3525 switch (GET_CODE (x))
3526 {
85d9c13c 3527 case LABEL_REF:
299456f3
BE
3528 return !TARGET_LARGE_MEM;
3529
eec9405e 3530 case SYMBOL_REF:
85d9c13c 3531 case CONST:
299456f3
BE
3532 /* Keep __ea references until reload so that spu_expand_mov can see them
3533 in MEMs. */
3dfc96ea 3534 if (ea_symbol_ref_p (x))
299456f3 3535 return !reload_in_progress && !reload_completed;
eec9405e 3536 return !TARGET_LARGE_MEM;
85d9c13c
TS
3537
3538 case CONST_INT:
3539 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3540
3541 case SUBREG:
3542 x = XEXP (x, 0);
ae51afc5 3543 if (!REG_P (x))
eec9405e 3544 return 0;
ae51afc5 3545 /* FALLTHRU */
85d9c13c
TS
3546
3547 case REG:
3548 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3549
3550 case PLUS:
3551 case LO_SUM:
3552 {
3553 rtx op0 = XEXP (x, 0);
3554 rtx op1 = XEXP (x, 1);
3555 if (GET_CODE (op0) == SUBREG)
3556 op0 = XEXP (op0, 0);
3557 if (GET_CODE (op1) == SUBREG)
3558 op1 = XEXP (op1, 0);
85d9c13c
TS
3559 if (GET_CODE (op0) == REG
3560 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3561 && GET_CODE (op1) == CONST_INT
2ea0be59
UW
3562 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3563 /* If virtual registers are involved, the displacement will
3564 change later on anyway, so checking would be premature.
3565 Reload will make sure the final displacement after
3566 register elimination is OK. */
3567 || op0 == arg_pointer_rtx
3568 || op0 == frame_pointer_rtx
3569 || op0 == virtual_stack_vars_rtx)
eec9405e
TS
3570 && (!aligned || (INTVAL (op1) & 15) == 0))
3571 return TRUE;
85d9c13c
TS
3572 if (GET_CODE (op0) == REG
3573 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3574 && GET_CODE (op1) == REG
3575 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
eec9405e 3576 return TRUE;
85d9c13c
TS
3577 }
3578 break;
3579
3580 default:
3581 break;
3582 }
eec9405e 3583 return FALSE;
85d9c13c
TS
3584}
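/* For example, with a 16-byte (TImode) access, (plus (reg) (const_int
   48)) is accepted because 48 fits the signed 14-bit range and is a
   multiple of 16, while (plus (reg) (const_int 50)) is rejected for
   the aligned case; (plus (reg) (reg)) is accepted regardless of
   alignment.  */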
3585
299456f3
BE
3586/* Like spu_legitimate_address_p, except with named addresses. */
3587static bool
ef4bddc2 3588spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
299456f3
BE
3589 bool reg_ok_strict, addr_space_t as)
3590{
3591 if (as == ADDR_SPACE_EA)
3592 return (REG_P (x) && (GET_MODE (x) == EAmode));
3593
3594 else if (as != ADDR_SPACE_GENERIC)
3595 gcc_unreachable ();
3596
3597 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3598}
3599
85d9c13c 3600/* When the address is reg + const_int, force the const_int into a
2f8e468b 3601 register. */
c9c72699 3602static rtx
85d9c13c 3603spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
ef4bddc2 3604 machine_mode mode ATTRIBUTE_UNUSED)
85d9c13c
TS
3605{
3606 rtx op0, op1;
3607 /* Make sure both operands are registers. */
3608 if (GET_CODE (x) == PLUS)
3609 {
3610 op0 = XEXP (x, 0);
3611 op1 = XEXP (x, 1);
3612 if (ALIGNED_SYMBOL_REF_P (op0))
3613 {
3614 op0 = force_reg (Pmode, op0);
3615 mark_reg_pointer (op0, 128);
3616 }
3617 else if (GET_CODE (op0) != REG)
3618 op0 = force_reg (Pmode, op0);
3619 if (ALIGNED_SYMBOL_REF_P (op1))
3620 {
3621 op1 = force_reg (Pmode, op1);
3622 mark_reg_pointer (op1, 128);
3623 }
3624 else if (GET_CODE (op1) != REG)
3625 op1 = force_reg (Pmode, op1);
3626 x = gen_rtx_PLUS (Pmode, op0, op1);
85d9c13c 3627 }
506d7b68 3628 return x;
85d9c13c
TS
3629}
3630
299456f3
BE
3631/* Like spu_legitimize_address, except with named address support. */
3632static rtx
ef4bddc2 3633spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
299456f3
BE
3634 addr_space_t as)
3635{
3636 if (as != ADDR_SPACE_GENERIC)
3637 return x;
3638
3639 return spu_legitimize_address (x, oldx, mode);
3640}
3641
2ea0be59
UW
3642/* Reload reg + const_int for out-of-range displacements. */
3643rtx
ef4bddc2 3644spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
2ea0be59
UW
3645 int opnum, int type)
3646{
3647 bool removed_and = false;
3648
3649 if (GET_CODE (ad) == AND
3650 && CONST_INT_P (XEXP (ad, 1))
3651 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3652 {
3653 ad = XEXP (ad, 0);
3654 removed_and = true;
3655 }
3656
3657 if (GET_CODE (ad) == PLUS
3658 && REG_P (XEXP (ad, 0))
3659 && CONST_INT_P (XEXP (ad, 1))
3660 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3661 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3662 {
3663 /* Unshare the sum. */
3664 ad = copy_rtx (ad);
3665
3666 /* Reload the displacement. */
3667 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3668 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3669 opnum, (enum reload_type) type);
3670
3671 /* Add back AND for alignment if we stripped it. */
3672 if (removed_and)
3673 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3674
3675 return ad;
3676 }
3677
3678 return NULL_RTX;
3679}
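/* Illustrative case (hypothetical operands): for an address of the form
   (plus (reg) (const_int 0x3000)) the displacement lies outside the
   signed 14-bit range [-0x2000, 0x1fff] accepted above, so the constant
   is pushed as a reload into a base register and the PLUS is rebuilt
   around it.  If the incoming address was wrapped in
   (and ... (const_int -16)), the AND is stripped before reloading and
   re-applied to the result.  */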
3680
85d9c13c
TS
3681/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3682 struct attribute_spec.handler. */
3683static tree
3684spu_handle_fndecl_attribute (tree * node,
3685 tree name,
3686 tree args ATTRIBUTE_UNUSED,
3687 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3688{
3689 if (TREE_CODE (*node) != FUNCTION_DECL)
3690 {
29d08eba
JM
3691 warning (0, "%qE attribute only applies to functions",
3692 name);
85d9c13c
TS
3693 *no_add_attrs = true;
3694 }
3695
3696 return NULL_TREE;
3697}
3698
3699/* Handle the "vector" attribute. */
3700static tree
3701spu_handle_vector_attribute (tree * node, tree name,
3702 tree args ATTRIBUTE_UNUSED,
3703 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3704{
3705 tree type = *node, result = NULL_TREE;
ef4bddc2 3706 machine_mode mode;
85d9c13c
TS
3707 int unsigned_p;
3708
3709 while (POINTER_TYPE_P (type)
3710 || TREE_CODE (type) == FUNCTION_TYPE
3711 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3712 type = TREE_TYPE (type);
3713
3714 mode = TYPE_MODE (type);
3715
3716 unsigned_p = TYPE_UNSIGNED (type);
3717 switch (mode)
3718 {
4e10a5a7 3719 case E_DImode:
85d9c13c
TS
3720 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3721 break;
4e10a5a7 3722 case E_SImode:
85d9c13c
TS
3723 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3724 break;
4e10a5a7 3725 case E_HImode:
85d9c13c
TS
3726 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3727 break;
4e10a5a7 3728 case E_QImode:
85d9c13c
TS
3729 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3730 break;
4e10a5a7 3731 case E_SFmode:
85d9c13c
TS
3732 result = V4SF_type_node;
3733 break;
4e10a5a7 3734 case E_DFmode:
85d9c13c
TS
3735 result = V2DF_type_node;
3736 break;
3737 default:
3738 break;
3739 }
3740
3741 /* Propagate qualifiers attached to the element type
3742 onto the vector type. */
3743 if (result && result != type && TYPE_QUALS (type))
3744 result = build_qualified_type (result, TYPE_QUALS (type));
3745
3746 *no_add_attrs = true; /* No need to hang on to the attribute. */
3747
3748 if (!result)
29d08eba 3749 warning (0, "%qE attribute ignored", name);
85d9c13c 3750 else
5dc11954 3751 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
85d9c13c
TS
3752
3753 return NULL_TREE;
3754}
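/* For illustration, the mapping implemented above is roughly:

     element type           resulting vector type
     signed char         -> V16QI      unsigned char    -> UV16QI
     signed short        -> V8HI       unsigned short   -> UV8HI
     signed int          -> V4SI       unsigned int     -> UV4SI
     signed long long    -> V2DI       unsigned long long -> UV2DI
     float               -> V4SF       double           -> V2DF

   i.e. each element type is widened to the 128-bit vector type with the
   same element mode, and qualifiers on the element propagate to the
   vector type.  */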
3755
9f5ed61a 3756/* Return nonzero if FUNC is a naked function. */
85d9c13c
TS
3757static int
3758spu_naked_function_p (tree func)
3759{
3760 tree a;
3761
3762 if (TREE_CODE (func) != FUNCTION_DECL)
3763 abort ();
3764
3765 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3766 return a != NULL_TREE;
3767}
3768
3769int
3770spu_initial_elimination_offset (int from, int to)
3771{
3772 int saved_regs_size = spu_saved_regs_size ();
3773 int sp_offset = 0;
416ff32e 3774 if (!crtl->is_leaf || crtl->outgoing_args_size
85d9c13c
TS
3775 || get_frame_size () || saved_regs_size)
3776 sp_offset = STACK_POINTER_OFFSET;
3777 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7310a2da 3778 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
85d9c13c 3779 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7310a2da 3780 return get_frame_size ();
85d9c13c 3781 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38173d38 3782 return sp_offset + crtl->outgoing_args_size
85d9c13c
TS
3783 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3784 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3785 return get_frame_size () + saved_regs_size + sp_offset;
7310a2da
SSF
3786 else
3787 gcc_unreachable ();
85d9c13c
TS
3788}
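/* Worked example (hypothetical frame sizes): with get_frame_size () == 48,
   outgoing_args_size == 16, saved_regs_size == 32 and a non-leaf function
   (so sp_offset == STACK_POINTER_OFFSET), the eliminations above give

     FRAME_POINTER -> STACK_POINTER      : 48 + 16 + STACK_POINTER_OFFSET
     FRAME_POINTER -> HARD_FRAME_POINTER : 48
     ARG_POINTER   -> STACK_POINTER      : STACK_POINTER_OFFSET + 16 + 48
                                           + 32 + STACK_POINTER_OFFSET
     ARG_POINTER   -> HARD_FRAME_POINTER : 48 + 32 + STACK_POINTER_OFFSET  */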
3789
3790rtx
586de218 3791spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
85d9c13c 3792{
ef4bddc2 3793 machine_mode mode = TYPE_MODE (type);
85d9c13c
TS
3794 int byte_size = ((mode == BLKmode)
3795 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3796
3797 /* Make sure small structs are left justified in a register. */
3798 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3799 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3800 {
ef4bddc2 3801 machine_mode smode;
85d9c13c
TS
3802 rtvec v;
3803 int i;
3804 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3805 int n = byte_size / UNITS_PER_WORD;
3806 v = rtvec_alloc (nregs);
3807 for (i = 0; i < n; i++)
3808 {
3809 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3810 gen_rtx_REG (TImode,
3811 FIRST_RETURN_REGNUM
3812 + i),
3813 GEN_INT (UNITS_PER_WORD * i));
3814 byte_size -= UNITS_PER_WORD;
3815 }
3816
3817 if (n < nregs)
3818 {
3819 if (byte_size < 4)
3820 byte_size = 4;
f67f4dff 3821 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
85d9c13c
TS
3822 RTVEC_ELT (v, n) =
3823 gen_rtx_EXPR_LIST (VOIDmode,
3824 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3825 GEN_INT (UNITS_PER_WORD * n));
3826 }
3827 return gen_rtx_PARALLEL (mode, v);
3828 }
3829 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3830}
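/* Worked example (hypothetical type): returning a 20-byte struct that is
   within the MAX_REGISTER_RETURN limit produces a PARALLEL of two pieces:
   a TImode register at FIRST_RETURN_REGNUM covering bytes 0..15, and an
   SImode register (the smallest integer mode holding the remaining 4
   bytes) at FIRST_RETURN_REGNUM + 1 with byte offset 16, keeping the
   struct left justified in the register file.  */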
3831
925ed112 3832static rtx
d5cc9181 3833spu_function_arg (cumulative_args_t cum_v,
ef4bddc2 3834 machine_mode mode,
925ed112 3835 const_tree type, bool named ATTRIBUTE_UNUSED)
85d9c13c 3836{
d5cc9181 3837 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
85d9c13c
TS
3838 int byte_size;
3839
0aa88287 3840 if (*cum >= MAX_REGISTER_ARGS)
85d9c13c
TS
3841 return 0;
3842
3843 byte_size = ((mode == BLKmode)
3844 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3845
3846 /* The ABI does not allow parameters to be passed partially in
3847 reg and partially in stack. */
0aa88287 3848 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
85d9c13c
TS
3849 return 0;
3850
3851 /* Make sure small structs are left justified in a register. */
3852 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3853 && byte_size < UNITS_PER_WORD && byte_size > 0)
3854 {
ef4bddc2 3855 machine_mode smode;
85d9c13c
TS
3856 rtx gr_reg;
3857 if (byte_size < 4)
3858 byte_size = 4;
f67f4dff 3859 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
85d9c13c 3860 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
0aa88287 3861 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
85d9c13c
TS
3862 const0_rtx);
3863 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3864 }
3865 else
0aa88287 3866 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
85d9c13c
TS
3867}
3868
925ed112 3869static void
ef4bddc2 3870spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
925ed112
NF
3871 const_tree type, bool named ATTRIBUTE_UNUSED)
3872{
d5cc9181
JR
3873 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3874
925ed112
NF
3875 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3876 ? 1
3877 : mode == BLKmode
3878 ? ((int_size_in_bytes (type) + 15) / 16)
3879 : mode == VOIDmode
3880 ? 1
c43f4279 3881 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
925ed112
NF
3882}
3883
76b0cbf8
RS
3884/* Implement TARGET_FUNCTION_ARG_PADDING. */
3885
3886static pad_direction
3887spu_function_arg_padding (machine_mode, const_tree)
3888{
3889 return PAD_UPWARD;
3890}
3891
85d9c13c
TS
3892/* Variable sized types are passed by reference. */
3893static bool
d5cc9181 3894spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
ef4bddc2 3895 machine_mode mode ATTRIBUTE_UNUSED,
586de218 3896 const_tree type, bool named ATTRIBUTE_UNUSED)
85d9c13c
TS
3897{
3898 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3899}
3900\f
3901
3902/* Var args. */
3903
3904/* Create and return the va_list datatype.
3905
3906 On SPU, va_list is an array type equivalent to
3907
3908 typedef struct __va_list_tag
3909 {
3910 void *__args __attribute__((__aligned(16)));
3911 void *__skip __attribute__((__aligned(16)));
3912
3913 } va_list[1];
3914
2f8e468b 3915 where __args points to the arg that will be returned by the next
85d9c13c
TS
3916 va_arg(), and __skip points to the previous stack frame such that
3917 when __args == __skip we should advance __args by 32 bytes. */
3918static tree
3919spu_build_builtin_va_list (void)
3920{
3921 tree f_args, f_skip, record, type_decl;
3922 bool owp;
3923
3924 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3925
3926 type_decl =
4c4bde29
AH
3927 build_decl (BUILTINS_LOCATION,
3928 TYPE_DECL, get_identifier ("__va_list_tag"), record);
85d9c13c 3929
4c4bde29
AH
3930 f_args = build_decl (BUILTINS_LOCATION,
3931 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3932 f_skip = build_decl (BUILTINS_LOCATION,
3933 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
85d9c13c
TS
3934
3935 DECL_FIELD_CONTEXT (f_args) = record;
fe37c7af 3936 SET_DECL_ALIGN (f_args, 128);
85d9c13c
TS
3937 DECL_USER_ALIGN (f_args) = 1;
3938
3939 DECL_FIELD_CONTEXT (f_skip) = record;
fe37c7af 3940 SET_DECL_ALIGN (f_skip, 128);
85d9c13c
TS
3941 DECL_USER_ALIGN (f_skip) = 1;
3942
0fd2eac2 3943 TYPE_STUB_DECL (record) = type_decl;
85d9c13c
TS
3944 TYPE_NAME (record) = type_decl;
3945 TYPE_FIELDS (record) = f_args;
910ad8de 3946 DECL_CHAIN (f_args) = f_skip;
85d9c13c
TS
3947
3948 /* We know this is being padded and we want it too. It is an internal
3949 type so hide the warnings from the user. */
3950 owp = warn_padded;
3951 warn_padded = false;
3952
3953 layout_type (record);
3954
3955 warn_padded = owp;
3956
3957 /* The correct type is an array type of one element. */
3958 return build_array_type (record, build_index_type (size_zero_node));
3959}
3960
3961/* Implement va_start by filling the va_list structure VALIST.
3962 NEXTARG points to the first anonymous stack argument.
3963
3964 The following global variables are used to initialize
3965 the va_list structure:
3966
38173d38 3967 crtl->args.info;
85d9c13c
TS
3968 the CUMULATIVE_ARGS for this function
3969
38173d38 3970 crtl->args.arg_offset_rtx:
85d9c13c
TS
3971 holds the offset of the first anonymous stack argument
3972 (relative to the virtual arg pointer). */
3973
d7bd8aeb 3974static void
85d9c13c
TS
3975spu_va_start (tree valist, rtx nextarg)
3976{
3977 tree f_args, f_skip;
3978 tree args, skip, t;
3979
3980 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
910ad8de 3981 f_skip = DECL_CHAIN (f_args);
85d9c13c 3982
86710a8b 3983 valist = build_simple_mem_ref (valist);
85d9c13c
TS
3984 args =
3985 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3986 skip =
3987 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3988
3989 /* Find the __args area. */
3990 t = make_tree (TREE_TYPE (args), nextarg);
38173d38 3991 if (crtl->args.pretend_args_size > 0)
5d49b6a7 3992 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
726a989a 3993 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
85d9c13c
TS
3994 TREE_SIDE_EFFECTS (t) = 1;
3995 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3996
3997 /* Find the __skip area. */
3998 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
5d49b6a7
RG
3999 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4000 - STACK_POINTER_OFFSET));
726a989a 4001 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
85d9c13c
TS
4002 TREE_SIDE_EFFECTS (t) = 1;
4003 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4004}
4005
4006/* Gimplify va_arg by updating the va_list structure
4007 VALIST as required to retrieve an argument of type
4008 TYPE, and returning that argument.
4009
4010 ret = va_arg(VALIST, TYPE);
4011
4012 generates code equivalent to:
4013
4014 paddedsize = (sizeof(TYPE) + 15) & -16;
4015 if (VALIST.__args + paddedsize > VALIST.__skip
4016 && VALIST.__args <= VALIST.__skip)
4017 addr = VALIST.__skip + 32;
4018 else
4019 addr = VALIST.__args;
4020 VALIST.__args = addr + paddedsize;
4021 ret = *(TYPE *)addr;
4022 */
4023static tree
726a989a
RB
4024spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4025 gimple_seq * post_p ATTRIBUTE_UNUSED)
85d9c13c
TS
4026{
4027 tree f_args, f_skip;
4028 tree args, skip;
4029 HOST_WIDE_INT size, rsize;
5d49b6a7 4030 tree addr, tmp;
85d9c13c
TS
4031 bool pass_by_reference_p;
4032
4033 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
910ad8de 4034 f_skip = DECL_CHAIN (f_args);
85d9c13c 4035
85d9c13c
TS
4036 args =
4037 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4038 skip =
4039 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4040
4041 addr = create_tmp_var (ptr_type_node, "va_arg");
85d9c13c
TS
4042
4043 /* if an object is dynamically sized, a pointer to it is passed
4044 instead of the object itself. */
a207915a
UW
4045 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4046 false);
85d9c13c
TS
4047 if (pass_by_reference_p)
4048 type = build_pointer_type (type);
4049 size = int_size_in_bytes (type);
4050 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4051
4052 /* build conditional expression to calculate addr. The expression
4053 will be gimplified later. */
5d49b6a7 4054 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
85d9c13c 4055 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
726a989a
RB
4056 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4057 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4058 unshare_expr (skip)));
85d9c13c
TS
4059
4060 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
5d49b6a7
RG
4061 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4062 unshare_expr (args));
85d9c13c 4063
726a989a 4064 gimplify_assign (addr, tmp, pre_p);
85d9c13c
TS
4065
4066 /* update VALIST.__args */
5d49b6a7 4067 tmp = fold_build_pointer_plus_hwi (addr, rsize);
726a989a 4068 gimplify_assign (unshare_expr (args), tmp, pre_p);
85d9c13c 4069
5b21f0f3
RG
4070 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4071 addr);
85d9c13c
TS
4072
4073 if (pass_by_reference_p)
4074 addr = build_va_arg_indirect_ref (addr);
4075
4076 return build_va_arg_indirect_ref (addr);
4077}
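/* Worked example (hypothetical call): for va_arg (ap, double), size is 8
   and rsize rounds up to one 16-byte quadword, so __args normally
   advances by 16.  Only when adding those 16 bytes would step over
   __skip (the condition built above) is the argument instead read from
   __skip + 32, as described in the comment before this function.  */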
4078
4079/* Save parameter registers starting with the register that corresponds
4080 to the first unnamed parameters. If the first unnamed parameter is
4081 in the stack then save no registers. Set pretend_args_size to the
4082 amount of space needed to save the registers. */
d5cc9181 4083static void
ef4bddc2 4084spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
85d9c13c
TS
4085 tree type, int *pretend_size, int no_rtl)
4086{
4087 if (!no_rtl)
4088 {
4089 rtx tmp;
4090 int regno;
4091 int offset;
d5cc9181 4092 int ncum = *get_cumulative_args (cum);
85d9c13c
TS
4093
4094 /* cum currently points to the last named argument, we want to
4095 start at the next argument. */
d5cc9181 4096 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
85d9c13c
TS
4097
4098 offset = -STACK_POINTER_OFFSET;
4099 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4100 {
4101 tmp = gen_frame_mem (V4SImode,
0a81f074 4102 plus_constant (Pmode, virtual_incoming_args_rtx,
85d9c13c
TS
4103 offset));
4104 emit_move_insn (tmp,
4105 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4106 offset += 16;
4107 }
4108 *pretend_size = offset + STACK_POINTER_OFFSET;
4109 }
4110}
4111\f
5efd84c5 4112static void
85d9c13c
TS
4113spu_conditional_register_usage (void)
4114{
4115 if (flag_pic)
4116 {
4117 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4118 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4119 }
85d9c13c
TS
4120}
4121
eec9405e
TS
4122/* This is called any time we inspect the alignment of a register for
4123 addresses. */
85d9c13c 4124static int
eec9405e 4125reg_aligned_for_addr (rtx x)
85d9c13c 4126{
eec9405e
TS
4127 int regno =
4128 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4129 return REGNO_POINTER_ALIGN (regno) >= 128;
85d9c13c
TS
4130}
4131
3d9cd79a
UW
4132/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4133 into its SYMBOL_REF_FLAGS. */
4134static void
4135spu_encode_section_info (tree decl, rtx rtl, int first)
4136{
4137 default_encode_section_info (decl, rtl, first);
4138
4139 /* If a variable has a forced alignment to < 16 bytes, mark it with
4140 SYMBOL_FLAG_ALIGN1. */
4141 if (TREE_CODE (decl) == VAR_DECL
4142 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4143 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4144}
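/* Example (hypothetical declaration): a definition along the lines of

     int x __attribute__ ((aligned (8)));

   carries DECL_USER_ALIGN with DECL_ALIGN == 64 bits (less than 128), so
   its SYMBOL_REF is tagged SYMBOL_FLAG_ALIGN1 and later code will not
   assume the usual 16-byte alignment when accessing it.  */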
4145
85d9c13c
TS
4146/* Return TRUE if we are certain the mem refers to a complete object
4147 which is both 16-byte aligned and padded to a 16-byte boundary. This
4148 would make it safe to store with a single instruction.
4149 We guarantee the alignment and padding for static objects by aligning
4150 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4151 FIXME: We currently cannot guarantee this for objects on the stack
4152 because assign_parm_setup_stack calls assign_stack_local with the
4153 alignment of the parameter mode and in that case the alignment never
4154 gets adjusted by LOCAL_ALIGNMENT. */
4155static int
4156store_with_one_insn_p (rtx mem)
4157{
ef4bddc2 4158 machine_mode mode = GET_MODE (mem);
85d9c13c 4159 rtx addr = XEXP (mem, 0);
eec9405e 4160 if (mode == BLKmode)
85d9c13c 4161 return 0;
eec9405e
TS
4162 if (GET_MODE_SIZE (mode) >= 16)
4163 return 1;
85d9c13c
TS
4164 /* Only static objects. */
4165 if (GET_CODE (addr) == SYMBOL_REF)
4166 {
4167 /* We use the associated declaration to make sure the access is
2f8e468b 4168 referring to the whole object.
dd5a833e 4169 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
85d9c13c
TS
4170 if it is necessary. Will there be cases where one exists, and
4171 the other does not? Will there be cases where both exist, but
4172 have different types? */
4173 tree decl = MEM_EXPR (mem);
4174 if (decl
4175 && TREE_CODE (decl) == VAR_DECL
4176 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4177 return 1;
4178 decl = SYMBOL_REF_DECL (addr);
4179 if (decl
4180 && TREE_CODE (decl) == VAR_DECL
4181 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4182 return 1;
4183 }
4184 return 0;
4185}
4186
eec9405e
TS
4187/* Return 1 when the address is not valid for a simple load and store as
4188 required by the '_mov*' patterns. We could make this less strict
4189 for loads, but we prefer mem's to look the same so they are more
4190 likely to be merged. */
4191static int
4192address_needs_split (rtx mem)
4193{
4194 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4195 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4196 || !(store_with_one_insn_p (mem)
4197 || mem_is_padded_component_ref (mem))))
4198 return 1;
4199
4200 return 0;
4201}
4202
299456f3
BE
4203static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4204static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4205static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4206
4207/* MEM is known to be an __ea qualified memory access. Emit a call to
4208 fetch the ppu memory to local store, and return its address in local
4209 store. */
4210
4211static void
4212ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4213{
4214 if (is_store)
4215 {
4216 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4217 if (!cache_fetch_dirty)
4218 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4219 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
db69559b 4220 ea_addr, EAmode, ndirty, SImode);
299456f3
BE
4221 }
4222 else
4223 {
4224 if (!cache_fetch)
4225 cache_fetch = init_one_libfunc ("__cache_fetch");
4226 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
db69559b 4227 ea_addr, EAmode);
299456f3
BE
4228 }
4229}
4230
4231/* Like ea_load_store, but do the cache tag comparison and, for stores,
4232 dirty bit marking, inline.
4233
4234 The cache control data structure is an array of
4235
4236 struct __cache_tag_array
4237 {
4238 unsigned int tag_lo[4];
4239 unsigned int tag_hi[4];
4240 void *data_pointer[4];
4241 int reserved[4];
4242 vector unsigned short dirty_bits[4];
4243 } */
4244
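/* For reference (derived from the accesses below): within one 128-byte
   element of the array above the fields sit at byte offsets 0 (tag_lo),
   16 (tag_hi), 32 (data_pointer), 48 (reserved) and 64 (dirty_bits),
   which is why the code loads the high tags from tag_addr + 16, the data
   pointers from tag_addr + 32, and indexes the dirty bits at offsets
   0x40/0x50/0x60/0x70.  */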
4245static void
4246ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4247{
4248 rtx ea_addr_si;
4249 HOST_WIDE_INT v;
4250 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4251 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4252 rtx index_mask = gen_reg_rtx (SImode);
4253 rtx tag_arr = gen_reg_rtx (Pmode);
4254 rtx splat_mask = gen_reg_rtx (TImode);
4255 rtx splat = gen_reg_rtx (V4SImode);
4256 rtx splat_hi = NULL_RTX;
4257 rtx tag_index = gen_reg_rtx (Pmode);
4258 rtx block_off = gen_reg_rtx (SImode);
4259 rtx tag_addr = gen_reg_rtx (Pmode);
4260 rtx tag = gen_reg_rtx (V4SImode);
4261 rtx cache_tag = gen_reg_rtx (V4SImode);
4262 rtx cache_tag_hi = NULL_RTX;
4263 rtx cache_ptrs = gen_reg_rtx (TImode);
4264 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4265 rtx tag_equal = gen_reg_rtx (V4SImode);
4266 rtx tag_equal_hi = NULL_RTX;
4267 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4268 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4269 rtx eq_index = gen_reg_rtx (SImode);
23c39aaa
DM
4270 rtx bcomp, hit_label, hit_ref, cont_label;
4271 rtx_insn *insn;
299456f3
BE
4272
4273 if (spu_ea_model != 32)
4274 {
4275 splat_hi = gen_reg_rtx (V4SImode);
4276 cache_tag_hi = gen_reg_rtx (V4SImode);
4277 tag_equal_hi = gen_reg_rtx (V4SImode);
4278 }
4279
0a81f074 4280 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
299456f3
BE
4281 emit_move_insn (tag_arr, tag_arr_sym);
4282 v = 0x0001020300010203LL;
4283 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4284 ea_addr_si = ea_addr;
4285 if (spu_ea_model != 32)
4286 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4287
4288 /* tag_index = ea_addr & (tag_array_size - 128) */
4289 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4290
4291 /* splat ea_addr to all 4 slots. */
4292 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4293 /* Similarly for high 32 bits of ea_addr. */
4294 if (spu_ea_model != 32)
4295 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4296
4297 /* block_off = ea_addr & 127 */
4298 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4299
4300 /* tag_addr = tag_arr + tag_index */
4301 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4302
4303 /* Read cache tags. */
4304 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4305 if (spu_ea_model != 32)
4306 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
0a81f074
RS
4307 plus_constant (Pmode,
4308 tag_addr, 16)));
299456f3
BE
4309
4310 /* tag = ea_addr & -128 */
4311 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4312
4313 /* Read all four cache data pointers. */
4314 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
0a81f074
RS
4315 plus_constant (Pmode,
4316 tag_addr, 32)));
299456f3
BE
4317
4318 /* Compare tags. */
4319 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4320 if (spu_ea_model != 32)
4321 {
4322 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4323 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4324 }
4325
4326 /* At most one of the tags compare equal, so tag_equal has one
4327 32-bit slot set to all 1's, with the other slots all zero.
4328 gbb picks off low bit from each byte in the 128-bit registers,
4329 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4330 we have a hit. */
4331 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4332 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4333
4334 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4335 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4336
4337 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4338 (rotating eq_index mod 16 bytes). */
4339 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4340 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4341
4342 /* Add block offset to form final data address. */
4343 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4344
4345 /* Check that we did hit. */
4346 hit_label = gen_label_rtx ();
4347 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4348 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
f7df4a84 4349 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
299456f3
BE
4350 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4351 hit_ref, pc_rtx)));
4352 /* Say that this branch is very likely to happen. */
5fa396ad 4353 add_reg_br_prob_note (insn, profile_probability::very_likely ());
299456f3
BE
4354
4355 ea_load_store (mem, is_store, ea_addr, data_addr);
4356 cont_label = gen_label_rtx ();
4357 emit_jump_insn (gen_jump (cont_label));
4358 emit_barrier ();
4359
4360 emit_label (hit_label);
4361
4362 if (is_store)
4363 {
4364 HOST_WIDE_INT v_hi;
4365 rtx dirty_bits = gen_reg_rtx (TImode);
4366 rtx dirty_off = gen_reg_rtx (SImode);
4367 rtx dirty_128 = gen_reg_rtx (TImode);
4368 rtx neg_block_off = gen_reg_rtx (SImode);
4369
4370 /* Set up mask with one dirty bit per byte of the mem we are
4371 writing, starting from top bit. */
4372 v_hi = v = -1;
4373 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4374 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4375 {
4376 v_hi = v;
4377 v = 0;
4378 }
4379 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4380
4381 /* Form index into cache dirty_bits. eq_index is one of
4382 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4383 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4384 offset to each of the four dirty_bits elements. */
4385 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4386
4387 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4388
4389 /* Rotate bit mask to proper bit. */
4390 emit_insn (gen_negsi2 (neg_block_off, block_off));
4391 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4392 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4393
4394 /* Or in the new dirty bits. */
4395 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4396
4397 /* Store. */
4398 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4399 }
4400
4401 emit_label (cont_label);
4402}
4403
4404static rtx
4405expand_ea_mem (rtx mem, bool is_store)
4406{
4407 rtx ea_addr;
4408 rtx data_addr = gen_reg_rtx (Pmode);
4409 rtx new_mem;
4410
4411 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4412 if (optimize_size || optimize == 0)
4413 ea_load_store (mem, is_store, ea_addr, data_addr);
4414 else
4415 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4416
4417 if (ea_alias_set == -1)
4418 ea_alias_set = new_alias_set ();
4419
4420 /* We generate a new MEM RTX to refer to the copy of the data
4421 in the cache. We do not copy memory attributes (except the
4422 alignment) from the original MEM, as they may no longer apply
4423 to the cache copy. */
4424 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4425 set_mem_alias_set (new_mem, ea_alias_set);
4426 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4427
4428 return new_mem;
4429}
4430
85d9c13c 4431int
ef4bddc2 4432spu_expand_mov (rtx * ops, machine_mode mode)
85d9c13c
TS
4433{
4434 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
46fc2305
UW
4435 {
4436 /* Perform the move in the destination SUBREG's inner mode. */
4437 ops[0] = SUBREG_REG (ops[0]);
4438 mode = GET_MODE (ops[0]);
4439 ops[1] = gen_lowpart_common (mode, ops[1]);
4440 gcc_assert (ops[1]);
4441 }
85d9c13c
TS
4442
4443 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4444 {
4445 rtx from = SUBREG_REG (ops[1]);
304b9962 4446 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
85d9c13c
TS
4447
4448 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4449 && GET_MODE_CLASS (imode) == MODE_INT
4450 && subreg_lowpart_p (ops[1]));
4451
4452 if (GET_MODE_SIZE (imode) < 4)
4caab5ba
UW
4453 imode = SImode;
4454 if (imode != GET_MODE (from))
4455 from = gen_rtx_SUBREG (imode, from, 0);
85d9c13c
TS
4456
4457 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4458 {
947131ba
RS
4459 enum insn_code icode = convert_optab_handler (trunc_optab,
4460 mode, imode);
85d9c13c
TS
4461 emit_insn (GEN_FCN (icode) (ops[0], from));
4462 }
4463 else
4464 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4465 return 1;
4466 }
4467
4468 /* At least one of the operands needs to be a register. */
4469 if ((reload_in_progress | reload_completed) == 0
4470 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4471 {
4472 rtx temp = force_reg (mode, ops[1]);
4473 emit_move_insn (ops[0], temp);
4474 return 1;
4475 }
4476 if (reload_in_progress || reload_completed)
4477 {
a1c6e4b8
TS
4478 if (CONSTANT_P (ops[1]))
4479 return spu_split_immediate (ops);
85d9c13c
TS
4480 return 0;
4481 }
eec9405e
TS
4482
4483 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4484 extend them. */
4485 if (GET_CODE (ops[1]) == CONST_INT)
85d9c13c 4486 {
eec9405e
TS
4487 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4488 if (val != INTVAL (ops[1]))
85d9c13c 4489 {
eec9405e
TS
4490 emit_move_insn (ops[0], GEN_INT (val));
4491 return 1;
85d9c13c
TS
4492 }
4493 }
eec9405e 4494 if (MEM_P (ops[0]))
299456f3
BE
4495 {
4496 if (MEM_ADDR_SPACE (ops[0]))
4497 ops[0] = expand_ea_mem (ops[0], true);
4498 return spu_split_store (ops);
4499 }
eec9405e 4500 if (MEM_P (ops[1]))
299456f3
BE
4501 {
4502 if (MEM_ADDR_SPACE (ops[1]))
4503 ops[1] = expand_ea_mem (ops[1], false);
4504 return spu_split_load (ops);
4505 }
eec9405e 4506
85d9c13c
TS
4507 return 0;
4508}
4509
eec9405e
TS
4510static void
4511spu_convert_move (rtx dst, rtx src)
85d9c13c 4512{
ef4bddc2 4513 machine_mode mode = GET_MODE (dst);
636bfdfa 4514 machine_mode int_mode = int_mode_for_mode (mode).require ();
eec9405e
TS
4515 rtx reg;
4516 gcc_assert (GET_MODE (src) == TImode);
4517 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
f7df4a84 4518 emit_insn (gen_rtx_SET (reg,
eec9405e
TS
4519 gen_rtx_TRUNCATE (int_mode,
4520 gen_rtx_LSHIFTRT (TImode, src,
4521 GEN_INT (int_mode == DImode ? 64 : 96)))));
4522 if (int_mode != mode)
4523 {
4524 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4525 emit_move_insn (dst, reg);
4526 }
4527}
85d9c13c 4528
eec9405e
TS
4529/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4530 the address from SRC and SRC+16. Return a REG or CONST_INT that
4531 specifies how many bytes to rotate the loaded registers, plus any
4532 extra from EXTRA_ROTQBY. The address and rotate amounts are
4533 normalized to improve merging of loads and rotate computations. */
4534static rtx
4535spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4536{
4537 rtx addr = XEXP (src, 0);
4538 rtx p0, p1, rot, addr0, addr1;
4539 int rot_amt;
85d9c13c
TS
4540
4541 rot = 0;
4542 rot_amt = 0;
eec9405e
TS
4543
4544 if (MEM_ALIGN (src) >= 128)
4545 /* Address is already aligned; simply perform a TImode load. */ ;
4546 else if (GET_CODE (addr) == PLUS)
85d9c13c
TS
4547 {
4548 /* 8 cases:
4549 aligned reg + aligned reg => lqx
4550 aligned reg + unaligned reg => lqx, rotqby
4551 aligned reg + aligned const => lqd
4552 aligned reg + unaligned const => lqd, rotqbyi
4553 unaligned reg + aligned reg => lqx, rotqby
4554 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4555 unaligned reg + aligned const => lqd, rotqby
4556 unaligned reg + unaligned const -> not allowed by legitimate address
4557 */
4558 p0 = XEXP (addr, 0);
4559 p1 = XEXP (addr, 1);
eec9405e 4560 if (!reg_aligned_for_addr (p0))
85d9c13c 4561 {
eec9405e 4562 if (REG_P (p1) && !reg_aligned_for_addr (p1))
85d9c13c 4563 {
eec9405e
TS
4564 rot = gen_reg_rtx (SImode);
4565 emit_insn (gen_addsi3 (rot, p0, p1));
4566 }
4567 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4568 {
4569 if (INTVAL (p1) > 0
4570 && REG_POINTER (p0)
4571 && INTVAL (p1) * BITS_PER_UNIT
4572 < REGNO_POINTER_ALIGN (REGNO (p0)))
4573 {
4574 rot = gen_reg_rtx (SImode);
4575 emit_insn (gen_addsi3 (rot, p0, p1));
4576 addr = p0;
4577 }
4578 else
4579 {
4580 rtx x = gen_reg_rtx (SImode);
4581 emit_move_insn (x, p1);
4582 if (!spu_arith_operand (p1, SImode))
4583 p1 = x;
4584 rot = gen_reg_rtx (SImode);
4585 emit_insn (gen_addsi3 (rot, p0, p1));
4586 addr = gen_rtx_PLUS (Pmode, p0, x);
4587 }
85d9c13c
TS
4588 }
4589 else
4590 rot = p0;
4591 }
4592 else
4593 {
4594 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4595 {
4596 rot_amt = INTVAL (p1) & 15;
eec9405e
TS
4597 if (INTVAL (p1) & -16)
4598 {
4599 p1 = GEN_INT (INTVAL (p1) & -16);
4600 addr = gen_rtx_PLUS (SImode, p0, p1);
4601 }
4602 else
4603 addr = p0;
85d9c13c 4604 }
eec9405e 4605 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
85d9c13c
TS
4606 rot = p1;
4607 }
4608 }
eec9405e 4609 else if (REG_P (addr))
85d9c13c 4610 {
eec9405e 4611 if (!reg_aligned_for_addr (addr))
85d9c13c
TS
4612 rot = addr;
4613 }
4614 else if (GET_CODE (addr) == CONST)
4615 {
4616 if (GET_CODE (XEXP (addr, 0)) == PLUS
4617 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4618 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4619 {
4620 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4621 if (rot_amt & -16)
4622 addr = gen_rtx_CONST (Pmode,
4623 gen_rtx_PLUS (Pmode,
4624 XEXP (XEXP (addr, 0), 0),
4625 GEN_INT (rot_amt & -16)));
4626 else
4627 addr = XEXP (XEXP (addr, 0), 0);
4628 }
4629 else
eec9405e
TS
4630 {
4631 rot = gen_reg_rtx (Pmode);
4632 emit_move_insn (rot, addr);
4633 }
85d9c13c
TS
4634 }
4635 else if (GET_CODE (addr) == CONST_INT)
4636 {
4637 rot_amt = INTVAL (addr);
4638 addr = GEN_INT (rot_amt & -16);
4639 }
4640 else if (!ALIGNED_SYMBOL_REF_P (addr))
eec9405e
TS
4641 {
4642 rot = gen_reg_rtx (Pmode);
4643 emit_move_insn (rot, addr);
4644 }
85d9c13c 4645
eec9405e 4646 rot_amt += extra_rotby;
85d9c13c
TS
4647
4648 rot_amt &= 15;
4649
4650 if (rot && rot_amt)
4651 {
eec9405e
TS
4652 rtx x = gen_reg_rtx (SImode);
4653 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4654 rot = x;
85d9c13c
TS
4655 rot_amt = 0;
4656 }
eec9405e
TS
4657 if (!rot && rot_amt)
4658 rot = GEN_INT (rot_amt);
4659
4660 addr0 = copy_rtx (addr);
4661 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4662 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4663
4664 if (dst1)
4665 {
0a81f074 4666 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
eec9405e
TS
4667 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4668 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4669 }
85d9c13c 4670
eec9405e
TS
4671 return rot;
4672}
4673
4674int
4675spu_split_load (rtx * ops)
4676{
ef4bddc2 4677 machine_mode mode = GET_MODE (ops[0]);
eec9405e
TS
4678 rtx addr, load, rot;
4679 int rot_amt;
85d9c13c 4680
eec9405e
TS
4681 if (GET_MODE_SIZE (mode) >= 16)
4682 return 0;
85d9c13c 4683
eec9405e
TS
4684 addr = XEXP (ops[1], 0);
4685 gcc_assert (GET_CODE (addr) != AND);
4686
4687 if (!address_needs_split (ops[1]))
4688 {
4689 ops[1] = change_address (ops[1], TImode, addr);
4690 load = gen_reg_rtx (TImode);
4691 emit_insn (gen__movti (load, ops[1]));
4692 spu_convert_move (ops[0], load);
4693 return 1;
4694 }
4695
4696 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4697
4698 load = gen_reg_rtx (TImode);
4699 rot = spu_expand_load (load, 0, ops[1], rot_amt);
85d9c13c
TS
4700
4701 if (rot)
4702 emit_insn (gen_rotqby_ti (load, load, rot));
85d9c13c 4703
eec9405e
TS
4704 spu_convert_move (ops[0], load);
4705 return 1;
85d9c13c
TS
4706}
4707
eec9405e 4708int
85d9c13c
TS
4709spu_split_store (rtx * ops)
4710{
ef4bddc2 4711 machine_mode mode = GET_MODE (ops[0]);
eec9405e 4712 rtx reg;
85d9c13c
TS
4713 rtx addr, p0, p1, p1_lo, smem;
4714 int aform;
4715 int scalar;
4716
eec9405e
TS
4717 if (GET_MODE_SIZE (mode) >= 16)
4718 return 0;
4719
85d9c13c 4720 addr = XEXP (ops[0], 0);
eec9405e
TS
4721 gcc_assert (GET_CODE (addr) != AND);
4722
4723 if (!address_needs_split (ops[0]))
4724 {
4725 reg = gen_reg_rtx (TImode);
4726 emit_insn (gen_spu_convert (reg, ops[1]));
4727 ops[0] = change_address (ops[0], TImode, addr);
4728 emit_move_insn (ops[0], reg);
4729 return 1;
4730 }
85d9c13c
TS
4731
4732 if (GET_CODE (addr) == PLUS)
4733 {
4734 /* 8 cases:
4735 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4736 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4737 aligned reg + aligned const => lqd, c?d, shuf, stqx
4738 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4739 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4740 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4741 unaligned reg + aligned const => lqd, c?d, shuf, stqx
eec9405e 4742 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
85d9c13c
TS
4743 */
4744 aform = 0;
4745 p0 = XEXP (addr, 0);
4746 p1 = p1_lo = XEXP (addr, 1);
eec9405e 4747 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
85d9c13c
TS
4748 {
4749 p1_lo = GEN_INT (INTVAL (p1) & 15);
eec9405e
TS
4750 if (reg_aligned_for_addr (p0))
4751 {
4752 p1 = GEN_INT (INTVAL (p1) & -16);
4753 if (p1 == const0_rtx)
4754 addr = p0;
4755 else
4756 addr = gen_rtx_PLUS (SImode, p0, p1);
4757 }
4758 else
4759 {
4760 rtx x = gen_reg_rtx (SImode);
4761 emit_move_insn (x, p1);
4762 addr = gen_rtx_PLUS (SImode, p0, x);
4763 }
85d9c13c
TS
4764 }
4765 }
eec9405e 4766 else if (REG_P (addr))
85d9c13c
TS
4767 {
4768 aform = 0;
4769 p0 = addr;
4770 p1 = p1_lo = const0_rtx;
4771 }
4772 else
4773 {
4774 aform = 1;
4775 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4776 p1 = 0; /* aform doesn't use p1 */
4777 p1_lo = addr;
4778 if (ALIGNED_SYMBOL_REF_P (addr))
4779 p1_lo = const0_rtx;
eec9405e
TS
4780 else if (GET_CODE (addr) == CONST
4781 && GET_CODE (XEXP (addr, 0)) == PLUS
4782 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4783 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
85d9c13c 4784 {
eec9405e
TS
4785 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4786 if ((v & -16) != 0)
4787 addr = gen_rtx_CONST (Pmode,
4788 gen_rtx_PLUS (Pmode,
4789 XEXP (XEXP (addr, 0), 0),
4790 GEN_INT (v & -16)));
4791 else
4792 addr = XEXP (XEXP (addr, 0), 0);
4793 p1_lo = GEN_INT (v & 15);
85d9c13c
TS
4794 }
4795 else if (GET_CODE (addr) == CONST_INT)
4796 {
4797 p1_lo = GEN_INT (INTVAL (addr) & 15);
4798 addr = GEN_INT (INTVAL (addr) & -16);
4799 }
eec9405e
TS
4800 else
4801 {
4802 p1_lo = gen_reg_rtx (SImode);
4803 emit_move_insn (p1_lo, addr);
4804 }
85d9c13c
TS
4805 }
4806
d707fc77 4807 gcc_assert (aform == 0 || aform == 1);
eec9405e 4808 reg = gen_reg_rtx (TImode);
09aad82b 4809
85d9c13c
TS
4810 scalar = store_with_one_insn_p (ops[0]);
4811 if (!scalar)
4812 {
4813 /* We could copy the flags from the ops[0] MEM to lmem here.
4814 We don't because we want this load to be optimized away if
4815 possible, and copying the flags will prevent that in certain
4816 cases, e.g. consider the volatile flag. */
4817
eec9405e 4818 rtx pat = gen_reg_rtx (TImode);
09aad82b
TS
4819 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4820 set_mem_alias_set (lmem, 0);
4821 emit_insn (gen_movti (reg, lmem));
85d9c13c 4822
eec9405e 4823 if (!p0 || reg_aligned_for_addr (p0))
85d9c13c
TS
4824 p0 = stack_pointer_rtx;
4825 if (!p1_lo)
4826 p1_lo = const0_rtx;
4827
4828 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4829 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4830 }
85d9c13c
TS
4831 else
4832 {
4833 if (GET_CODE (ops[1]) == REG)
4834 emit_insn (gen_spu_convert (reg, ops[1]));
4835 else if (GET_CODE (ops[1]) == SUBREG)
4836 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4837 else
4838 abort ();
4839 }
4840
4841 if (GET_MODE_SIZE (mode) < 4 && scalar)
eec9405e
TS
4842 emit_insn (gen_ashlti3
4843 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
85d9c13c 4844
eec9405e 4845 smem = change_address (ops[0], TImode, copy_rtx (addr));
85d9c13c
TS
4846 /* We can't use the previous alias set because the memory has changed
4847 size and can potentially overlap objects of other types. */
4848 set_mem_alias_set (smem, 0);
4849
09aad82b 4850 emit_insn (gen_movti (smem, reg));
eec9405e 4851 return 1;
85d9c13c
TS
4852}
4853
4854/* Return TRUE if X is MEM which is a struct member reference
4855 and the member can safely be loaded and stored with a single
4856 instruction because it is padded. */
4857static int
4858mem_is_padded_component_ref (rtx x)
4859{
4860 tree t = MEM_EXPR (x);
4861 tree r;
4862 if (!t || TREE_CODE (t) != COMPONENT_REF)
4863 return 0;
4864 t = TREE_OPERAND (t, 1);
4865 if (!t || TREE_CODE (t) != FIELD_DECL
4866 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4867 return 0;
4868 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4869 r = DECL_FIELD_CONTEXT (t);
4870 if (!r || TREE_CODE (r) != RECORD_TYPE)
4871 return 0;
4872 /* Make sure they are the same mode */
4873 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4874 return 0;
4875 /* If there are no following fields then the field alignment assures
2f8e468b
KH
4876 the structure is padded to the alignment which means this field is
4877 padded too. */
85d9c13c
TS
4878 if (TREE_CHAIN (t) == 0)
4879 return 1;
4880 /* If the following field is also aligned then this field will be
4881 padded. */
4882 t = TREE_CHAIN (t);
4883 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4884 return 1;
4885 return 0;
4886}
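/* Illustrative cases (hypothetical types): given

     struct a { int v __attribute__ ((aligned (16))); };
     struct b { int v __attribute__ ((aligned (16)));
                int w __attribute__ ((aligned (16))); };
     struct c { int v __attribute__ ((aligned (16))); int w; };

   an SImode MEM for field v satisfies the checks above in 'a' (v is the
   last field, so the struct is padded out to its 16-byte alignment) and
   in 'b' (the following field is also 16-byte aligned), but not in 'c',
   where a 16-byte store at v could clobber w.  */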
4887
32fb22af
SL
4888/* Parse the -mfixed-range= option string. */
4889static void
4890fix_range (const char *const_str)
4891{
4892 int i, first, last;
4893 char *str, *dash, *comma;
4894
4895 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4896 REG2 are either register names or register numbers. The effect
4897 of this option is to mark the registers in the range from REG1 to
4898 REG2 as ``fixed'' so they won't be used by the compiler. */
4899
4900 i = strlen (const_str);
4901 str = (char *) alloca (i + 1);
4902 memcpy (str, const_str, i + 1);
4903
4904 while (1)
4905 {
4906 dash = strchr (str, '-');
4907 if (!dash)
4908 {
4909 warning (0, "value of -mfixed-range must have form REG1-REG2");
4910 return;
4911 }
4912 *dash = '\0';
4913 comma = strchr (dash + 1, ',');
4914 if (comma)
4915 *comma = '\0';
4916
4917 first = decode_reg_name (str);
4918 if (first < 0)
4919 {
4920 warning (0, "unknown register name: %s", str);
4921 return;
4922 }
4923
4924 last = decode_reg_name (dash + 1);
4925 if (last < 0)
4926 {
4927 warning (0, "unknown register name: %s", dash + 1);
4928 return;
4929 }
4930
4931 *dash = '-';
4932
4933 if (first > last)
4934 {
4935 warning (0, "%s-%s is an empty range", str, dash + 1);
4936 return;
4937 }
4938
4939 for (i = first; i <= last; ++i)
4940 fixed_regs[i] = call_used_regs[i] = 1;
4941
4942 if (!comma)
4943 break;
4944
4945 *comma = ',';
4946 str = comma + 1;
4947 }
4948}
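/* Example usage (register numbers chosen arbitrarily):

     -mfixed-range=80-127
     -mfixed-range=80-90,100-110

   marks the given ranges of registers as fixed so the register allocator
   will not use them; a reversed range such as 90-80 is rejected with the
   "empty range" warning above.  */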
4949
85d9c13c
TS
4950/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4951 can be generated using the fsmbi instruction. */
4952int
4953fsmbi_const_p (rtx x)
4954{
a1c6e4b8
TS
4955 if (CONSTANT_P (x))
4956 {
73701e27 4957 /* We can always choose TImode for CONST_INT because the high bits
a1c6e4b8 4958 of an SImode will always be all 1s, i.e., valid for fsmbi. */
73701e27 4959 enum immediate_class c = classify_immediate (x, TImode);
6fb5fa3c 4960 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
a1c6e4b8
TS
4961 }
4962 return 0;
4963}
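/* Example (derived from the fsmbi semantics): the V4SI constant
   { -1, 0, -1, 0 } consists only of 0x00 and 0xff bytes, so it can be
   built with a single fsmbi whose 16-bit immediate is 0xf0f0, one mask
   bit per byte of the 128-bit result.  */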
4964
4965/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4966 can be generated using the cbd, chd, cwd or cdd instruction. */
4967int
ef4bddc2 4968cpat_const_p (rtx x, machine_mode mode)
a1c6e4b8
TS
4969{
4970 if (CONSTANT_P (x))
4971 {
4972 enum immediate_class c = classify_immediate (x, mode);
4973 return c == IC_CPAT;
4974 }
4975 return 0;
4976}
85d9c13c 4977
a1c6e4b8
TS
4978rtx
4979gen_cpat_const (rtx * ops)
4980{
4981 unsigned char dst[16];
4982 int i, offset, shift, isize;
4983 if (GET_CODE (ops[3]) != CONST_INT
4984 || GET_CODE (ops[2]) != CONST_INT
4985 || (GET_CODE (ops[1]) != CONST_INT
4986 && GET_CODE (ops[1]) != REG))
4987 return 0;
4988 if (GET_CODE (ops[1]) == REG
4989 && (!REG_POINTER (ops[1])
4990 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4991 return 0;
85d9c13c
TS
4992
4993 for (i = 0; i < 16; i++)
a1c6e4b8
TS
4994 dst[i] = i + 16;
4995 isize = INTVAL (ops[3]);
4996 if (isize == 1)
4997 shift = 3;
4998 else if (isize == 2)
4999 shift = 2;
5000 else
5001 shift = 0;
5002 offset = (INTVAL (ops[2]) +
5003 (GET_CODE (ops[1]) ==
5004 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5005 for (i = 0; i < isize; i++)
5006 dst[offset + i] = i + shift;
5007 return array_to_constant (TImode, dst);
85d9c13c
TS
5008}
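/* Worked example (derived from the code above): for a 4-byte element at
   byte offset 4 within the quadword (isize == 4, so shift == 0 and
   offset == 4), the generated shuffle-control constant is

     { 16,17,18,19,  0,1,2,3,  24,25,26,27,  28,29,30,31 }

   i.e. bytes 4..7 select the new value (shufb operand A) while every
   other byte keeps the original quadword (operand B), roughly what a cwd
   instruction computes at run time.  */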
5009
5010/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5011 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5012 than 16 bytes, the value is repeated across the rest of the array. */
5013void
ef4bddc2 5014constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
85d9c13c
TS
5015{
5016 HOST_WIDE_INT val;
5017 int i, j, first;
5018
5019 memset (arr, 0, 16);
5020 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5021 if (GET_CODE (x) == CONST_INT
5022 || (GET_CODE (x) == CONST_DOUBLE
5023 && (mode == SFmode || mode == DFmode)))
5024 {
5025 gcc_assert (mode != VOIDmode && mode != BLKmode);
5026
5027 if (GET_CODE (x) == CONST_DOUBLE)
5028 val = const_double_to_hwint (x);
5029 else
5030 val = INTVAL (x);
5031 first = GET_MODE_SIZE (mode) - 1;
5032 for (i = first; i >= 0; i--)
5033 {
5034 arr[i] = val & 0xff;
5035 val >>= 8;
5036 }
5037 /* Splat the constant across the whole array. */
5038 for (j = 0, i = first + 1; i < 16; i++)
5039 {
5040 arr[i] = arr[j];
5041 j = (j == first) ? 0 : j + 1;
5042 }
5043 }
5044 else if (GET_CODE (x) == CONST_DOUBLE)
5045 {
5046 val = CONST_DOUBLE_LOW (x);
5047 for (i = 15; i >= 8; i--)
5048 {
5049 arr[i] = val & 0xff;
5050 val >>= 8;
5051 }
5052 val = CONST_DOUBLE_HIGH (x);
5053 for (i = 7; i >= 0; i--)
5054 {
5055 arr[i] = val & 0xff;
5056 val >>= 8;
5057 }
5058 }
5059 else if (GET_CODE (x) == CONST_VECTOR)
5060 {
5061 int units;
5062 rtx elt;
5063 mode = GET_MODE_INNER (mode);
5064 units = CONST_VECTOR_NUNITS (x);
5065 for (i = 0; i < units; i++)
5066 {
5067 elt = CONST_VECTOR_ELT (x, i);
5068 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5069 {
5070 if (GET_CODE (elt) == CONST_DOUBLE)
5071 val = const_double_to_hwint (elt);
5072 else
5073 val = INTVAL (elt);
5074 first = GET_MODE_SIZE (mode) - 1;
5075 if (first + i * GET_MODE_SIZE (mode) > 16)
5076 abort ();
5077 for (j = first; j >= 0; j--)
5078 {
5079 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5080 val >>= 8;
5081 }
5082 }
5083 }
5084 }
5085 else
5086 gcc_unreachable();
5087}
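/* Worked example: for the SImode constant 0x01020304 the array is filled
   big-endian with the value bytes and then splatted, giving (in hex)

     arr = { 01 02 03 04  01 02 03 04  01 02 03 04  01 02 03 04 }

   while a DImode or DFmode value repeats an 8-byte pattern twice.  */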
5088
5089/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5090 smaller than 16 bytes, use the bytes that would represent that value
5091 in a register, e.g., for QImode return the value of arr[3]. */
5092rtx
ef4bddc2 5093array_to_constant (machine_mode mode, const unsigned char arr[16])
85d9c13c 5094{
ef4bddc2 5095 machine_mode inner_mode;
85d9c13c
TS
5096 rtvec v;
5097 int units, size, i, j, k;
5098 HOST_WIDE_INT val;
5099
5100 if (GET_MODE_CLASS (mode) == MODE_INT
5101 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5102 {
5103 j = GET_MODE_SIZE (mode);
5104 i = j < 4 ? 4 - j : 0;
5105 for (val = 0; i < j; i++)
5106 val = (val << 8) | arr[i];
5107 val = trunc_int_for_mode (val, mode);
5108 return GEN_INT (val);
5109 }
5110
5111 if (mode == TImode)
5112 {
5113 HOST_WIDE_INT high;
5114 for (i = high = 0; i < 8; i++)
5115 high = (high << 8) | arr[i];
5116 for (i = 8, val = 0; i < 16; i++)
5117 val = (val << 8) | arr[i];
5118 return immed_double_const (val, high, TImode);
5119 }
5120 if (mode == SFmode)
5121 {
5122 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5123 val = trunc_int_for_mode (val, SImode);
9dc5f9ba 5124 return hwint_to_const_double (SFmode, val);
85d9c13c
TS
5125 }
5126 if (mode == DFmode)
5127 {
e41e2ab4
UW
5128 for (i = 0, val = 0; i < 8; i++)
5129 val = (val << 8) | arr[i];
9dc5f9ba 5130 return hwint_to_const_double (DFmode, val);
85d9c13c
TS
5131 }
5132
5133 if (!VECTOR_MODE_P (mode))
5134 abort ();
5135
5136 units = GET_MODE_NUNITS (mode);
5137 size = GET_MODE_UNIT_SIZE (mode);
5138 inner_mode = GET_MODE_INNER (mode);
5139 v = rtvec_alloc (units);
5140
5141 for (k = i = 0; i < units; ++i)
5142 {
5143 val = 0;
5144 for (j = 0; j < size; j++, k++)
5145 val = (val << 8) | arr[k];
5146
5147 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5148 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5149 else
5150 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5151 }
5152 if (k > 16)
5153 abort ();
5154
5155 return gen_rtx_CONST_VECTOR (mode, v);
5156}
5157
5158static void
5159reloc_diagnostic (rtx x)
5160{
c5d75364 5161 tree decl = 0;
85d9c13c
TS
5162 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5163 return;
5164
5165 if (GET_CODE (x) == SYMBOL_REF)
5166 decl = SYMBOL_REF_DECL (x);
5167 else if (GET_CODE (x) == CONST
5168 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5169 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5170
5171 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5172 if (decl && !DECL_P (decl))
5173 decl = 0;
5174
85d9c13c
TS
5175 /* The decl could be a string constant. */
5176 if (decl && DECL_P (decl))
c5d75364
MLI
5177 {
5178 location_t loc;
5179 /* We use last_assemble_variable_decl to get line information. It's
5180 not always going to be right and might not even be close, but will
5181 be right for the more common cases. */
5182 if (!last_assemble_variable_decl || in_section == ctors_section)
5183 loc = DECL_SOURCE_LOCATION (decl);
5184 else
5185 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
85d9c13c 5186
c5d75364
MLI
5187 if (TARGET_WARN_RELOC)
5188 warning_at (loc, 0,
5189 "creating run-time relocation for %qD", decl);
5190 else
5191 error_at (loc,
5192 "creating run-time relocation for %qD", decl);
5193 }
5194 else
5195 {
5196 if (TARGET_WARN_RELOC)
5197 warning_at (input_location, 0, "creating run-time relocation");
5198 else
5199 error_at (input_location, "creating run-time relocation");
5200 }
85d9c13c
TS
5201}
5202
5203/* Hook into assemble_integer so we can generate an error for run-time
5204 relocations. The SPU ABI disallows them. */
5205static bool
5206spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5207{
5208 /* By default run-time relocations aren't supported, but we allow them
5209 in case users support it in their own run-time loader. And we provide
5210 a warning for those users that don't. */
5211 if ((GET_CODE (x) == SYMBOL_REF)
5212 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5213 reloc_diagnostic (x);
5214
5215 return default_assemble_integer (x, size, aligned_p);
5216}
5217
5218static void
5219spu_asm_globalize_label (FILE * file, const char *name)
5220{
5221 fputs ("\t.global\t", file);
5222 assemble_name (file, name);
5223 fputs ("\n", file);
5224}
5225
5226static bool
e548c9df 5227spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
68f932c4 5228 int opno ATTRIBUTE_UNUSED, int *total,
f40751dd 5229 bool speed ATTRIBUTE_UNUSED)
85d9c13c 5230{
e548c9df 5231 int code = GET_CODE (x);
85d9c13c
TS
5232 int cost = COSTS_N_INSNS (2);
5233
5234 /* Folding to a CONST_VECTOR will use extra space but there might
5235 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9fc4da9d 5236 only if it allows us to fold away multiple insns. Changing the cost
85d9c13c
TS
5237 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5238 because this cost will only be compared against a single insn.
5239 if (code == CONST_VECTOR)
1a627b35 5240 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
85d9c13c
TS
5241 */
5242
5243 /* Use defaults for float operations. Not accurate but good enough. */
5244 if (mode == DFmode)
5245 {
5246 *total = COSTS_N_INSNS (13);
5247 return true;
5248 }
5249 if (mode == SFmode)
5250 {
5251 *total = COSTS_N_INSNS (6);
5252 return true;
5253 }
5254 switch (code)
5255 {
5256 case CONST_INT:
5257 if (satisfies_constraint_K (x))
5258 *total = 0;
5259 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5260 *total = COSTS_N_INSNS (1);
5261 else
5262 *total = COSTS_N_INSNS (3);
5263 return true;
5264
5265 case CONST:
5266 *total = COSTS_N_INSNS (3);
5267 return true;
5268
5269 case LABEL_REF:
5270 case SYMBOL_REF:
5271 *total = COSTS_N_INSNS (0);
5272 return true;
5273
5274 case CONST_DOUBLE:
5275 *total = COSTS_N_INSNS (5);
5276 return true;
5277
5278 case FLOAT_EXTEND:
5279 case FLOAT_TRUNCATE:
5280 case FLOAT:
5281 case UNSIGNED_FLOAT:
5282 case FIX:
5283 case UNSIGNED_FIX:
5284 *total = COSTS_N_INSNS (7);
5285 return true;
5286
5287 case PLUS:
5288 if (mode == TImode)
5289 {
5290 *total = COSTS_N_INSNS (9);
5291 return true;
5292 }
5293 break;
5294
5295 case MULT:
5296 cost =
5297 GET_CODE (XEXP (x, 0)) ==
5298 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5299 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5300 {
5301 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5302 {
5303 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5304 cost = COSTS_N_INSNS (14);
5305 if ((val & 0xffff) == 0)
5306 cost = COSTS_N_INSNS (9);
5307 else if (val > 0 && val < 0x10000)
5308 cost = COSTS_N_INSNS (11);
5309 }
5310 }
5311 *total = cost;
5312 return true;
5313 case DIV:
5314 case UDIV:
5315 case MOD:
5316 case UMOD:
5317 *total = COSTS_N_INSNS (20);
5318 return true;
5319 case ROTATE:
5320 case ROTATERT:
5321 case ASHIFT:
5322 case ASHIFTRT:
5323 case LSHIFTRT:
5324 *total = COSTS_N_INSNS (4);
5325 return true;
5326 case UNSPEC:
5327 if (XINT (x, 1) == UNSPEC_CONVERT)
5328 *total = COSTS_N_INSNS (0);
5329 else
5330 *total = COSTS_N_INSNS (4);
5331 return true;
5332 }
5333 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5334 if (GET_MODE_CLASS (mode) == MODE_INT
5335 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5336 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5337 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5338 *total = cost;
5339 return true;
5340}
5341
095a2d76 5342static scalar_int_mode
7b0518e3 5343spu_unwind_word_mode (void)
85d9c13c 5344{
7b0518e3 5345 return SImode;
85d9c13c
TS
5346}
5347
5348/* Decide whether we can make a sibling call to a function. DECL is the
5349 declaration of the function being targeted by the call and EXP is the
5350 CALL_EXPR representing the call. */
5351static bool
5352spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5353{
5354 return decl && !TARGET_LARGE_MEM;
5355}
5356
5357/* We need to correctly update the back chain pointer and the Available
5358 Stack Size (which is in the second slot of the sp register). */
5359void
5360spu_allocate_stack (rtx op0, rtx op1)
5361{
5362 HOST_WIDE_INT v;
5363 rtx chain = gen_reg_rtx (V4SImode);
5364 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5365 rtx sp = gen_reg_rtx (V4SImode);
5366 rtx splatted = gen_reg_rtx (V4SImode);
5367 rtx pat = gen_reg_rtx (TImode);
5368
5369 /* copy the back chain so we can save it back again. */
5370 emit_move_insn (chain, stack_bot);
5371
5372 op1 = force_reg (SImode, op1);
5373
5374 v = 0x1020300010203ll;
5375 emit_move_insn (pat, immed_double_const (v, v, TImode));
5376 emit_insn (gen_shufb (splatted, op1, op1, pat));
5377
5378 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5379 emit_insn (gen_subv4si3 (sp, sp, splatted));
5380
5381 if (flag_stack_check)
5382 {
5383 rtx avail = gen_reg_rtx(SImode);
5384 rtx result = gen_reg_rtx(SImode);
ff03930a 5385 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
85d9c13c
TS
5386 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5387 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5388 }
5389
5390 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5391
5392 emit_move_insn (stack_bot, chain);
5393
5394 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5395}
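/* Note on the shuffle constant above (derived from shufb semantics): the
   64-bit value 0x0001020300010203 repeated in both halves of the TImode
   pattern selects bytes 0..3 of the first operand for every word slot,
   so gen_shufb splats the requested allocation size from op1's preferred
   slot across all four words.  The following subv4si3 then decrements
   both the stack pointer (slot 0) and the Available Stack Size (slot 1)
   by that amount in a single vector operation.  */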
5396
5397void
5398spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5399{
5400 static unsigned char arr[16] =
5401 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5402 rtx temp = gen_reg_rtx (SImode);
5403 rtx temp2 = gen_reg_rtx (SImode);
5404 rtx temp3 = gen_reg_rtx (V4SImode);
5405 rtx temp4 = gen_reg_rtx (V4SImode);
5406 rtx pat = gen_reg_rtx (TImode);
5407 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5408
5409 /* Restore the backchain from the first word, sp from the second. */
5410 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5411 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5412
5413 emit_move_insn (pat, array_to_constant (TImode, arr));
5414
5415 /* Compute Available Stack Size for sp */
5416 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5417 emit_insn (gen_shufb (temp3, temp, temp, pat));
5418
5419 /* Compute Available Stack Size for back chain */
5420 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5421 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5422 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5423
5424 emit_insn (gen_addv4si3 (sp, sp, temp3));
5425 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5426}
5427
5428static void
5429spu_init_libfuncs (void)
5430{
5431 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5432 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5433 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5434 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5435 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5436 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5437 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5438 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5439 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4dfe3ad5 5440 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
85d9c13c
TS
5441 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5442 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5443
5444 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5445 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
9bf85028 5446
b46ae6da
UW
5447 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5448 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5449 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5450 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5451 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5452 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5453 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5454 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5455 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5456 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5457 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5458 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5459
9bf85028
TS
5460 set_optab_libfunc (smul_optab, TImode, "__multi3");
5461 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5462 set_optab_libfunc (smod_optab, TImode, "__modti3");
5463 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5464 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5465 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
85d9c13c
TS
5466}
5467
5468/* Make a subreg, stripping any existing subreg. We could possibly just
5469 call simplify_subreg, but in this case we know what we want. */
5470rtx
ef4bddc2 5471spu_gen_subreg (machine_mode mode, rtx x)
85d9c13c
TS
5472{
5473 if (GET_CODE (x) == SUBREG)
5474 x = SUBREG_REG (x);
5475 if (GET_MODE (x) == mode)
5476 return x;
5477 return gen_rtx_SUBREG (mode, x, 0);
5478}
5479
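/* Return TRUE if a value of TYPE must be returned in memory: BLKmode
   values whose size is unknown, not constant, or larger than
   MAX_REGISTER_RETURN words cannot be returned in registers.  */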
5480static bool
586de218 5481spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
85d9c13c
TS
5482{
5483 return (TYPE_MODE (type) == BLKmode
5484 && ((type) == 0
5485 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5486 || int_size_in_bytes (type) >
5487 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5488}
5489\f
5490/* Create the built-in types and functions */
5491
4a3a2376
UW
5492enum spu_function_code
5493{
5494#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5495#include "spu-builtins.def"
5496#undef DEF_BUILTIN
5497 NUM_SPU_BUILTINS
5498};
5499
5500extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5501
85d9c13c
TS
5502struct spu_builtin_description spu_builtins[] = {
5503#define DEF_BUILTIN(fcode, icode, name, type, params) \
8dc9f5bd 5504 {fcode, icode, name, type, params},
85d9c13c
TS
5505#include "spu-builtins.def"
5506#undef DEF_BUILTIN
5507};
5508
8dc9f5bd
UW
5509static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5510
5511/* Returns the spu builtin decl for CODE. */
2c93399f
AP
5512
5513static tree
5514spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5515{
5516 if (code >= NUM_SPU_BUILTINS)
5517 return error_mark_node;
5518
8dc9f5bd 5519 return spu_builtin_decls[code];
2c93399f
AP
5520}
5521
5522
85d9c13c
TS
5523static void
5524spu_init_builtins (void)
5525{
5526 struct spu_builtin_description *d;
5527 unsigned int i;
5528
5529 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5530 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5531 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5532 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5533 V4SF_type_node = build_vector_type (float_type_node, 4);
5534 V2DF_type_node = build_vector_type (double_type_node, 2);
5535
5536 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5537 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5538 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5539 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5540
60c9cf8d 5541 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
85d9c13c
TS
5542
5543 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5544 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5545 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5548 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5555
5556 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5557 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5558 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5560 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5561 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5562 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5564
5565 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5566 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5567
5568 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5569
5570 spu_builtin_types[SPU_BTI_PTR] =
5571 build_pointer_type (build_qualified_type
5572 (void_type_node,
5573 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5574
5575 /* For each builtin we build a new prototype. The tree code will make
5576 sure nodes are shared. */
5577 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5578 {
5579 tree p;
5580 char name[64]; /* build_function will make a copy. */
5581 int parm;
5582
5583 if (d->name == 0)
5584 continue;
5585
e47f8bba 5586 /* Find last parm. */
85d9c13c 5587 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
e47f8bba 5588 ;
85d9c13c
TS
5589
5590 p = void_list_node;
5591 while (parm > 1)
5592 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5593
5594 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5595
5596 sprintf (name, "__builtin_%s", d->name);
8dc9f5bd 5597 spu_builtin_decls[i] =
fec6e65b 5598 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
bbea461b 5599 if (d->fcode == SPU_MASK_FOR_LOAD)
8dc9f5bd 5600 TREE_READONLY (spu_builtin_decls[i]) = 1;
e47f8bba
BE
5601
5602 /* These builtins don't throw. */
8dc9f5bd 5603 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
85d9c13c
TS
5604 }
5605}
5606
e1f1d97f
SL
5607void
5608spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5609{
5610 static unsigned char arr[16] =
5611 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5612
5613 rtx temp = gen_reg_rtx (Pmode);
5614 rtx temp2 = gen_reg_rtx (V4SImode);
5615 rtx temp3 = gen_reg_rtx (V4SImode);
5616 rtx pat = gen_reg_rtx (TImode);
5617 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5618
5619 emit_move_insn (pat, array_to_constant (TImode, arr));
5620
5621 /* Restore the sp. */
5622 emit_move_insn (temp, op1);
5623 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5624
5625 /* Compute available stack size for sp. */
5626 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5627 emit_insn (gen_shufb (temp3, temp, temp, pat));
5628
5629 emit_insn (gen_addv4si3 (sp, sp, temp3));
5630 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5631}
5632
85d9c13c
TS
5633int
5634spu_safe_dma (HOST_WIDE_INT channel)
5635{
4230d0fe 5636 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
85d9c13c
TS
5637}
5638
5639void
5640spu_builtin_splats (rtx ops[])
5641{
ef4bddc2 5642 machine_mode mode = GET_MODE (ops[0]);
85d9c13c
TS
5643 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5644 {
5645 unsigned char arr[16];
5646 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5647 emit_move_insn (ops[0], array_to_constant (mode, arr));
5648 }
85d9c13c
TS
5649 else
5650 {
5651 rtx reg = gen_reg_rtx (TImode);
5652 rtx shuf;
5653 if (GET_CODE (ops[1]) != REG
5654 && GET_CODE (ops[1]) != SUBREG)
5655 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
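      /* Each shuffle pattern below broadcasts the element sitting in the
         register's preferred slot: bytes 0-7 for doubleword elements,
         bytes 0-3 for words, bytes 2-3 for halfwords, byte 3 for bytes.  */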
5656 switch (mode)
5657 {
4e10a5a7
RS
5658 case E_V2DImode:
5659 case E_V2DFmode:
85d9c13c
TS
5660 shuf =
5661 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5662 TImode);
5663 break;
4e10a5a7
RS
5664 case E_V4SImode:
5665 case E_V4SFmode:
85d9c13c
TS
5666 shuf =
5667 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5668 TImode);
5669 break;
4e10a5a7 5670 case E_V8HImode:
85d9c13c
TS
5671 shuf =
5672 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5673 TImode);
5674 break;
4e10a5a7 5675 case E_V16QImode:
85d9c13c
TS
5676 shuf =
5677 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5678 TImode);
5679 break;
5680 default:
5681 abort ();
5682 }
5683 emit_move_insn (reg, shuf);
5684 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5685 }
5686}
5687
5688void
5689spu_builtin_extract (rtx ops[])
5690{
ef4bddc2 5691 machine_mode mode;
85d9c13c
TS
5692 rtx rot, from, tmp;
5693
5694 mode = GET_MODE (ops[1]);
5695
5696 if (GET_CODE (ops[2]) == CONST_INT)
5697 {
5698 switch (mode)
5699 {
4e10a5a7 5700 case E_V16QImode:
ff03930a 5701 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
85d9c13c 5702 break;
4e10a5a7 5703 case E_V8HImode:
ff03930a 5704 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
85d9c13c 5705 break;
4e10a5a7 5706 case E_V4SFmode:
ff03930a 5707 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
85d9c13c 5708 break;
4e10a5a7 5709 case E_V4SImode:
ff03930a 5710 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
85d9c13c 5711 break;
4e10a5a7 5712 case E_V2DImode:
ff03930a 5713 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
85d9c13c 5714 break;
4e10a5a7 5715 case E_V2DFmode:
ff03930a 5716 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
85d9c13c
TS
5717 break;
5718 default:
5719 abort ();
5720 }
5721 return;
5722 }
5723
5724 from = spu_gen_subreg (TImode, ops[1]);
5725 rot = gen_reg_rtx (TImode);
5726 tmp = gen_reg_rtx (SImode);
5727
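  /* Compute in TMP the byte rotation that brings element ops[2] into the
     preferred slot: the element index scaled by the element size, adjusted
     for sub-word elements whose preferred slot ends at byte 3.  */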
5728 switch (mode)
5729 {
4e10a5a7 5730 case E_V16QImode:
85d9c13c
TS
5731 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5732 break;
4e10a5a7 5733 case E_V8HImode:
85d9c13c
TS
5734 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5735 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5736 break;
4e10a5a7
RS
5737 case E_V4SFmode:
5738 case E_V4SImode:
85d9c13c
TS
5739 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5740 break;
4e10a5a7
RS
5741 case E_V2DImode:
5742 case E_V2DFmode:
85d9c13c
TS
5743 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5744 break;
5745 default:
5746 abort ();
5747 }
5748 emit_insn (gen_rotqby_ti (rot, from, tmp));
5749
5750 emit_insn (gen_spu_convert (ops[0], rot));
5751}
5752
5753void
5754spu_builtin_insert (rtx ops[])
5755{
ef4bddc2
RS
5756 machine_mode mode = GET_MODE (ops[0]);
5757 machine_mode imode = GET_MODE_INNER (mode);
85d9c13c
TS
5758 rtx mask = gen_reg_rtx (TImode);
5759 rtx offset;
5760
5761 if (GET_CODE (ops[3]) == CONST_INT)
5762 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5763 else
5764 {
5765 offset = gen_reg_rtx (SImode);
5766 emit_insn (gen_mulsi3
5767 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5768 }
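  /* cpat builds shuffle controls for inserting a GET_MODE_SIZE (imode)
     byte element at the computed byte offset; the shufb then copies
     ops[2] through unchanged except at that position, where it takes the
     element from the preferred slot of ops[1].  */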
5769 emit_insn (gen_cpat
5770 (mask, stack_pointer_rtx, offset,
5771 GEN_INT (GET_MODE_SIZE (imode))));
5772 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5773}
5774
5775void
5776spu_builtin_promote (rtx ops[])
5777{
ef4bddc2 5778 machine_mode mode, imode;
85d9c13c
TS
5779 rtx rot, from, offset;
5780 HOST_WIDE_INT pos;
5781
5782 mode = GET_MODE (ops[0]);
5783 imode = GET_MODE_INNER (mode);
5784
5785 from = gen_reg_rtx (TImode);
5786 rot = spu_gen_subreg (TImode, ops[0]);
5787
5788 emit_insn (gen_spu_convert (from, ops[1]));
5789
5790 if (GET_CODE (ops[2]) == CONST_INT)
5791 {
5792 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5793 if (GET_MODE_SIZE (imode) < 4)
5794 pos += 4 - GET_MODE_SIZE (imode);
5795 offset = GEN_INT (pos & 15);
5796 }
5797 else
5798 {
5799 offset = gen_reg_rtx (SImode);
5800 switch (mode)
5801 {
4e10a5a7 5802 case E_V16QImode:
85d9c13c
TS
5803 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5804 break;
4e10a5a7 5805 case E_V8HImode:
85d9c13c
TS
5806 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5807 emit_insn (gen_addsi3 (offset, offset, offset));
5808 break;
4e10a5a7
RS
5809 case E_V4SFmode:
5810 case E_V4SImode:
85d9c13c
TS
5811 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5812 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5813 break;
4e10a5a7
RS
5814 case E_V2DImode:
5815 case E_V2DFmode:
85d9c13c
TS
5816 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5817 break;
5818 default:
5819 abort ();
5820 }
5821 }
5822 emit_insn (gen_rotqby_ti (rot, from, offset));
5823}
5824
a85b4c91
RH
5825static void
5826spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
85d9c13c 5827{
a85b4c91 5828 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
85d9c13c
TS
5829 rtx shuf = gen_reg_rtx (V4SImode);
5830 rtx insn = gen_reg_rtx (V4SImode);
5831 rtx shufc;
5832 rtx insnc;
5833 rtx mem;
5834
5835 fnaddr = force_reg (SImode, fnaddr);
5836 cxt = force_reg (SImode, cxt);
5837
5838 if (TARGET_LARGE_MEM)
5839 {
5840 rtx rotl = gen_reg_rtx (V4SImode);
5841 rtx mask = gen_reg_rtx (V4SImode);
5842 rtx bi = gen_reg_rtx (SImode);
a85b4c91 5843 static unsigned char const shufa[16] = {
85d9c13c
TS
5844 2, 3, 0, 1, 18, 19, 16, 17,
5845 0, 1, 2, 3, 16, 17, 18, 19
5846 };
a85b4c91 5847 static unsigned char const insna[16] = {
85d9c13c
TS
5848 0x41, 0, 0, 79,
5849 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5850 0x60, 0x80, 0, 79,
5851 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5852 };
5853
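      /* insna holds the trampoline instructions with the fields that will
         receive fnaddr and cxt left zero.  The words built by the shufb
         below are rotated so their bits line up with the 16-bit immediate
         fields selected by MASK, and selb merges them into the templates.  */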
5854 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5855 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5856
5857 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
71d46ca5 5858 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
85d9c13c
TS
5859 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5860 emit_insn (gen_selb (insn, insnc, rotl, mask));
5861
a85b4c91
RH
5862 mem = adjust_address (m_tramp, V4SImode, 0);
5863 emit_move_insn (mem, insn);
85d9c13c
TS
5864
5865 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
a85b4c91
RH
5866 mem = adjust_address (m_tramp, Pmode, 16);
5867 emit_move_insn (mem, bi);
85d9c13c
TS
5868 }
5869 else
5870 {
5871 rtx scxt = gen_reg_rtx (SImode);
5872 rtx sfnaddr = gen_reg_rtx (SImode);
a85b4c91 5873 static unsigned char const insna[16] = {
85d9c13c
TS
5874 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5875 0x30, 0, 0, 0,
5876 0, 0, 0, 0,
5877 0, 0, 0, 0
5878 };
5879
5880 shufc = gen_reg_rtx (TImode);
5881 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5882
5883 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5884 fits in 18 bits and that the last 4 bits are zeros. This will be
5885 true if the stack pointer is initialized to 0x3fff0 at program start;
5886 otherwise the ila instruction will be garbage. */
5887
5888 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5889 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5890 emit_insn (gen_cpat
5891 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5892 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5893 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5894
a85b4c91
RH
5895 mem = adjust_address (m_tramp, V4SImode, 0);
5896 emit_move_insn (mem, insn);
85d9c13c
TS
5897 }
5898 emit_insn (gen_sync ());
5899}
5900
d45eae79
SL
5901static bool
5902spu_warn_func_return (tree decl)
5903{
5904 /* Naked functions are implemented entirely in assembly, including the
5905 return sequence, so suppress warnings about this. */
5906 return !spu_naked_function_p (decl);
5907}
5908
85d9c13c
TS
5909void
5910spu_expand_sign_extend (rtx ops[])
5911{
5912 unsigned char arr[16];
5913 rtx pat = gen_reg_rtx (TImode);
5914 rtx sign, c;
5915 int i, last;
5916 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5917 if (GET_MODE (ops[1]) == QImode)
5918 {
5919 sign = gen_reg_rtx (HImode);
5920 emit_insn (gen_extendqihi2 (sign, ops[1]));
5921 for (i = 0; i < 16; i++)
5922 arr[i] = 0x12;
5923 arr[last] = 0x13;
5924 }
5925 else
5926 {
5927 for (i = 0; i < 16; i++)
5928 arr[i] = 0x10;
5929 switch (GET_MODE (ops[1]))
5930 {
4e10a5a7 5931 case E_HImode:
85d9c13c
TS
5932 sign = gen_reg_rtx (SImode);
5933 emit_insn (gen_extendhisi2 (sign, ops[1]));
5934 arr[last] = 0x03;
5935 arr[last - 1] = 0x02;
5936 break;
4e10a5a7 5937 case E_SImode:
85d9c13c
TS
5938 sign = gen_reg_rtx (SImode);
5939 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5940 for (i = 0; i < 4; i++)
5941 arr[last - i] = 3 - i;
5942 break;
4e10a5a7 5943 case E_DImode:
85d9c13c
TS
5944 sign = gen_reg_rtx (SImode);
5945 c = gen_reg_rtx (SImode);
5946 emit_insn (gen_spu_convert (c, ops[1]));
5947 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5948 for (i = 0; i < 8; i++)
5949 arr[last - i] = 7 - i;
5950 break;
5951 default:
5952 abort ();
5953 }
5954 }
5955 emit_move_insn (pat, array_to_constant (TImode, arr));
5956 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5957}
5958
5959/* expand vector initialization. If there are any constant parts,
5960 load constant parts first. Then load any non-constant parts. */
5961void
5962spu_expand_vector_init (rtx target, rtx vals)
5963{
ef4bddc2 5964 machine_mode mode = GET_MODE (target);
85d9c13c
TS
5965 int n_elts = GET_MODE_NUNITS (mode);
5966 int n_var = 0;
5967 bool all_same = true;
b509487e 5968 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
85d9c13c
TS
5969 int i;
5970
5971 first = XVECEXP (vals, 0, 0);
5972 for (i = 0; i < n_elts; ++i)
5973 {
5974 x = XVECEXP (vals, 0, i);
d74032d9
UW
5975 if (!(CONST_INT_P (x)
5976 || GET_CODE (x) == CONST_DOUBLE
5977 || GET_CODE (x) == CONST_FIXED))
85d9c13c
TS
5978 ++n_var;
5979 else
5980 {
5981 if (first_constant == NULL_RTX)
5982 first_constant = x;
5983 }
5984 if (i > 0 && !rtx_equal_p (x, first))
5985 all_same = false;
5986 }
5987
5988 /* if all elements are the same, use splats to repeat elements */
5989 if (all_same)
5990 {
5991 if (!CONSTANT_P (first)
5992 && !register_operand (first, GET_MODE (x)))
5993 first = force_reg (GET_MODE (first), first);
5994 emit_insn (gen_spu_splats (target, first));
5995 return;
5996 }
5997
5998 /* load constant parts */
5999 if (n_var != n_elts)
6000 {
6001 if (n_var == 0)
6002 {
6003 emit_move_insn (target,
6004 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6005 }
6006 else
6007 {
6008 rtx constant_parts_rtx = copy_rtx (vals);
6009
6010 gcc_assert (first_constant != NULL_RTX);
6011 /* Fill empty slots with the first constant; this increases
6012 our chance of using splats in the recursive call below. */
6013 for (i = 0; i < n_elts; ++i)
d74032d9
UW
6014 {
6015 x = XVECEXP (constant_parts_rtx, 0, i);
6016 if (!(CONST_INT_P (x)
6017 || GET_CODE (x) == CONST_DOUBLE
6018 || GET_CODE (x) == CONST_FIXED))
6019 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6020 }
85d9c13c
TS
6021
6022 spu_expand_vector_init (target, constant_parts_rtx);
6023 }
6024 }
6025
6026 /* load variable parts */
6027 if (n_var != 0)
6028 {
6029 rtx insert_operands[4];
6030
6031 insert_operands[0] = target;
6032 insert_operands[2] = target;
6033 for (i = 0; i < n_elts; ++i)
6034 {
6035 x = XVECEXP (vals, 0, i);
d74032d9
UW
6036 if (!(CONST_INT_P (x)
6037 || GET_CODE (x) == CONST_DOUBLE
6038 || GET_CODE (x) == CONST_FIXED))
85d9c13c
TS
6039 {
6040 if (!register_operand (x, GET_MODE (x)))
6041 x = force_reg (GET_MODE (x), x);
6042 insert_operands[1] = x;
6043 insert_operands[3] = GEN_INT (i);
6044 spu_builtin_insert (insert_operands);
6045 }
6046 }
6047 }
6048}
b66b813d 6049
39aeae85
SL
6050/* Return insn index for the vector compare instruction for given CODE,
6051 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6052
6053static int
6054get_vec_cmp_insn (enum rtx_code code,
ef4bddc2
RS
6055 machine_mode dest_mode,
6056 machine_mode op_mode)
39aeae85
SL
6057
6058{
6059 switch (code)
6060 {
6061 case EQ:
6062 if (dest_mode == V16QImode && op_mode == V16QImode)
6063 return CODE_FOR_ceq_v16qi;
6064 if (dest_mode == V8HImode && op_mode == V8HImode)
6065 return CODE_FOR_ceq_v8hi;
6066 if (dest_mode == V4SImode && op_mode == V4SImode)
6067 return CODE_FOR_ceq_v4si;
6068 if (dest_mode == V4SImode && op_mode == V4SFmode)
6069 return CODE_FOR_ceq_v4sf;
6070 if (dest_mode == V2DImode && op_mode == V2DFmode)
6071 return CODE_FOR_ceq_v2df;
6072 break;
6073 case GT:
6074 if (dest_mode == V16QImode && op_mode == V16QImode)
6075 return CODE_FOR_cgt_v16qi;
6076 if (dest_mode == V8HImode && op_mode == V8HImode)
6077 return CODE_FOR_cgt_v8hi;
6078 if (dest_mode == V4SImode && op_mode == V4SImode)
6079 return CODE_FOR_cgt_v4si;
6080 if (dest_mode == V4SImode && op_mode == V4SFmode)
6081 return CODE_FOR_cgt_v4sf;
6082 if (dest_mode == V2DImode && op_mode == V2DFmode)
6083 return CODE_FOR_cgt_v2df;
6084 break;
6085 case GTU:
6086 if (dest_mode == V16QImode && op_mode == V16QImode)
6087 return CODE_FOR_clgt_v16qi;
6088 if (dest_mode == V8HImode && op_mode == V8HImode)
6089 return CODE_FOR_clgt_v8hi;
6090 if (dest_mode == V4SImode && op_mode == V4SImode)
6091 return CODE_FOR_clgt_v4si;
6092 break;
6093 default:
6094 break;
6095 }
6096 return -1;
6097}
6098
6099/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6100 DMODE is expected destination mode. This is a recursive function. */
6101
6102static rtx
6103spu_emit_vector_compare (enum rtx_code rcode,
6104 rtx op0, rtx op1,
ef4bddc2 6105 machine_mode dmode)
39aeae85
SL
6106{
6107 int vec_cmp_insn;
6108 rtx mask;
ef4bddc2
RS
6109 machine_mode dest_mode;
6110 machine_mode op_mode = GET_MODE (op1);
39aeae85
SL
6111
6112 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6113
6114 /* Floating point vector compare instructions use destination V4SImode.
6115 Double floating point vector compare instructions use destination V2DImode.
6116 Move the destination to the appropriate mode later. */
6117 if (dmode == V4SFmode)
6118 dest_mode = V4SImode;
6119 else if (dmode == V2DFmode)
6120 dest_mode = V2DImode;
6121 else
6122 dest_mode = dmode;
6123
6124 mask = gen_reg_rtx (dest_mode);
6125 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6126
6127 if (vec_cmp_insn == -1)
6128 {
6129 bool swap_operands = false;
6130 bool try_again = false;
6131 switch (rcode)
6132 {
6133 case LT:
6134 rcode = GT;
6135 swap_operands = true;
6136 try_again = true;
6137 break;
6138 case LTU:
6139 rcode = GTU;
6140 swap_operands = true;
6141 try_again = true;
6142 break;
6143 case NE:
7f9a3dcd
UW
6144 case UNEQ:
6145 case UNLE:
6146 case UNLT:
6147 case UNGE:
6148 case UNGT:
6149 case UNORDERED:
39aeae85
SL
6150 /* Treat these as the complement of the reversed comparison,
 e.g. A != B as ~(A==B). */
6151 {
7f9a3dcd 6152 enum rtx_code rev_code;
39aeae85 6153 enum insn_code nor_code;
7f9a3dcd
UW
6154 rtx rev_mask;
6155
6156 rev_code = reverse_condition_maybe_unordered (rcode);
6157 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6158
947131ba 6159 nor_code = optab_handler (one_cmpl_optab, dest_mode);
39aeae85 6160 gcc_assert (nor_code != CODE_FOR_nothing);
7f9a3dcd 6161 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
39aeae85
SL
6162 if (dmode != dest_mode)
6163 {
6164 rtx temp = gen_reg_rtx (dest_mode);
6165 convert_move (temp, mask, 0);
6166 return temp;
6167 }
6168 return mask;
6169 }
6170 break;
6171 case GE:
6172 case GEU:
6173 case LE:
6174 case LEU:
6175 /* Try GT/GTU/LT/LTU OR EQ */
6176 {
6177 rtx c_rtx, eq_rtx;
6178 enum insn_code ior_code;
6179 enum rtx_code new_code;
6180
6181 switch (rcode)
6182 {
6183 case GE: new_code = GT; break;
6184 case GEU: new_code = GTU; break;
6185 case LE: new_code = LT; break;
6186 case LEU: new_code = LTU; break;
6187 default:
6188 gcc_unreachable ();
6189 }
6190
6191 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6192 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6193
947131ba 6194 ior_code = optab_handler (ior_optab, dest_mode);
39aeae85
SL
6195 gcc_assert (ior_code != CODE_FOR_nothing);
6196 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6197 if (dmode != dest_mode)
6198 {
6199 rtx temp = gen_reg_rtx (dest_mode);
6200 convert_move (temp, mask, 0);
6201 return temp;
6202 }
6203 return mask;
6204 }
6205 break;
7f9a3dcd
UW
6206 case LTGT:
6207 /* Try LT OR GT */
6208 {
6209 rtx lt_rtx, gt_rtx;
6210 enum insn_code ior_code;
6211
6212 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6213 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6214
6215 ior_code = optab_handler (ior_optab, dest_mode);
6216 gcc_assert (ior_code != CODE_FOR_nothing);
6217 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6218 if (dmode != dest_mode)
6219 {
6220 rtx temp = gen_reg_rtx (dest_mode);
6221 convert_move (temp, mask, 0);
6222 return temp;
6223 }
6224 return mask;
6225 }
6226 break;
6227 case ORDERED:
6228 /* Implement as (A==A) & (B==B) */
6229 {
6230 rtx a_rtx, b_rtx;
6231 enum insn_code and_code;
6232
6233 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6234 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6235
6236 and_code = optab_handler (and_optab, dest_mode);
6237 gcc_assert (and_code != CODE_FOR_nothing);
6238 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6239 if (dmode != dest_mode)
6240 {
6241 rtx temp = gen_reg_rtx (dest_mode);
6242 convert_move (temp, mask, 0);
6243 return temp;
6244 }
6245 return mask;
6246 }
6247 break;
39aeae85
SL
6248 default:
6249 gcc_unreachable ();
6250 }
6251
6252 /* You only get two chances. */
6253 if (try_again)
6254 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6255
6256 gcc_assert (vec_cmp_insn != -1);
6257
6258 if (swap_operands)
6259 {
6260 rtx tmp;
6261 tmp = op0;
6262 op0 = op1;
6263 op1 = tmp;
6264 }
6265 }
6266
6267 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6268 if (dmode != dest_mode)
6269 {
6270 rtx temp = gen_reg_rtx (dest_mode);
6271 convert_move (temp, mask, 0);
6272 return temp;
6273 }
6274 return mask;
6275}
6276
6277
6278/* Emit vector conditional expression.
6279 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6280 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6281
6282int
6283spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6284 rtx cond, rtx cc_op0, rtx cc_op1)
6285{
ef4bddc2 6286 machine_mode dest_mode = GET_MODE (dest);
39aeae85
SL
6287 enum rtx_code rcode = GET_CODE (cond);
6288 rtx mask;
6289
6290 /* Get the vector mask for the given relational operations. */
6291 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6292
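  /* The mask selects OP1 where the comparison is true and OP2 where it is
     false, so OP2 is passed as the first data operand of selb and OP1 as
     the second.  */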
6293 emit_insn(gen_selb (dest, op2, op1, mask));
6294
6295 return 1;
6296}
6297
b66b813d 6298static rtx
ef4bddc2 6299spu_force_reg (machine_mode mode, rtx op)
b66b813d
AP
6300{
6301 rtx x, r;
6302 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6303 {
6304 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6305 || GET_MODE (op) == BLKmode)
6306 return force_reg (mode, convert_to_mode (mode, op, 0));
6307 abort ();
6308 }
6309
6310 r = force_reg (GET_MODE (op), op);
6311 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6312 {
6313 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6314 if (x)
6315 return x;
6316 }
6317
6318 x = gen_reg_rtx (mode);
6319 emit_insn (gen_spu_convert (x, r));
6320 return x;
6321}
6322
6323static void
6324spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6325{
6326 HOST_WIDE_INT v = 0;
6327 int lsbits;
6328 /* Check the range of immediate operands. */
6329 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6330 {
6331 int range = p - SPU_BTI_7;
73701e27
TS
6332
6333 if (!CONSTANT_P (op))
d8a07487 6334 error ("%s expects an integer literal in the range [%d, %d]",
b66b813d
AP
6335 d->name,
6336 spu_builtin_range[range].low, spu_builtin_range[range].high);
6337
6338 if (GET_CODE (op) == CONST
6339 && (GET_CODE (XEXP (op, 0)) == PLUS
6340 || GET_CODE (XEXP (op, 0)) == MINUS))
6341 {
6342 v = INTVAL (XEXP (XEXP (op, 0), 1));
6343 op = XEXP (XEXP (op, 0), 0);
6344 }
6345 else if (GET_CODE (op) == CONST_INT)
6346 v = INTVAL (op);
73701e27
TS
6347 else if (GET_CODE (op) == CONST_VECTOR
6348 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6349 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6350
6351 /* The default for v is 0 which is valid in every range. */
6352 if (v < spu_builtin_range[range].low
6353 || v > spu_builtin_range[range].high)
d8a07487 6354 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
73701e27
TS
6355 d->name,
6356 spu_builtin_range[range].low, spu_builtin_range[range].high,
6357 v);
b66b813d
AP
6358
6359 switch (p)
6360 {
6361 case SPU_BTI_S10_4:
6362 lsbits = 4;
6363 break;
6364 case SPU_BTI_U16_2:
6365 /* This is only used in lqa and stqa. Even though the insns
6366 encode 16 bits of the address (all but the 2 least
6367 significant), only 14 bits are used because it is masked to
6368 be 16 byte aligned. */
6369 lsbits = 4;
6370 break;
6371 case SPU_BTI_S16_2:
6372 /* This is used for lqr and stqr. */
6373 lsbits = 2;
6374 break;
6375 default:
6376 lsbits = 0;
6377 }
6378
6379 if (GET_CODE (op) == LABEL_REF
6380 || (GET_CODE (op) == SYMBOL_REF
6381 && SYMBOL_REF_FUNCTION_P (op))
73701e27 6382 || (v & ((1 << lsbits) - 1)) != 0)
d8a07487 6383 warning (0, "%d least significant bits of %s are ignored", lsbits,
b66b813d
AP
6384 d->name);
6385 }
6386}
6387
6388
d7815554 6389static int
73701e27 6390expand_builtin_args (struct spu_builtin_description *d, tree exp,
b66b813d
AP
6391 rtx target, rtx ops[])
6392{
81f40b79 6393 enum insn_code icode = (enum insn_code) d->icode;
73701e27 6394 int i = 0, a;
b66b813d
AP
6395
6396 /* Expand the arguments into rtl. */
6397
6398 if (d->parm[0] != SPU_BTI_VOID)
6399 ops[i++] = target;
6400
d7815554 6401 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
b66b813d 6402 {
73701e27 6403 tree arg = CALL_EXPR_ARG (exp, a);
b66b813d
AP
6404 if (arg == 0)
6405 abort ();
bbbbb16a 6406 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
b66b813d 6407 }
d7815554 6408
f04713ee 6409 gcc_assert (i == insn_data[icode].n_generator_args);
d7815554 6410 return i;
b66b813d
AP
6411}
6412
6413static rtx
6414spu_expand_builtin_1 (struct spu_builtin_description *d,
73701e27 6415 tree exp, rtx target)
b66b813d
AP
6416{
6417 rtx pat;
6418 rtx ops[8];
81f40b79 6419 enum insn_code icode = (enum insn_code) d->icode;
ef4bddc2 6420 machine_mode mode, tmode;
b66b813d 6421 int i, p;
d7815554 6422 int n_operands;
b66b813d
AP
6423 tree return_type;
6424
6425 /* Set up ops[] with values from arglist. */
d7815554 6426 n_operands = expand_builtin_args (d, exp, target, ops);
b66b813d
AP
6427
6428 /* Handle the target operand which must be operand 0. */
6429 i = 0;
6430 if (d->parm[0] != SPU_BTI_VOID)
6431 {
6432
6433 /* We prefer the mode specified for the match_operand; otherwise
6434 use the mode from the builtin function prototype. */
6435 tmode = insn_data[d->icode].operand[0].mode;
6436 if (tmode == VOIDmode)
6437 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6438
6439 /* Try to use target because not using it can lead to extra copies,
6440 and when we are using all of the registers extra copies lead
6441 to extra spills. */
6442 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6443 ops[0] = target;
6444 else
6445 target = ops[0] = gen_reg_rtx (tmode);
6446
6447 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6448 abort ();
6449
6450 i++;
6451 }
6452
bbea461b
DN
6453 if (d->fcode == SPU_MASK_FOR_LOAD)
6454 {
ef4bddc2 6455 machine_mode mode = insn_data[icode].operand[1].mode;
bbea461b
DN
6456 tree arg;
6457 rtx addr, op, pat;
6458
6459 /* get addr */
73701e27 6460 arg = CALL_EXPR_ARG (exp, 0);
643afedb 6461 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
bbea461b
DN
6462 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6463 addr = memory_address (mode, op);
6464
6465 /* negate addr */
6466 op = gen_reg_rtx (GET_MODE (addr));
f7df4a84 6467 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
bbea461b
DN
6468 op = gen_rtx_MEM (mode, op);
6469
6470 pat = GEN_FCN (icode) (target, op);
6471 if (!pat)
6472 return 0;
6473 emit_insn (pat);
6474 return target;
6475 }
6476
b66b813d
AP
6477 /* Ignore align_hint, but still expand its args in case they have
6478 side effects. */
6479 if (icode == CODE_FOR_spu_align_hint)
6480 return 0;
6481
6482 /* Handle the rest of the operands. */
d7815554 6483 for (p = 1; i < n_operands; i++, p++)
b66b813d
AP
6484 {
6485 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6486 mode = insn_data[d->icode].operand[i].mode;
6487 else
6488 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6489
6490 /* mode can be VOIDmode here for labels */
6491
6492 /* For specific intrinsics with an immediate operand, e.g.,
6493 si_ai(), we sometimes need to convert the scalar argument to a
6494 vector argument by splatting the scalar. */
6495 if (VECTOR_MODE_P (mode)
6496 && (GET_CODE (ops[i]) == CONST_INT
6497 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6717c544 6498 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
b66b813d
AP
6499 {
6500 if (GET_CODE (ops[i]) == CONST_INT)
6501 ops[i] = spu_const (mode, INTVAL (ops[i]));
6502 else
6503 {
6504 rtx reg = gen_reg_rtx (mode);
ef4bddc2 6505 machine_mode imode = GET_MODE_INNER (mode);
b66b813d
AP
6506 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6507 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6508 if (imode != GET_MODE (ops[i]))
6509 ops[i] = convert_to_mode (imode, ops[i],
6510 TYPE_UNSIGNED (spu_builtin_types
6511 [d->parm[i]]));
6512 emit_insn (gen_spu_splats (reg, ops[i]));
6513 ops[i] = reg;
6514 }
6515 }
6516
73701e27
TS
6517 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6518
b66b813d
AP
6519 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6520 ops[i] = spu_force_reg (mode, ops[i]);
b66b813d
AP
6521 }
6522
d7815554 6523 switch (n_operands)
b66b813d
AP
6524 {
6525 case 0:
6526 pat = GEN_FCN (icode) (0);
6527 break;
6528 case 1:
6529 pat = GEN_FCN (icode) (ops[0]);
6530 break;
6531 case 2:
6532 pat = GEN_FCN (icode) (ops[0], ops[1]);
6533 break;
6534 case 3:
6535 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6536 break;
6537 case 4:
6538 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6539 break;
6540 case 5:
6541 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6542 break;
6543 case 6:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6545 break;
6546 default:
6547 abort ();
6548 }
6549
6550 if (!pat)
6551 abort ();
6552
6553 if (d->type == B_CALL || d->type == B_BISLED)
6554 emit_call_insn (pat);
6555 else if (d->type == B_JUMP)
6556 {
6557 emit_jump_insn (pat);
6558 emit_barrier ();
6559 }
6560 else
6561 emit_insn (pat);
6562
6563 return_type = spu_builtin_types[d->parm[0]];
6564 if (d->parm[0] != SPU_BTI_VOID
6565 && GET_MODE (target) != TYPE_MODE (return_type))
6566 {
6567 /* target is the return value. It should always have the mode of
6568 the builtin function prototype. */
6569 target = spu_force_reg (TYPE_MODE (return_type), target);
6570 }
6571
6572 return target;
6573}
6574
6575rtx
6576spu_expand_builtin (tree exp,
6577 rtx target,
6578 rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 6579 machine_mode mode ATTRIBUTE_UNUSED,
b66b813d
AP
6580 int ignore ATTRIBUTE_UNUSED)
6581{
73701e27 6582 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
fec6e65b 6583 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
b66b813d
AP
6584 struct spu_builtin_description *d;
6585
6586 if (fcode < NUM_SPU_BUILTINS)
6587 {
6588 d = &spu_builtins[fcode];
6589
73701e27 6590 return spu_expand_builtin_1 (d, exp, target);
b66b813d
AP
6591 }
6592 abort ();
6593}
6594
bbea461b
DN
6595/* Implement targetm.vectorize.builtin_mask_for_load. */
6596static tree
6597spu_builtin_mask_for_load (void)
6598{
8dc9f5bd 6599 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
bbea461b 6600}
73701e27 6601
e95b59d2
DN
6602/* Implement targetm.vectorize.builtin_vectorization_cost. */
6603static int
720f5239 6604spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
a21892ad 6605 tree vectype,
720f5239 6606 int misalign ATTRIBUTE_UNUSED)
35e1a5e7 6607{
a21892ad
BS
6608 unsigned elements;
6609
35e1a5e7
IR
6610 switch (type_of_cost)
6611 {
6612 case scalar_stmt:
6613 case vector_stmt:
6614 case vector_load:
6615 case vector_store:
6616 case vec_to_scalar:
6617 case scalar_to_vec:
6618 case cond_branch_not_taken:
6619 case vec_perm:
8bd37302 6620 case vec_promote_demote:
35e1a5e7
IR
6621 return 1;
6622
6623 case scalar_store:
6624 return 10;
6625
6626 case scalar_load:
6627 /* Load + rotate. */
6628 return 2;
6629
6630 case unaligned_load:
6631 return 2;
6632
6633 case cond_branch_taken:
6634 return 6;
6635
a21892ad
BS
6636 case vec_construct:
6637 elements = TYPE_VECTOR_SUBPARTS (vectype);
6638 return elements / 2 + 1;
6639
35e1a5e7
IR
6640 default:
6641 gcc_unreachable ();
6642 }
e95b59d2
DN
6643}
6644
c3e7ee41
BS
6645/* Implement targetm.vectorize.init_cost. */
6646
cf0d4e15 6647static void *
c3e7ee41
BS
6648spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6649{
92345349
BS
6650 unsigned *cost = XNEWVEC (unsigned, 3);
6651 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
c3e7ee41
BS
6652 return cost;
6653}
6654
6655/* Implement targetm.vectorize.add_stmt_cost. */
6656
cf0d4e15 6657static unsigned
c3e7ee41 6658spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
92345349
BS
6659 struct _stmt_vec_info *stmt_info, int misalign,
6660 enum vect_cost_model_location where)
c3e7ee41
BS
6661{
6662 unsigned *cost = (unsigned *) data;
6663 unsigned retval = 0;
6664
6665 if (flag_vect_cost_model)
6666 {
92345349 6667 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
c3e7ee41
BS
6668 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6669
6670 /* Statements in an inner loop relative to the loop being
6671 vectorized are weighted more heavily. The value here is
6672 arbitrary and could potentially be improved with analysis. */
92345349 6673 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
c3e7ee41
BS
6674 count *= 50; /* FIXME. */
6675
6676 retval = (unsigned) (count * stmt_cost);
92345349 6677 cost[where] += retval;
c3e7ee41
BS
6678 }
6679
6680 return retval;
6681}
6682
6683/* Implement targetm.vectorize.finish_cost. */
6684
92345349
BS
6685static void
6686spu_finish_cost (void *data, unsigned *prologue_cost,
6687 unsigned *body_cost, unsigned *epilogue_cost)
c3e7ee41 6688{
92345349
BS
6689 unsigned *cost = (unsigned *) data;
6690 *prologue_cost = cost[vect_prologue];
6691 *body_cost = cost[vect_body];
6692 *epilogue_cost = cost[vect_epilogue];
c3e7ee41
BS
6693}
6694
6695/* Implement targetm.vectorize.destroy_cost_data. */
6696
cf0d4e15 6697static void
c3e7ee41
BS
6698spu_destroy_cost_data (void *data)
6699{
6700 free (data);
6701}
6702
99c9c69a
DN
6703/* Return true iff a data reference of TYPE can reach vector alignment (16)
6704 after applying N iterations. This routine does not determine
6705 how many iterations are required to reach the desired alignment. */
6706
6707static bool
3101faab 6708spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
99c9c69a
DN
6709{
6710 if (is_packed)
6711 return false;
6712
6713 /* All other types are naturally aligned. */
6714 return true;
6715}
6716
299456f3 6717/* Return the appropriate mode for a named address pointer. */
095a2d76 6718static scalar_int_mode
299456f3
BE
6719spu_addr_space_pointer_mode (addr_space_t addrspace)
6720{
6721 switch (addrspace)
6722 {
6723 case ADDR_SPACE_GENERIC:
6724 return ptr_mode;
6725 case ADDR_SPACE_EA:
6726 return EAmode;
6727 default:
6728 gcc_unreachable ();
6729 }
6730}
6731
6732/* Return the appropriate mode for a named address address. */
095a2d76 6733static scalar_int_mode
299456f3
BE
6734spu_addr_space_address_mode (addr_space_t addrspace)
6735{
6736 switch (addrspace)
6737 {
6738 case ADDR_SPACE_GENERIC:
6739 return Pmode;
6740 case ADDR_SPACE_EA:
6741 return EAmode;
6742 default:
6743 gcc_unreachable ();
6744 }
6745}
6746
6747/* Determine if one named address space is a subset of another. */
6748
6749static bool
6750spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6751{
6752 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6753 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6754
6755 if (subset == superset)
6756 return true;
6757
6758 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6759 being subsets but instead as disjoint address spaces. */
6760 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6761 return false;
6762
6763 else
6764 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6765}
6766
6767/* Convert from one address space to another. */
6768static rtx
6769spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6770{
6771 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6772 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6773
6774 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6775 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6776
6777 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6778 {
6779 rtx result, ls;
6780
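      /* __ea_local_store gives the effective address at which this SPU's
         local store is mapped; subtracting it converts an __ea pointer to
         a local-store address.  The conditional move keeps a NULL pointer
         NULL.  */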
6781 ls = gen_const_mem (DImode,
6782 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6783 set_mem_align (ls, 128);
6784
6785 result = gen_reg_rtx (Pmode);
6786 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6787 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6788 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6789 ls, const0_rtx, Pmode, 1);
6790
6791 emit_insn (gen_subsi3 (result, op, ls));
6792
6793 return result;
6794 }
6795
6796 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6797 {
6798 rtx result, ls;
6799
6800 ls = gen_const_mem (DImode,
6801 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6802 set_mem_align (ls, 128);
6803
6804 result = gen_reg_rtx (EAmode);
6805 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6806 op = force_reg (Pmode, op);
6807 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6808 ls, const0_rtx, EAmode, 1);
6809 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6810
6811 if (EAmode == SImode)
6812 emit_insn (gen_addsi3 (result, op, ls));
6813 else
6814 emit_insn (gen_adddi3 (result, op, ls));
6815
6816 return result;
6817 }
6818
6819 else
6820 gcc_unreachable ();
6821}
6822
6823
67186a97
TS
6824/* Count the total number of instructions in each pipe and return the
6825 maximum, which is used as the Minimum Iteration Interval (MII)
6826 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6827 A value of -2 means the instruction can go in either pipe0 or pipe1. */
6828static int
6829spu_sms_res_mii (struct ddg *g)
6830{
6831 int i;
6832 unsigned t[4] = {0, 0, 0, 0};
6833
6834 for (i = 0; i < g->num_nodes; i++)
6835 {
23c39aaa 6836 rtx_insn *insn = g->nodes[i].insn;
67186a97
TS
6837 int p = get_pipe (insn) + 2;
6838
819bfe0e
JM
6839 gcc_assert (p >= 0);
6840 gcc_assert (p < 4);
67186a97
TS
6841
6842 t[p]++;
6843 if (dump_file && INSN_P (insn))
6844 fprintf (dump_file, "i%d %s %d %d\n",
6845 INSN_UID (insn),
6846 insn_data[INSN_CODE(insn)].name,
6847 p, t[p]);
6848 }
6849 if (dump_file)
6850 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6851
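  /* At most two instructions issue per cycle, one in each pipe.  The MII
     is therefore bounded by each pipe's own count and by half the total
     of the instructions that must share the two pipes.  */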
6852 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6853}
6854
6855
73701e27
TS
6856void
6857spu_init_expanders (void)
eec9405e 6858{
73701e27 6859 if (cfun)
eec9405e
TS
6860 {
6861 rtx r0, r1;
6862 /* HARD_FRAME_POINTER_REGNUM is only 128 bit aligned when
6863 frame_pointer_needed is true. We don't know that until we're
6864 expanding the prologue. */
6865 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6866
6867 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6868 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6869 to be treated as aligned, so generate them here. */
6870 r0 = gen_reg_rtx (SImode);
6871 r1 = gen_reg_rtx (SImode);
6872 mark_reg_pointer (r0, 128);
6873 mark_reg_pointer (r1, 128);
6874 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6875 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6876 }
7fecf2c7
AP
6877}
6878
095a2d76 6879static scalar_int_mode
7fecf2c7
AP
6880spu_libgcc_cmp_return_mode (void)
6881{
6882
6883/* For SPU, word mode is TImode, so it is better to use SImode
6884 for compare returns. */
6885 return SImode;
6886}
6887
095a2d76 6888static scalar_int_mode
7fecf2c7
AP
6889spu_libgcc_shift_count_mode (void)
6890{
6891/* For SPU, word mode is TImode, so it is better to use SImode
6892 for shift counts. */
6893 return SImode;
6894}
9dcc2e87 6895
500a1f85
UW
6896/* Implement targetm.section_type_flags. */
6897static unsigned int
6898spu_section_type_flags (tree decl, const char *name, int reloc)
6899{
6900 /* .toe needs to have type @nobits. */
6901 if (strcmp (name, ".toe") == 0)
6902 return SECTION_BSS;
299456f3
BE
6903 /* Don't load _ea into the current address space. */
6904 if (strcmp (name, "._ea") == 0)
6905 return SECTION_WRITE | SECTION_DEBUG;
500a1f85
UW
6906 return default_section_type_flags (decl, name, reloc);
6907}
4a3a2376 6908
299456f3
BE
6909/* Implement targetm.select_section. */
6910static section *
6911spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6912{
6913 /* Variables and constants defined in the __ea address space
6914 go into a special section named "._ea". */
6915 if (TREE_TYPE (decl) != error_mark_node
6916 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6917 {
6918 /* We might get called with string constants, but get_named_section
6919 doesn't like them as they are not DECLs. Also, we need to set
6920 flags in that case. */
6921 if (!DECL_P (decl))
6922 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6923
6924 return get_named_section (decl, "._ea", reloc);
6925 }
6926
6927 return default_elf_select_section (decl, reloc, align);
6928}
6929
6930/* Implement targetm.unique_section. */
6931static void
6932spu_unique_section (tree decl, int reloc)
6933{
6934 /* We don't support unique section names in the __ea address
6935 space for now. */
6936 if (TREE_TYPE (decl) != error_mark_node
6937 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6938 return;
6939
6940 default_unique_section (decl, reloc);
6941}
6942
5345cf68
TS
6943/* Generate a constant or register which contains 2^SCALE. We assume
6944 the result is valid for MODE. Currently, MODE must be V4SFmode and
6945 SCALE must be SImode. */
6946rtx
ef4bddc2 6947spu_gen_exp2 (machine_mode mode, rtx scale)
5345cf68
TS
6948{
6949 gcc_assert (mode == V4SFmode);
6950 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6951 if (GET_CODE (scale) != CONST_INT)
6952 {
6953 /* unsigned int exp = (127 + scale) << 23;
6954 __vector float m = (__vector float) spu_splats (exp); */
6955 rtx reg = force_reg (SImode, scale);
6956 rtx exp = gen_reg_rtx (SImode);
6957 rtx mul = gen_reg_rtx (mode);
6958 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6959 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6960 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6961 return mul;
6962 }
6963 else
6964 {
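      /* Build the IEEE single precision encoding of 2.0**scale directly:
         byte 0 holds the upper seven bits of the biased exponent, byte 1
         its low bit in the topmost position, and the mantissa is zero.  */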
6965 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6966 unsigned char arr[16];
6967 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6968 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6969 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6970 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6971 return array_to_constant (mode, arr);
6972 }
6973}
6974
eec9405e
TS
6975/* After reload, just change the convert into a move instruction
6976 or a dead instruction. */
6977void
6978spu_split_convert (rtx ops[])
6979{
6980 if (REGNO (ops[0]) == REGNO (ops[1]))
6981 emit_note (NOTE_INSN_DELETED);
6982 else
6983 {
6984 /* Use TImode always as this might help hard reg copyprop. */
6985 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6986 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6987 emit_insn (gen_move_insn (op0, op1));
6988 }
6989}
6990
75741fed 6991void
d707fc77 6992spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
75741fed
KW
6993{
6994 fprintf (file, "# profile\n");
6995 fprintf (file, "brsl $75, _mcount\n");
6996}
6997
d4f2460a
UW
6998/* Implement targetm.ref_may_alias_errno. */
6999static bool
7000spu_ref_may_alias_errno (ao_ref *ref)
7001{
7002 tree base = ao_ref_base (ref);
7003
7004 /* With SPU newlib, errno is defined as something like
7005 _impure_data._errno
7006 The default implementation of this target macro does not
7007 recognize such expressions, so we handle them specially here. */
7008
7009 if (TREE_CODE (base) == VAR_DECL
7010 && !TREE_STATIC (base)
7011 && DECL_EXTERNAL (base)
7012 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7013 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7014 "_impure_data") == 0
7015 /* _errno is the first member of _impure_data. */
7016 && ref->offset == 0)
7017 return true;
7018
7019 return default_ref_may_alias_errno (ref);
7020}
7021
07ea0048
UW
7022/* Output thunk to FILE that implements a C++ virtual function call (with
7023 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7024 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7025 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7026 relative to the resulting this pointer. */
7027
7028static void
7029spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7030 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7031 tree function)
7032{
7033 rtx op[8];
7034
7035 /* Make sure unwind info is emitted for the thunk if needed. */
7036 final_start_function (emit_barrier (), file, 1);
7037
7038 /* Operand 0 is the target function. */
7039 op[0] = XEXP (DECL_RTL (function), 0);
7040
7041 /* Operand 1 is the 'this' pointer. */
7042 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7043 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7044 else
7045 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7046
7047 /* Operands 2/3 are the low/high halfwords of delta. */
7048 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7049 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7050
7051 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7052 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7053 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7054
7055 /* Operands 6/7 are temporary registers. */
7056 op[6] = gen_rtx_REG (Pmode, 79);
7057 op[7] = gen_rtx_REG (Pmode, 78);
7058
7059 /* Add DELTA to this pointer. */
7060 if (delta)
7061 {
7062 if (delta >= -0x200 && delta < 0x200)
7063 output_asm_insn ("ai\t%1,%1,%2", op);
7064 else if (delta >= -0x8000 && delta < 0x8000)
7065 {
7066 output_asm_insn ("il\t%6,%2", op);
7067 output_asm_insn ("a\t%1,%1,%6", op);
7068 }
7069 else
7070 {
7071 output_asm_insn ("ilhu\t%6,%3", op);
7072 output_asm_insn ("iohl\t%6,%2", op);
7073 output_asm_insn ("a\t%1,%1,%6", op);
7074 }
7075 }
7076
7077 /* Perform vcall adjustment. */
7078 if (vcall_offset)
7079 {
7080 output_asm_insn ("lqd\t%7,0(%1)", op);
7081 output_asm_insn ("rotqby\t%7,%7,%1", op);
7082
7083 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7084 output_asm_insn ("ai\t%7,%7,%4", op);
7085 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7086 {
7087 output_asm_insn ("il\t%6,%4", op);
7088 output_asm_insn ("a\t%7,%7,%6", op);
7089 }
7090 else
7091 {
7092 output_asm_insn ("ilhu\t%6,%5", op);
7093 output_asm_insn ("iohl\t%6,%4", op);
7094 output_asm_insn ("a\t%7,%7,%6", op);
7095 }
7096
7097 output_asm_insn ("lqd\t%6,0(%7)", op);
7098 output_asm_insn ("rotqby\t%6,%6,%7", op);
7099 output_asm_insn ("a\t%1,%1,%6", op);
7100 }
7101
7102 /* Jump to target. */
7103 output_asm_insn ("br\t%0", op);
7104
7105 final_end_function ();
7106}
7107
c354951b
AK
7108/* Canonicalize a comparison from one we don't have to one we do have. */
7109static void
7110spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7111 bool op0_preserve_value)
7112{
7113 if (!op0_preserve_value
7114 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7115 {
7116 rtx tem = *op0;
7117 *op0 = *op1;
7118 *op1 = tem;
7119 *code = (int)swap_condition ((enum rtx_code)*code);
7120 }
7121}
99400eed
UW
7122
7123/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7124 to perform. MEM is the memory on which to operate. VAL is the second
7125 operand of the binary operator. BEFORE and AFTER are optional locations to
7126 return the value of MEM either before or after the operation. */
7127void
7128spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7129 rtx orig_before, rtx orig_after)
7130{
7131 machine_mode mode = GET_MODE (mem);
7132 rtx before = orig_before, after = orig_after;
7133
7134 if (before == NULL_RTX)
7135 before = gen_reg_rtx (mode);
7136
7137 emit_move_insn (before, mem);
7138
7139 if (code == MULT) /* NAND operation */
7140 {
7141 rtx x = expand_simple_binop (mode, AND, before, val,
7142 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7143 after = expand_simple_unop (mode, NOT, x, after, 1);
7144 }
7145 else
7146 {
7147 after = expand_simple_binop (mode, code, before, val,
7148 after, 1, OPTAB_LIB_WIDEN);
7149 }
7150
7151 emit_move_insn (mem, after);
7152
7153 if (orig_after && after != orig_after)
7154 emit_move_insn (orig_after, after);
7155}
7156
99e1629f
RS
7157/* Implement TARGET_MODES_TIEABLE_P. */
7158
7159static bool
7160spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7161{
7162 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7163 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7164}
0d803030
RS
7165
7166/* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7167 in the lowpart of a register, which is only true for SPU. */
7168
7169static bool
7170spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7171{
7172 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7173 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7174 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7175}
bb149ca2
RS
7176
7177/* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7178
7179static bool
7180spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec)
7181{
7182 return inprec <= 32 && outprec <= inprec;
7183}
c9c72699
UW
7184\f
7185/* Table of machine attributes. */
7186static const struct attribute_spec spu_attribute_table[] =
7187{
7188 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7189 affects_type_identity } */
7190 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7191 false },
7192 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7193 false },
7194 { NULL, 0, 0, false, false, false, NULL, false }
7195};
7196
7197/* TARGET overrides. */
7198
d81db636
SB
7199#undef TARGET_LRA_P
7200#define TARGET_LRA_P hook_bool_void_false
7201
c9c72699
UW
#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

/* The .8byte directive doesn't seem to work well for a 32-bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P spu_modes_tieable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class

#undef TARGET_TRULY_NOOP_TRUNCATION
#define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-spu.h"