gcc/config/spu/spu.c
711789cc 1/* Copyright (C) 2006-2013 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
0b205f4c 37#include "diagnostic-core.h"
644459d0 38#include "ggc.h"
39#include "hashtab.h"
40#include "tm_p.h"
41#include "target.h"
42#include "target-def.h"
43#include "langhooks.h"
44#include "reload.h"
644459d0 45#include "sched-int.h"
46#include "params.h"
644459d0 47#include "machmode.h"
75a70cf9 48#include "gimple.h"
644459d0 49#include "tm-constrs.h"
d52fd16a 50#include "ddg.h"
5a976006 51#include "sbitmap.h"
52#include "timevar.h"
53#include "df.h"
b9ed1410 54#include "dumpfile.h"
a7a0184d 55#include "cfgloop.h"
6352eedf 56
57/* Builtin types, data and prototypes. */
c2233b46 58
59enum spu_builtin_type_index
60{
61 SPU_BTI_END_OF_PARAMS,
62
63 /* We create new type nodes for these. */
64 SPU_BTI_V16QI,
65 SPU_BTI_V8HI,
66 SPU_BTI_V4SI,
67 SPU_BTI_V2DI,
68 SPU_BTI_V4SF,
69 SPU_BTI_V2DF,
70 SPU_BTI_UV16QI,
71 SPU_BTI_UV8HI,
72 SPU_BTI_UV4SI,
73 SPU_BTI_UV2DI,
74
75 /* A 16-byte type. (Implemented with V16QI_type_node) */
76 SPU_BTI_QUADWORD,
77
78 /* These all correspond to intSI_type_node */
79 SPU_BTI_7,
80 SPU_BTI_S7,
81 SPU_BTI_U7,
82 SPU_BTI_S10,
83 SPU_BTI_S10_4,
84 SPU_BTI_U14,
85 SPU_BTI_16,
86 SPU_BTI_S16,
87 SPU_BTI_S16_2,
88 SPU_BTI_U16,
89 SPU_BTI_U16_2,
90 SPU_BTI_U18,
91
92 /* These correspond to the standard types */
93 SPU_BTI_INTQI,
94 SPU_BTI_INTHI,
95 SPU_BTI_INTSI,
96 SPU_BTI_INTDI,
97
98 SPU_BTI_UINTQI,
99 SPU_BTI_UINTHI,
100 SPU_BTI_UINTSI,
101 SPU_BTI_UINTDI,
102
103 SPU_BTI_FLOAT,
104 SPU_BTI_DOUBLE,
105
106 SPU_BTI_VOID,
107 SPU_BTI_PTR,
108
109 SPU_BTI_MAX
110};
111
112#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
113#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
114#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
115#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
116#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
117#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
118#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
119#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
120#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
121#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
122
123static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
124
6352eedf 125struct spu_builtin_range
126{
127 int low, high;
128};
129
130static struct spu_builtin_range spu_builtin_range[] = {
131 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
132 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
133 {0ll, 0x7fll}, /* SPU_BTI_U7 */
134 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
135 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
136 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
137 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
138 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
139 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
140 {0ll, 0xffffll}, /* SPU_BTI_U16 */
141 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
143};
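/* Illustrative note (not part of the original source): each range type
   above bounds the constant operands accepted by the corresponding
   builtins; for example an SPU_BTI_S10 operand must satisfy
   -0x200 <= val <= 0x1ff, i.e. fit in a signed 10-bit immediate field.  */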
144
644459d0 145\f
146/* Target specific attribute specifications. */
147char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
148
149/* Prototypes and external defs. */
644459d0 150static int get_pipe (rtx insn);
644459d0 151static int spu_naked_function_p (tree func);
644459d0 152static int mem_is_padded_component_ref (rtx x);
c7b91b14 153static void fix_range (const char *);
9d98604b 154static rtx spu_expand_load (rtx, rtx, rtx, int);
644459d0 155
5474166e 156/* Which instruction set architecture to use. */
157int spu_arch;
158/* Which cpu are we tuning for. */
159int spu_tune;
160
5a976006 161/* The hardware requires 8 insns between a hint and the branch it
 162 affects. This variable describes how many rtl instructions the
 163 compiler needs to see before inserting a hint, and then the compiler
 164 will insert enough nops to make it at least 8 insns. The default is
 165 for the compiler to allow up to 2 nops to be emitted. The nops are
 166 inserted in pairs, so we round down. */
167int spu_hint_dist = (8*4) - (2*4);
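/* Worked example (illustrative only): with the default of 2 allowed nops,
   spu_hint_dist = 8*4 - 2*4 = 24 bytes, so a hint must be seen at least
   6 instructions before its branch and at most 2 nops are added to reach
   the required 8.  If spu_max_nops were 0, spu_hint_dist would be the
   full 32 bytes (8 instructions).  */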
168
644459d0 169enum spu_immediate {
170 SPU_NONE,
171 SPU_IL,
172 SPU_ILA,
173 SPU_ILH,
174 SPU_ILHU,
175 SPU_ORI,
176 SPU_ORHI,
177 SPU_ORBI,
99369027 178 SPU_IOHL
644459d0 179};
dea01258 180enum immediate_class
181{
182 IC_POOL, /* constant pool */
183 IC_IL1, /* one il* instruction */
184 IC_IL2, /* both ilhu and iohl instructions */
185 IC_IL1s, /* one il* instruction */
186 IC_IL2s, /* both ilhu and iohl instructions */
187 IC_FSMBI, /* the fsmbi instruction */
188 IC_CPAT, /* one of the c*d instructions */
5df189be 189 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 190};
644459d0 191
192static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
193static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 194static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
195static enum immediate_class classify_immediate (rtx op,
196 enum machine_mode mode);
644459d0 197
6cf5579e 198/* Pointer mode for __ea references. */
199#define EAmode (spu_ea_model != 32 ? DImode : SImode)
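/* Illustrative note (an assumption about the -mea32/-mea64 address models):
   with the 64-bit __ea model an __ea pointer lives in DImode and occupies
   8 bytes, while with the 32-bit model it is a plain SImode pointer.  */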
200
ef51d1e3 201\f
5eb28709 202/* Define the structure for the machine field in struct function. */
203struct GTY(()) machine_function
204{
205 /* Register to use for PIC accesses. */
206 rtx pic_reg;
207};
208
209/* How to allocate a 'struct machine_function'. */
210static struct machine_function *
211spu_init_machine_status (void)
212{
213 return ggc_alloc_cleared_machine_function ();
214}
215
4c834714 216/* Implement TARGET_OPTION_OVERRIDE. */
217static void
218spu_option_override (void)
644459d0 219{
5eb28709 220 /* Set up function hooks. */
221 init_machine_status = spu_init_machine_status;
222
14d408d9 223 /* Small loops will be unpeeled at -O3. For SPU it is more important
224 to keep code small by default. */
686e2769 225 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 226 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 227 global_options.x_param_values,
228 global_options_set.x_param_values);
14d408d9 229
644459d0 230 flag_omit_frame_pointer = 1;
231
5a976006 232 /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
644459d0 233 if (align_functions < 8)
234 align_functions = 8;
c7b91b14 235
5a976006 236 spu_hint_dist = 8*4 - spu_max_nops*4;
237 if (spu_hint_dist < 0)
238 spu_hint_dist = 0;
239
c7b91b14 240 if (spu_fixed_range_string)
241 fix_range (spu_fixed_range_string);
5474166e 242
243 /* Determine processor architectural level. */
244 if (spu_arch_string)
245 {
246 if (strcmp (&spu_arch_string[0], "cell") == 0)
247 spu_arch = PROCESSOR_CELL;
248 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
249 spu_arch = PROCESSOR_CELLEDP;
250 else
8e181c9d 251 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 252 }
253
254 /* Determine processor to tune for. */
255 if (spu_tune_string)
256 {
257 if (strcmp (&spu_tune_string[0], "cell") == 0)
258 spu_tune = PROCESSOR_CELL;
259 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
260 spu_tune = PROCESSOR_CELLEDP;
261 else
8e181c9d 262 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 263 }
98bbec1e 264
13684256 265 /* Change defaults according to the processor architecture. */
266 if (spu_arch == PROCESSOR_CELLEDP)
267 {
268 /* If no command line option has been otherwise specified, change
269 the default to -mno-safe-hints on celledp -- only the original
270 Cell/B.E. processors require this workaround. */
271 if (!(target_flags_explicit & MASK_SAFE_HINTS))
272 target_flags &= ~MASK_SAFE_HINTS;
273 }
274
98bbec1e 275 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 276}
277\f
278/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
279 struct attribute_spec.handler. */
280
644459d0 281/* True if MODE is valid for the target. By "valid", we mean able to
282 be manipulated in non-trivial ways. In particular, this means all
283 the arithmetic is supported. */
284static bool
285spu_scalar_mode_supported_p (enum machine_mode mode)
286{
287 switch (mode)
288 {
289 case QImode:
290 case HImode:
291 case SImode:
292 case SFmode:
293 case DImode:
294 case TImode:
295 case DFmode:
296 return true;
297
298 default:
299 return false;
300 }
301}
302
303/* Similarly for vector modes. "Supported" here is less strict. At
304 least some operations are supported; need to check optabs or builtins
305 for further details. */
306static bool
307spu_vector_mode_supported_p (enum machine_mode mode)
308{
309 switch (mode)
310 {
311 case V16QImode:
312 case V8HImode:
313 case V4SImode:
314 case V2DImode:
315 case V4SFmode:
316 case V2DFmode:
317 return true;
318
319 default:
320 return false;
321 }
322}
323
324/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
325 least significant bytes of the outer mode. This function returns
 326 TRUE for the SUBREGs where this is correct. */
327int
328valid_subreg (rtx op)
329{
330 enum machine_mode om = GET_MODE (op);
331 enum machine_mode im = GET_MODE (SUBREG_REG (op));
332 return om != VOIDmode && im != VOIDmode
333 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 334 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
335 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 336}
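/* Illustrative examples (not from the original source): (subreg:HI (reg:SI))
   and (subreg:TI (reg:V4SI)) are accepted above because both modes fall in
   the <= 4 byte or >= 16 byte groups, while (subreg:SI (reg:DI)) is rejected
   since a 4-byte outer mode does not line up with the 8-byte inner mode on
   this target.  */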
337
 338/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 339 and adjust the start offset. */
644459d0 340static rtx
341adjust_operand (rtx op, HOST_WIDE_INT * start)
342{
343 enum machine_mode mode;
344 int op_size;
38aca5eb 345 /* Strip any paradoxical SUBREG. */
346 if (GET_CODE (op) == SUBREG
347 && (GET_MODE_BITSIZE (GET_MODE (op))
348 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 349 {
350 if (start)
351 *start -=
352 GET_MODE_BITSIZE (GET_MODE (op)) -
353 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
354 op = SUBREG_REG (op);
355 }
 356 /* If it is smaller than SI, widen it to SImode (the SUBREG is added below). */
357 op_size = GET_MODE_BITSIZE (GET_MODE (op));
358 if (op_size < 32)
359 {
360 if (start)
361 *start += 32 - op_size;
362 op_size = 32;
363 }
364 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
365 mode = mode_for_size (op_size, MODE_INT, 0);
366 if (mode != GET_MODE (op))
367 op = gen_rtx_SUBREG (mode, op, 0);
368 return op;
369}
370
371void
372spu_expand_extv (rtx ops[], int unsignedp)
373{
9d98604b 374 rtx dst = ops[0], src = ops[1];
644459d0 375 HOST_WIDE_INT width = INTVAL (ops[2]);
376 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 377 HOST_WIDE_INT align_mask;
378 rtx s0, s1, mask, r0;
644459d0 379
9d98604b 380 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 381
9d98604b 382 if (MEM_P (src))
644459d0 383 {
9d98604b 384 /* First, determine if we need 1 TImode load or 2. We need only 1
385 if the bits being extracted do not cross the alignment boundary
386 as determined by the MEM and its address. */
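      /* Worked example (illustrative): if SRC is only known to be 8-byte
         aligned, align_mask is -64; a field with start = 60, width = 8
         gives (60 & -64) == 0 but (67 & -64) == 64, so it crosses the
         boundary and the two-load path is taken, whereas start = 8,
         width = 32 stays within one aligned block and needs one load.  */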
387
388 align_mask = -MEM_ALIGN (src);
389 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 390 {
9d98604b 391 /* Alignment is sufficient for 1 load. */
392 s0 = gen_reg_rtx (TImode);
393 r0 = spu_expand_load (s0, 0, src, start / 8);
394 start &= 7;
395 if (r0)
396 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 397 }
9d98604b 398 else
399 {
400 /* Need 2 loads. */
401 s0 = gen_reg_rtx (TImode);
402 s1 = gen_reg_rtx (TImode);
403 r0 = spu_expand_load (s0, s1, src, start / 8);
404 start &= 7;
405
406 gcc_assert (start + width <= 128);
407 if (r0)
408 {
409 rtx r1 = gen_reg_rtx (SImode);
410 mask = gen_reg_rtx (TImode);
411 emit_move_insn (mask, GEN_INT (-1));
412 emit_insn (gen_rotqby_ti (s0, s0, r0));
413 emit_insn (gen_rotqby_ti (s1, s1, r0));
414 if (GET_CODE (r0) == CONST_INT)
415 r1 = GEN_INT (INTVAL (r0) & 15);
416 else
417 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
418 emit_insn (gen_shlqby_ti (mask, mask, r1));
419 emit_insn (gen_selb (s0, s1, s0, mask));
420 }
421 }
422
423 }
424 else if (GET_CODE (src) == SUBREG)
425 {
426 rtx r = SUBREG_REG (src);
427 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
428 s0 = gen_reg_rtx (TImode);
429 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
430 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
431 else
432 emit_move_insn (s0, src);
433 }
434 else
435 {
436 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
437 s0 = gen_reg_rtx (TImode);
438 emit_move_insn (s0, src);
644459d0 439 }
440
9d98604b 441 /* Now s0 is TImode and contains the bits to extract at start. */
442
443 if (start)
444 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
445
446 if (128 - width)
f5ff0b21 447 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 448
9d98604b 449 emit_move_insn (dst, s0);
644459d0 450}
451
452void
453spu_expand_insv (rtx ops[])
454{
455 HOST_WIDE_INT width = INTVAL (ops[1]);
456 HOST_WIDE_INT start = INTVAL (ops[2]);
457 HOST_WIDE_INT maskbits;
4cbad5bb 458 enum machine_mode dst_mode;
644459d0 459 rtx dst = ops[0], src = ops[3];
4cbad5bb 460 int dst_size;
644459d0 461 rtx mask;
462 rtx shift_reg;
463 int shift;
464
465
466 if (GET_CODE (ops[0]) == MEM)
467 dst = gen_reg_rtx (TImode);
468 else
469 dst = adjust_operand (dst, &start);
470 dst_mode = GET_MODE (dst);
471 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
472
473 if (CONSTANT_P (src))
474 {
475 enum machine_mode m =
476 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
477 src = force_reg (m, convert_to_mode (m, src, 0));
478 }
479 src = adjust_operand (src, 0);
644459d0 480
481 mask = gen_reg_rtx (dst_mode);
482 shift_reg = gen_reg_rtx (dst_mode);
483 shift = dst_size - start - width;
484
485 /* It's not safe to use subreg here because the compiler assumes
486 that the SUBREG_REG is right justified in the SUBREG. */
487 convert_move (shift_reg, src, 1);
488
489 if (shift > 0)
490 {
491 switch (dst_mode)
492 {
493 case SImode:
494 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
495 break;
496 case DImode:
497 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
498 break;
499 case TImode:
500 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
501 break;
502 default:
503 abort ();
504 }
505 }
506 else if (shift < 0)
507 abort ();
508
509 switch (dst_size)
510 {
511 case 32:
512 maskbits = (-1ll << (32 - width - start));
513 if (start)
514 maskbits += (1ll << (32 - start));
515 emit_move_insn (mask, GEN_INT (maskbits));
516 break;
517 case 64:
518 maskbits = (-1ll << (64 - width - start));
519 if (start)
520 maskbits += (1ll << (64 - start));
521 emit_move_insn (mask, GEN_INT (maskbits));
522 break;
523 case 128:
524 {
525 unsigned char arr[16];
526 int i = start / 8;
527 memset (arr, 0, sizeof (arr));
528 arr[i] = 0xff >> (start & 7);
529 for (i++; i <= (start + width - 1) / 8; i++)
530 arr[i] = 0xff;
531 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
532 emit_move_insn (mask, array_to_constant (TImode, arr));
533 }
534 break;
535 default:
536 abort ();
537 }
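  /* Worked example (illustrative): for a 32-bit destination with start = 8
     and width = 8, maskbits = (-1ll << 16) + (1ll << 24), whose low 32 bits
     are 0x00ff0000 -- ones exactly over the inserted field, counting bit 0
     as the most significant bit.  */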
538 if (GET_CODE (ops[0]) == MEM)
539 {
644459d0 540 rtx low = gen_reg_rtx (SImode);
644459d0 541 rtx rotl = gen_reg_rtx (SImode);
542 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 543 rtx addr;
544 rtx addr0;
545 rtx addr1;
644459d0 546 rtx mem;
547
9d98604b 548 addr = force_reg (Pmode, XEXP (ops[0], 0));
549 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 550 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
551 emit_insn (gen_negsi2 (rotl, low));
552 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
553 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 554 mem = change_address (ops[0], TImode, addr0);
644459d0 555 set_mem_alias_set (mem, 0);
556 emit_move_insn (dst, mem);
557 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 558 if (start + width > MEM_ALIGN (ops[0]))
559 {
560 rtx shl = gen_reg_rtx (SImode);
561 rtx mask1 = gen_reg_rtx (TImode);
562 rtx dst1 = gen_reg_rtx (TImode);
563 rtx mem1;
29c05e22 564 addr1 = plus_constant (Pmode, addr, 16);
9d98604b 565 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 566 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
567 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 568 mem1 = change_address (ops[0], TImode, addr1);
644459d0 569 set_mem_alias_set (mem1, 0);
570 emit_move_insn (dst1, mem1);
571 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
572 emit_move_insn (mem1, dst1);
573 }
9d98604b 574 emit_move_insn (mem, dst);
644459d0 575 }
576 else
71cd778d 577 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 578}
579
580
581int
582spu_expand_block_move (rtx ops[])
583{
584 HOST_WIDE_INT bytes, align, offset;
585 rtx src, dst, sreg, dreg, target;
586 int i;
587 if (GET_CODE (ops[2]) != CONST_INT
588 || GET_CODE (ops[3]) != CONST_INT
48eb4342 589 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 590 return 0;
591
592 bytes = INTVAL (ops[2]);
593 align = INTVAL (ops[3]);
594
595 if (bytes <= 0)
596 return 1;
597
598 dst = ops[0];
599 src = ops[1];
600
601 if (align == 16)
602 {
603 for (offset = 0; offset + 16 <= bytes; offset += 16)
604 {
605 dst = adjust_address (ops[0], V16QImode, offset);
606 src = adjust_address (ops[1], V16QImode, offset);
607 emit_move_insn (dst, src);
608 }
609 if (offset < bytes)
610 {
611 rtx mask;
612 unsigned char arr[16] = { 0 };
613 for (i = 0; i < bytes - offset; i++)
614 arr[i] = 0xff;
615 dst = adjust_address (ops[0], V16QImode, offset);
616 src = adjust_address (ops[1], V16QImode, offset);
617 mask = gen_reg_rtx (V16QImode);
618 sreg = gen_reg_rtx (V16QImode);
619 dreg = gen_reg_rtx (V16QImode);
620 target = gen_reg_rtx (V16QImode);
621 emit_move_insn (mask, array_to_constant (V16QImode, arr));
622 emit_move_insn (dreg, dst);
623 emit_move_insn (sreg, src);
624 emit_insn (gen_selb (target, dreg, sreg, mask));
625 emit_move_insn (dst, target);
626 }
627 return 1;
628 }
629 return 0;
630}
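/* Illustrative example (not part of the original source): a 16-byte aligned
   copy of 20 bytes becomes one V16QImode register move for bytes 0-15;
   for the 4-byte tail the source and destination quadwords are both loaded
   and merged with selb under a mask of four 0xff bytes, so only bytes
   16-19 of the destination are modified.  */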
631
632enum spu_comp_code
633{ SPU_EQ, SPU_GT, SPU_GTU };
634
5474166e 635int spu_comp_icode[12][3] = {
636 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
637 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
638 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
639 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
640 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
641 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
642 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
643 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
644 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
645 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
646 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
647 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 648};
649
 650/* Generate a compare for CODE and emit either a branch or a set of the
 651 result, depending on IS_SET. GCC can figure this out too if we don't
 652 provide all variations of compares, but since GCC always wants to use
 653 WORD_MODE, we can generate better code in most cases if we do it
 654 ourselves. */
655void
74f4459c 656spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 657{
658 int reverse_compare = 0;
659 int reverse_test = 0;
5d70b918 660 rtx compare_result, eq_result;
661 rtx comp_rtx, eq_rtx;
644459d0 662 enum machine_mode comp_mode;
663 enum machine_mode op_mode;
b9c74b4d 664 enum spu_comp_code scode, eq_code;
665 enum insn_code ior_code;
74f4459c 666 enum rtx_code code = GET_CODE (cmp);
667 rtx op0 = XEXP (cmp, 0);
668 rtx op1 = XEXP (cmp, 1);
644459d0 669 int index;
5d70b918 670 int eq_test = 0;
644459d0 671
74f4459c 672 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 673 and so on, to keep the constant in operand 1. */
74f4459c 674 if (GET_CODE (op1) == CONST_INT)
644459d0 675 {
74f4459c 676 HOST_WIDE_INT val = INTVAL (op1) - 1;
677 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 678 switch (code)
679 {
680 case GE:
74f4459c 681 op1 = GEN_INT (val);
644459d0 682 code = GT;
683 break;
684 case LT:
74f4459c 685 op1 = GEN_INT (val);
644459d0 686 code = LE;
687 break;
688 case GEU:
74f4459c 689 op1 = GEN_INT (val);
644459d0 690 code = GTU;
691 break;
692 case LTU:
74f4459c 693 op1 = GEN_INT (val);
644459d0 694 code = LEU;
695 break;
696 default:
697 break;
698 }
699 }
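  /* Example (illustrative): (x >= 5) is rewritten as (x > 4) and (x < 5)
     as (x <= 4), keeping the constant in operand 1 as described above.  */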
700
686195ea 701 /* However, if we generate an integer result, performing a reverse test
702 would require an extra negation, so avoid that where possible. */
703 if (GET_CODE (op1) == CONST_INT && is_set == 1)
704 {
705 HOST_WIDE_INT val = INTVAL (op1) + 1;
706 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
707 switch (code)
708 {
709 case LE:
710 op1 = GEN_INT (val);
711 code = LT;
712 break;
713 case LEU:
714 op1 = GEN_INT (val);
715 code = LTU;
716 break;
717 default:
718 break;
719 }
720 }
721
5d70b918 722 comp_mode = SImode;
74f4459c 723 op_mode = GET_MODE (op0);
5d70b918 724
644459d0 725 switch (code)
726 {
727 case GE:
644459d0 728 scode = SPU_GT;
07027691 729 if (HONOR_NANS (op_mode))
5d70b918 730 {
731 reverse_compare = 0;
732 reverse_test = 0;
733 eq_test = 1;
734 eq_code = SPU_EQ;
735 }
736 else
737 {
738 reverse_compare = 1;
739 reverse_test = 1;
740 }
644459d0 741 break;
742 case LE:
644459d0 743 scode = SPU_GT;
07027691 744 if (HONOR_NANS (op_mode))
5d70b918 745 {
746 reverse_compare = 1;
747 reverse_test = 0;
748 eq_test = 1;
749 eq_code = SPU_EQ;
750 }
751 else
752 {
753 reverse_compare = 0;
754 reverse_test = 1;
755 }
644459d0 756 break;
757 case LT:
758 reverse_compare = 1;
759 reverse_test = 0;
760 scode = SPU_GT;
761 break;
762 case GEU:
763 reverse_compare = 1;
764 reverse_test = 1;
765 scode = SPU_GTU;
766 break;
767 case LEU:
768 reverse_compare = 0;
769 reverse_test = 1;
770 scode = SPU_GTU;
771 break;
772 case LTU:
773 reverse_compare = 1;
774 reverse_test = 0;
775 scode = SPU_GTU;
776 break;
777 case NE:
778 reverse_compare = 0;
779 reverse_test = 1;
780 scode = SPU_EQ;
781 break;
782
783 case EQ:
784 scode = SPU_EQ;
785 break;
786 case GT:
787 scode = SPU_GT;
788 break;
789 case GTU:
790 scode = SPU_GTU;
791 break;
792 default:
793 scode = SPU_EQ;
794 break;
795 }
796
644459d0 797 switch (op_mode)
798 {
799 case QImode:
800 index = 0;
801 comp_mode = QImode;
802 break;
803 case HImode:
804 index = 1;
805 comp_mode = HImode;
806 break;
807 case SImode:
808 index = 2;
809 break;
810 case DImode:
811 index = 3;
812 break;
813 case TImode:
814 index = 4;
815 break;
816 case SFmode:
817 index = 5;
818 break;
819 case DFmode:
820 index = 6;
821 break;
822 case V16QImode:
5474166e 823 index = 7;
824 comp_mode = op_mode;
825 break;
644459d0 826 case V8HImode:
5474166e 827 index = 8;
828 comp_mode = op_mode;
829 break;
644459d0 830 case V4SImode:
5474166e 831 index = 9;
832 comp_mode = op_mode;
833 break;
644459d0 834 case V4SFmode:
5474166e 835 index = 10;
836 comp_mode = V4SImode;
837 break;
644459d0 838 case V2DFmode:
5474166e 839 index = 11;
840 comp_mode = V2DImode;
644459d0 841 break;
5474166e 842 case V2DImode:
644459d0 843 default:
844 abort ();
845 }
846
74f4459c 847 if (GET_MODE (op1) == DFmode
07027691 848 && (scode != SPU_GT && scode != SPU_EQ))
849 abort ();
644459d0 850
74f4459c 851 if (is_set == 0 && op1 == const0_rtx
852 && (GET_MODE (op0) == SImode
686195ea 853 || GET_MODE (op0) == HImode
854 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
644459d0 855 {
856 /* Don't need to set a register with the result when we are
857 comparing against zero and branching. */
858 reverse_test = !reverse_test;
74f4459c 859 compare_result = op0;
644459d0 860 }
861 else
862 {
863 compare_result = gen_reg_rtx (comp_mode);
864
865 if (reverse_compare)
866 {
74f4459c 867 rtx t = op1;
868 op1 = op0;
869 op0 = t;
644459d0 870 }
871
872 if (spu_comp_icode[index][scode] == 0)
873 abort ();
874
875 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 876 (op0, op_mode))
877 op0 = force_reg (op_mode, op0);
644459d0 878 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 879 (op1, op_mode))
880 op1 = force_reg (op_mode, op1);
644459d0 881 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 882 op0, op1);
644459d0 883 if (comp_rtx == 0)
884 abort ();
885 emit_insn (comp_rtx);
886
5d70b918 887 if (eq_test)
888 {
889 eq_result = gen_reg_rtx (comp_mode);
890 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 891 op0, op1);
5d70b918 892 if (eq_rtx == 0)
893 abort ();
894 emit_insn (eq_rtx);
d6bf3b14 895 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 896 gcc_assert (ior_code != CODE_FOR_nothing);
897 emit_insn (GEN_FCN (ior_code)
898 (compare_result, compare_result, eq_result));
899 }
644459d0 900 }
901
902 if (is_set == 0)
903 {
904 rtx bcomp;
905 rtx loc_ref;
906
907 /* We don't have branch on QI compare insns, so we convert the
908 QI compare result to a HI result. */
909 if (comp_mode == QImode)
910 {
911 rtx old_res = compare_result;
912 compare_result = gen_reg_rtx (HImode);
913 comp_mode = HImode;
914 emit_insn (gen_extendqihi2 (compare_result, old_res));
915 }
916
917 if (reverse_test)
918 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
919 else
920 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
921
74f4459c 922 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 923 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
924 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
925 loc_ref, pc_rtx)));
926 }
927 else if (is_set == 2)
928 {
74f4459c 929 rtx target = operands[0];
644459d0 930 int compare_size = GET_MODE_BITSIZE (comp_mode);
931 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
932 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
933 rtx select_mask;
934 rtx op_t = operands[2];
935 rtx op_f = operands[3];
936
937 /* The result of the comparison can be SI, HI or QI mode. Create a
938 mask based on that result. */
939 if (target_size > compare_size)
940 {
941 select_mask = gen_reg_rtx (mode);
942 emit_insn (gen_extend_compare (select_mask, compare_result));
943 }
944 else if (target_size < compare_size)
945 select_mask =
946 gen_rtx_SUBREG (mode, compare_result,
947 (compare_size - target_size) / BITS_PER_UNIT);
948 else if (comp_mode != mode)
949 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
950 else
951 select_mask = compare_result;
952
953 if (GET_MODE (target) != GET_MODE (op_t)
954 || GET_MODE (target) != GET_MODE (op_f))
955 abort ();
956
957 if (reverse_test)
958 emit_insn (gen_selb (target, op_t, op_f, select_mask));
959 else
960 emit_insn (gen_selb (target, op_f, op_t, select_mask));
961 }
962 else
963 {
74f4459c 964 rtx target = operands[0];
644459d0 965 if (reverse_test)
966 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
967 gen_rtx_NOT (comp_mode, compare_result)));
968 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
969 emit_insn (gen_extendhisi2 (target, compare_result));
970 else if (GET_MODE (target) == SImode
971 && GET_MODE (compare_result) == QImode)
972 emit_insn (gen_extend_compare (target, compare_result));
973 else
974 emit_move_insn (target, compare_result);
975 }
976}
977
978HOST_WIDE_INT
979const_double_to_hwint (rtx x)
980{
981 HOST_WIDE_INT val;
982 REAL_VALUE_TYPE rv;
983 if (GET_MODE (x) == SFmode)
984 {
985 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
986 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
987 }
988 else if (GET_MODE (x) == DFmode)
989 {
990 long l[2];
991 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
992 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
993 val = l[0];
994 val = (val << 32) | (l[1] & 0xffffffff);
995 }
996 else
997 abort ();
998 return val;
999}
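/* Illustrative example (not from the original source): for the SFmode
   constant 1.0 this returns 0x3f800000; for the DFmode constant 1.0 the
   two target words are packed into the single value 0x3ff0000000000000
   (high word first, SPU being big-endian).  */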
1000
1001rtx
1002hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1003{
1004 long tv[2];
1005 REAL_VALUE_TYPE rv;
1006 gcc_assert (mode == SFmode || mode == DFmode);
1007
1008 if (mode == SFmode)
1009 tv[0] = (v << 32) >> 32;
1010 else if (mode == DFmode)
1011 {
1012 tv[1] = (v << 32) >> 32;
1013 tv[0] = v >> 32;
1014 }
1015 real_from_target (&rv, tv, mode);
1016 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1017}
1018
1019void
1020print_operand_address (FILE * file, register rtx addr)
1021{
1022 rtx reg;
1023 rtx offset;
1024
e04cf423 1025 if (GET_CODE (addr) == AND
1026 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1027 && INTVAL (XEXP (addr, 1)) == -16)
1028 addr = XEXP (addr, 0);
1029
644459d0 1030 switch (GET_CODE (addr))
1031 {
1032 case REG:
1033 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1034 break;
1035
1036 case PLUS:
1037 reg = XEXP (addr, 0);
1038 offset = XEXP (addr, 1);
1039 if (GET_CODE (offset) == REG)
1040 {
1041 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1042 reg_names[REGNO (offset)]);
1043 }
1044 else if (GET_CODE (offset) == CONST_INT)
1045 {
1046 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1047 INTVAL (offset), reg_names[REGNO (reg)]);
1048 }
1049 else
1050 abort ();
1051 break;
1052
1053 case CONST:
1054 case LABEL_REF:
1055 case SYMBOL_REF:
1056 case CONST_INT:
1057 output_addr_const (file, addr);
1058 break;
1059
1060 default:
1061 debug_rtx (addr);
1062 abort ();
1063 }
1064}
1065
1066void
1067print_operand (FILE * file, rtx x, int code)
1068{
1069 enum machine_mode mode = GET_MODE (x);
1070 HOST_WIDE_INT val;
1071 unsigned char arr[16];
1072 int xcode = GET_CODE (x);
dea01258 1073 int i, info;
644459d0 1074 if (GET_MODE (x) == VOIDmode)
1075 switch (code)
1076 {
644459d0 1077 case 'L': /* 128 bits, signed */
1078 case 'm': /* 128 bits, signed */
1079 case 'T': /* 128 bits, signed */
1080 case 't': /* 128 bits, signed */
1081 mode = TImode;
1082 break;
644459d0 1083 case 'K': /* 64 bits, signed */
1084 case 'k': /* 64 bits, signed */
1085 case 'D': /* 64 bits, signed */
1086 case 'd': /* 64 bits, signed */
1087 mode = DImode;
1088 break;
644459d0 1089 case 'J': /* 32 bits, signed */
1090 case 'j': /* 32 bits, signed */
1091 case 's': /* 32 bits, signed */
1092 case 'S': /* 32 bits, signed */
1093 mode = SImode;
1094 break;
1095 }
1096 switch (code)
1097 {
1098
1099 case 'j': /* 32 bits, signed */
1100 case 'k': /* 64 bits, signed */
1101 case 'm': /* 128 bits, signed */
1102 if (xcode == CONST_INT
1103 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1104 {
1105 gcc_assert (logical_immediate_p (x, mode));
1106 constant_to_array (mode, x, arr);
1107 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1108 val = trunc_int_for_mode (val, SImode);
1109 switch (which_logical_immediate (val))
1110 {
1111 case SPU_ORI:
1112 break;
1113 case SPU_ORHI:
1114 fprintf (file, "h");
1115 break;
1116 case SPU_ORBI:
1117 fprintf (file, "b");
1118 break;
1119 default:
1120 gcc_unreachable();
1121 }
1122 }
1123 else
1124 gcc_unreachable();
1125 return;
1126
1127 case 'J': /* 32 bits, signed */
1128 case 'K': /* 64 bits, signed */
1129 case 'L': /* 128 bits, signed */
1130 if (xcode == CONST_INT
1131 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1132 {
1133 gcc_assert (logical_immediate_p (x, mode)
1134 || iohl_immediate_p (x, mode));
1135 constant_to_array (mode, x, arr);
1136 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1137 val = trunc_int_for_mode (val, SImode);
1138 switch (which_logical_immediate (val))
1139 {
1140 case SPU_ORI:
1141 case SPU_IOHL:
1142 break;
1143 case SPU_ORHI:
1144 val = trunc_int_for_mode (val, HImode);
1145 break;
1146 case SPU_ORBI:
1147 val = trunc_int_for_mode (val, QImode);
1148 break;
1149 default:
1150 gcc_unreachable();
1151 }
1152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1153 }
1154 else
1155 gcc_unreachable();
1156 return;
1157
1158 case 't': /* 128 bits, signed */
1159 case 'd': /* 64 bits, signed */
1160 case 's': /* 32 bits, signed */
dea01258 1161 if (CONSTANT_P (x))
644459d0 1162 {
dea01258 1163 enum immediate_class c = classify_immediate (x, mode);
1164 switch (c)
1165 {
1166 case IC_IL1:
1167 constant_to_array (mode, x, arr);
1168 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1169 val = trunc_int_for_mode (val, SImode);
1170 switch (which_immediate_load (val))
1171 {
1172 case SPU_IL:
1173 break;
1174 case SPU_ILA:
1175 fprintf (file, "a");
1176 break;
1177 case SPU_ILH:
1178 fprintf (file, "h");
1179 break;
1180 case SPU_ILHU:
1181 fprintf (file, "hu");
1182 break;
1183 default:
1184 gcc_unreachable ();
1185 }
1186 break;
1187 case IC_CPAT:
1188 constant_to_array (mode, x, arr);
1189 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1190 if (info == 1)
1191 fprintf (file, "b");
1192 else if (info == 2)
1193 fprintf (file, "h");
1194 else if (info == 4)
1195 fprintf (file, "w");
1196 else if (info == 8)
1197 fprintf (file, "d");
1198 break;
1199 case IC_IL1s:
1200 if (xcode == CONST_VECTOR)
1201 {
1202 x = CONST_VECTOR_ELT (x, 0);
1203 xcode = GET_CODE (x);
1204 }
1205 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1206 fprintf (file, "a");
1207 else if (xcode == HIGH)
1208 fprintf (file, "hu");
1209 break;
1210 case IC_FSMBI:
5df189be 1211 case IC_FSMBI2:
dea01258 1212 case IC_IL2:
1213 case IC_IL2s:
1214 case IC_POOL:
1215 abort ();
1216 }
644459d0 1217 }
644459d0 1218 else
1219 gcc_unreachable ();
1220 return;
1221
1222 case 'T': /* 128 bits, signed */
1223 case 'D': /* 64 bits, signed */
1224 case 'S': /* 32 bits, signed */
dea01258 1225 if (CONSTANT_P (x))
644459d0 1226 {
dea01258 1227 enum immediate_class c = classify_immediate (x, mode);
1228 switch (c)
644459d0 1229 {
dea01258 1230 case IC_IL1:
1231 constant_to_array (mode, x, arr);
1232 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1233 val = trunc_int_for_mode (val, SImode);
1234 switch (which_immediate_load (val))
1235 {
1236 case SPU_IL:
1237 case SPU_ILA:
1238 break;
1239 case SPU_ILH:
1240 case SPU_ILHU:
1241 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1242 break;
1243 default:
1244 gcc_unreachable ();
1245 }
1246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1247 break;
1248 case IC_FSMBI:
1249 constant_to_array (mode, x, arr);
1250 val = 0;
1251 for (i = 0; i < 16; i++)
1252 {
1253 val <<= 1;
1254 val |= arr[i] & 1;
1255 }
1256 print_operand (file, GEN_INT (val), 0);
1257 break;
1258 case IC_CPAT:
1259 constant_to_array (mode, x, arr);
1260 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1262 break;
dea01258 1263 case IC_IL1s:
dea01258 1264 if (xcode == HIGH)
5df189be 1265 x = XEXP (x, 0);
1266 if (GET_CODE (x) == CONST_VECTOR)
1267 x = CONST_VECTOR_ELT (x, 0);
1268 output_addr_const (file, x);
1269 if (xcode == HIGH)
1270 fprintf (file, "@h");
644459d0 1271 break;
dea01258 1272 case IC_IL2:
1273 case IC_IL2s:
5df189be 1274 case IC_FSMBI2:
dea01258 1275 case IC_POOL:
1276 abort ();
644459d0 1277 }
c8befdb9 1278 }
644459d0 1279 else
1280 gcc_unreachable ();
1281 return;
1282
644459d0 1283 case 'C':
1284 if (xcode == CONST_INT)
1285 {
 1286 /* Only the 4 least significant bits are relevant for generating
 1287 control word instructions. */
1288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1289 return;
1290 }
1291 break;
1292
1293 case 'M': /* print code for c*d */
1294 if (GET_CODE (x) == CONST_INT)
1295 switch (INTVAL (x))
1296 {
1297 case 1:
1298 fprintf (file, "b");
1299 break;
1300 case 2:
1301 fprintf (file, "h");
1302 break;
1303 case 4:
1304 fprintf (file, "w");
1305 break;
1306 case 8:
1307 fprintf (file, "d");
1308 break;
1309 default:
1310 gcc_unreachable();
1311 }
1312 else
1313 gcc_unreachable();
1314 return;
1315
1316 case 'N': /* Negate the operand */
1317 if (xcode == CONST_INT)
1318 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1319 else if (xcode == CONST_VECTOR)
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1321 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1322 return;
1323
1324 case 'I': /* enable/disable interrupts */
1325 if (xcode == CONST_INT)
1326 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1327 return;
1328
1329 case 'b': /* branch modifiers */
1330 if (xcode == REG)
1331 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1332 else if (COMPARISON_P (x))
1333 fprintf (file, "%s", xcode == NE ? "n" : "");
1334 return;
1335
1336 case 'i': /* indirect call */
1337 if (xcode == MEM)
1338 {
1339 if (GET_CODE (XEXP (x, 0)) == REG)
1340 /* Used in indirect function calls. */
1341 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1342 else
1343 output_address (XEXP (x, 0));
1344 }
1345 return;
1346
1347 case 'p': /* load/store */
1348 if (xcode == MEM)
1349 {
1350 x = XEXP (x, 0);
1351 xcode = GET_CODE (x);
1352 }
e04cf423 1353 if (xcode == AND)
1354 {
1355 x = XEXP (x, 0);
1356 xcode = GET_CODE (x);
1357 }
644459d0 1358 if (xcode == REG)
1359 fprintf (file, "d");
1360 else if (xcode == CONST_INT)
1361 fprintf (file, "a");
1362 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1363 fprintf (file, "r");
1364 else if (xcode == PLUS || xcode == LO_SUM)
1365 {
1366 if (GET_CODE (XEXP (x, 1)) == REG)
1367 fprintf (file, "x");
1368 else
1369 fprintf (file, "d");
1370 }
1371 return;
1372
5df189be 1373 case 'e':
1374 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1375 val &= 0x7;
1376 output_addr_const (file, GEN_INT (val));
1377 return;
1378
1379 case 'f':
1380 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1381 val &= 0x1f;
1382 output_addr_const (file, GEN_INT (val));
1383 return;
1384
1385 case 'g':
1386 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1387 val &= 0x3f;
1388 output_addr_const (file, GEN_INT (val));
1389 return;
1390
1391 case 'h':
1392 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1393 val = (val >> 3) & 0x1f;
1394 output_addr_const (file, GEN_INT (val));
1395 return;
1396
1397 case 'E':
1398 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1399 val = -val;
1400 val &= 0x7;
1401 output_addr_const (file, GEN_INT (val));
1402 return;
1403
1404 case 'F':
1405 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1406 val = -val;
1407 val &= 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'G':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val = -val;
1414 val &= 0x3f;
1415 output_addr_const (file, GEN_INT (val));
1416 return;
1417
1418 case 'H':
1419 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420 val = -(val & -8ll);
1421 val = (val >> 3) & 0x1f;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1424
56c7bfc2 1425 case 'v':
1426 case 'w':
1427 constant_to_array (mode, x, arr);
1428 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1429 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1430 return;
1431
644459d0 1432 case 0:
1433 if (xcode == REG)
1434 fprintf (file, "%s", reg_names[REGNO (x)]);
1435 else if (xcode == MEM)
1436 output_address (XEXP (x, 0));
1437 else if (xcode == CONST_VECTOR)
dea01258 1438 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1439 else
1440 output_addr_const (file, x);
1441 return;
1442
f6a0d06f 1443 /* unused letters
56c7bfc2 1444 o qr u yz
5df189be 1445 AB OPQR UVWXYZ */
644459d0 1446 default:
1447 output_operand_lossage ("invalid %%xn code");
1448 }
1449 gcc_unreachable ();
1450}
1451
644459d0 1452/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1453 caller saved register. For leaf functions it is more efficient to
1454 use a volatile register because we won't need to save and restore the
1455 pic register. This routine is only valid after register allocation
1456 is completed, so we can pick an unused register. */
1457static rtx
1458get_pic_reg (void)
1459{
644459d0 1460 if (!reload_completed && !reload_in_progress)
1461 abort ();
5eb28709 1462
1463 /* If we've already made the decision, we need to keep with it. Once we've
1464 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1465 return true since the register is now live; this should not cause us to
1466 "switch back" to using pic_offset_table_rtx. */
1467 if (!cfun->machine->pic_reg)
1468 {
d5bf7b64 1469 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
5eb28709 1470 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1471 else
1472 cfun->machine->pic_reg = pic_offset_table_rtx;
1473 }
1474
1475 return cfun->machine->pic_reg;
644459d0 1476}
1477
5df189be 1478/* Split constant addresses to handle cases that are too large.
1479 Add in the pic register when in PIC mode.
1480 Split immediates that require more than 1 instruction. */
dea01258 1481int
1482spu_split_immediate (rtx * ops)
c8befdb9 1483{
dea01258 1484 enum machine_mode mode = GET_MODE (ops[0]);
1485 enum immediate_class c = classify_immediate (ops[1], mode);
1486
1487 switch (c)
c8befdb9 1488 {
dea01258 1489 case IC_IL2:
1490 {
1491 unsigned char arrhi[16];
1492 unsigned char arrlo[16];
98bbec1e 1493 rtx to, temp, hi, lo;
dea01258 1494 int i;
98bbec1e 1495 enum machine_mode imode = mode;
1496 /* We need to do reals as ints because the constant used in the
1497 IOR might not be a legitimate real constant. */
1498 imode = int_mode_for_mode (mode);
dea01258 1499 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1500 if (imode != mode)
1501 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1502 else
1503 to = ops[0];
1504 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1505 for (i = 0; i < 16; i += 4)
1506 {
1507 arrlo[i + 2] = arrhi[i + 2];
1508 arrlo[i + 3] = arrhi[i + 3];
1509 arrlo[i + 0] = arrlo[i + 1] = 0;
1510 arrhi[i + 2] = arrhi[i + 3] = 0;
1511 }
98bbec1e 1512 hi = array_to_constant (imode, arrhi);
1513 lo = array_to_constant (imode, arrlo);
1514 emit_move_insn (temp, hi);
dea01258 1515 emit_insn (gen_rtx_SET
98bbec1e 1516 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1517 return 1;
1518 }
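      /* Illustrative example (the assembly mnemonics are an assumption,
         not taken from this file): an SImode constant such as 0x12345678
         needs two instructions, so the move plus IOR above typically
         become "ilhu $rt,0x1234" followed by "iohl $rt,0x5678".  */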
5df189be 1519 case IC_FSMBI2:
1520 {
1521 unsigned char arr_fsmbi[16];
1522 unsigned char arr_andbi[16];
1523 rtx to, reg_fsmbi, reg_and;
1524 int i;
1525 enum machine_mode imode = mode;
1526 /* We need to do reals as ints because the constant used in the
1527 * AND might not be a legitimate real constant. */
1528 imode = int_mode_for_mode (mode);
1529 constant_to_array (mode, ops[1], arr_fsmbi);
1530 if (imode != mode)
1531 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1532 else
1533 to = ops[0];
1534 for (i = 0; i < 16; i++)
1535 if (arr_fsmbi[i] != 0)
1536 {
1537 arr_andbi[0] = arr_fsmbi[i];
1538 arr_fsmbi[i] = 0xff;
1539 }
1540 for (i = 1; i < 16; i++)
1541 arr_andbi[i] = arr_andbi[0];
1542 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1543 reg_and = array_to_constant (imode, arr_andbi);
1544 emit_move_insn (to, reg_fsmbi);
1545 emit_insn (gen_rtx_SET
1546 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1547 return 1;
1548 }
dea01258 1549 case IC_POOL:
1550 if (reload_in_progress || reload_completed)
1551 {
1552 rtx mem = force_const_mem (mode, ops[1]);
1553 if (TARGET_LARGE_MEM)
1554 {
1555 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1556 emit_move_insn (addr, XEXP (mem, 0));
1557 mem = replace_equiv_address (mem, addr);
1558 }
1559 emit_move_insn (ops[0], mem);
1560 return 1;
1561 }
1562 break;
1563 case IC_IL1s:
1564 case IC_IL2s:
1565 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1566 {
1567 if (c == IC_IL2s)
1568 {
5df189be 1569 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1570 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1571 }
1572 else if (flag_pic)
1573 emit_insn (gen_pic (ops[0], ops[1]));
1574 if (flag_pic)
1575 {
1576 rtx pic_reg = get_pic_reg ();
1577 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
dea01258 1578 }
1579 return flag_pic || c == IC_IL2s;
1580 }
1581 break;
1582 case IC_IL1:
1583 case IC_FSMBI:
1584 case IC_CPAT:
1585 break;
c8befdb9 1586 }
dea01258 1587 return 0;
c8befdb9 1588}
1589
644459d0 1590/* SAVING is TRUE when we are generating the actual load and store
1591 instructions for REGNO. When determining the size of the stack
 1592 needed for saving registers we must allocate enough space for the
 1593 worst case, because we don't always have the information early enough
 1594 to avoid allocating it. But we can at least eliminate the actual loads
1595 and stores during the prologue/epilogue. */
1596static int
1597need_to_save_reg (int regno, int saving)
1598{
3072d30e 1599 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1600 return 1;
1601 if (flag_pic
1602 && regno == PIC_OFFSET_TABLE_REGNUM
5eb28709 1603 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
644459d0 1604 return 1;
1605 return 0;
1606}
1607
1608/* This function is only correct starting with local register
 1609 allocation. */
1610int
1611spu_saved_regs_size (void)
1612{
1613 int reg_save_size = 0;
1614 int regno;
1615
1616 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1617 if (need_to_save_reg (regno, 0))
1618 reg_save_size += 0x10;
1619 return reg_save_size;
1620}
1621
1622static rtx
1623frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1624{
1625 rtx reg = gen_rtx_REG (V4SImode, regno);
1626 rtx mem =
1627 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1628 return emit_insn (gen_movv4si (mem, reg));
1629}
1630
1631static rtx
1632frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1633{
1634 rtx reg = gen_rtx_REG (V4SImode, regno);
1635 rtx mem =
1636 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1637 return emit_insn (gen_movv4si (reg, mem));
1638}
1639
1640/* This happens after reload, so we need to expand it. */
1641static rtx
1642frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1643{
1644 rtx insn;
1645 if (satisfies_constraint_K (GEN_INT (imm)))
1646 {
1647 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1648 }
1649 else
1650 {
3072d30e 1651 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1652 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1653 if (REGNO (src) == REGNO (scratch))
1654 abort ();
1655 }
644459d0 1656 return insn;
1657}
1658
1659/* Return nonzero if this function is known to have a null epilogue. */
1660
1661int
1662direct_return (void)
1663{
1664 if (reload_completed)
1665 {
1666 if (cfun->static_chain_decl == 0
1667 && (spu_saved_regs_size ()
1668 + get_frame_size ()
abe32cce 1669 + crtl->outgoing_args_size
1670 + crtl->args.pretend_args_size == 0)
d5bf7b64 1671 && crtl->is_leaf)
644459d0 1672 return 1;
1673 }
1674 return 0;
1675}
1676
1677/*
1678 The stack frame looks like this:
1679 +-------------+
1680 | incoming |
a8e019fa 1681 | args |
1682 AP -> +-------------+
644459d0 1683 | $lr save |
1684 +-------------+
1685 prev SP | back chain |
1686 +-------------+
1687 | var args |
abe32cce 1688 | reg save | crtl->args.pretend_args_size bytes
644459d0 1689 +-------------+
1690 | ... |
1691 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1692 FP -> +-------------+
644459d0 1693 | ... |
a8e019fa 1694 | vars | get_frame_size() bytes
1695 HFP -> +-------------+
644459d0 1696 | ... |
1697 | outgoing |
abe32cce 1698 | args | crtl->outgoing_args_size bytes
644459d0 1699 +-------------+
1700 | $lr of next |
1701 | frame |
1702 +-------------+
a8e019fa 1703 | back chain |
1704 SP -> +-------------+
644459d0 1705
1706*/
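/* Worked example (illustrative, using the layout above): a non-leaf
   function with 32 bytes of locals, 64 bytes of saved registers, no
   pretend args and no outgoing args has
   total_size = 32 + 64 + 0 + 0 + STACK_POINTER_OFFSET, allocated by the
   prologue in a single stack-pointer adjustment, with the old $sp stored
   as the back chain.  */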
1707void
1708spu_expand_prologue (void)
1709{
1710 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1711 HOST_WIDE_INT total_size;
1712 HOST_WIDE_INT saved_regs_size;
1713 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1714 rtx scratch_reg_0, scratch_reg_1;
1715 rtx insn, real;
1716
5eb28709 1717 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1718 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 1719
1720 if (spu_naked_function_p (current_function_decl))
1721 return;
1722
1723 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1724 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1725
1726 saved_regs_size = spu_saved_regs_size ();
1727 total_size = size + saved_regs_size
abe32cce 1728 + crtl->outgoing_args_size
1729 + crtl->args.pretend_args_size;
644459d0 1730
d5bf7b64 1731 if (!crtl->is_leaf
18d50ae6 1732 || cfun->calls_alloca || total_size > 0)
644459d0 1733 total_size += STACK_POINTER_OFFSET;
1734
1735 /* Save this first because code after this might use the link
1736 register as a scratch register. */
d5bf7b64 1737 if (!crtl->is_leaf)
644459d0 1738 {
1739 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1740 RTX_FRAME_RELATED_P (insn) = 1;
1741 }
1742
1743 if (total_size > 0)
1744 {
abe32cce 1745 offset = -crtl->args.pretend_args_size;
644459d0 1746 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1747 if (need_to_save_reg (regno, 1))
1748 {
1749 offset -= 16;
1750 insn = frame_emit_store (regno, sp_reg, offset);
1751 RTX_FRAME_RELATED_P (insn) = 1;
1752 }
1753 }
1754
5eb28709 1755 if (flag_pic && cfun->machine->pic_reg)
644459d0 1756 {
5eb28709 1757 rtx pic_reg = cfun->machine->pic_reg;
644459d0 1758 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1759 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1760 }
1761
1762 if (total_size > 0)
1763 {
1764 if (flag_stack_check)
1765 {
d819917f 1766 /* We compare against total_size-1 because
644459d0 1767 ($sp >= total_size) <=> ($sp > total_size-1) */
1768 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1769 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1770 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1771 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1772 {
1773 emit_move_insn (scratch_v4si, size_v4si);
1774 size_v4si = scratch_v4si;
1775 }
1776 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1777 emit_insn (gen_vec_extractv4si
1778 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1779 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1780 }
1781
1782 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1783 the value of the previous $sp because we save it as the back
1784 chain. */
1785 if (total_size <= 2000)
1786 {
1787 /* In this case we save the back chain first. */
1788 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1789 insn =
1790 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1791 }
644459d0 1792 else
1793 {
1794 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1795 insn =
1796 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1797 }
1798 RTX_FRAME_RELATED_P (insn) = 1;
1799 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1801
1802 if (total_size > 2000)
1803 {
1804 /* Save the back chain ptr */
1805 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1806 }
1807
1808 if (frame_pointer_needed)
1809 {
1810 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1811 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1812 + crtl->outgoing_args_size;
644459d0 1813 /* Set the new frame_pointer */
d8dfeb55 1814 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1815 RTX_FRAME_RELATED_P (insn) = 1;
1816 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1817 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1818 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1819 }
1820 }
1821
8c0dd614 1822 if (flag_stack_usage_info)
a512540d 1823 current_function_static_stack_size = total_size;
644459d0 1824}
1825
1826void
1827spu_expand_epilogue (bool sibcall_p)
1828{
1829 int size = get_frame_size (), offset, regno;
1830 HOST_WIDE_INT saved_regs_size, total_size;
1831 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 1832 rtx scratch_reg_0;
644459d0 1833
644459d0 1834 if (spu_naked_function_p (current_function_decl))
1835 return;
1836
1837 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1838
1839 saved_regs_size = spu_saved_regs_size ();
1840 total_size = size + saved_regs_size
abe32cce 1841 + crtl->outgoing_args_size
1842 + crtl->args.pretend_args_size;
644459d0 1843
d5bf7b64 1844 if (!crtl->is_leaf
18d50ae6 1845 || cfun->calls_alloca || total_size > 0)
644459d0 1846 total_size += STACK_POINTER_OFFSET;
1847
1848 if (total_size > 0)
1849 {
18d50ae6 1850 if (cfun->calls_alloca)
644459d0 1851 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1852 else
1853 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1854
1855
1856 if (saved_regs_size > 0)
1857 {
abe32cce 1858 offset = -crtl->args.pretend_args_size;
644459d0 1859 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1860 if (need_to_save_reg (regno, 1))
1861 {
1862 offset -= 0x10;
1863 frame_emit_load (regno, sp_reg, offset);
1864 }
1865 }
1866 }
1867
d5bf7b64 1868 if (!crtl->is_leaf)
644459d0 1869 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1870
1871 if (!sibcall_p)
1872 {
18b42941 1873 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 1874 emit_jump_insn (gen__return ());
644459d0 1875 }
644459d0 1876}
1877
1878rtx
1879spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1880{
1881 if (count != 0)
1882 return 0;
1883 /* This is inefficient because it ends up copying to a save-register
1884 which then gets saved even though $lr has already been saved. But
1885 it does generate better code for leaf functions and we don't need
1886 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1887 used for __builtin_return_address anyway, so maybe we don't care if
1888 it's inefficient. */
1889 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1890}
1891\f
1892
1893/* Given VAL, generate a constant appropriate for MODE.
1894 If MODE is a vector mode, every element will be VAL.
1895 For TImode, VAL will be zero extended to 128 bits. */
1896rtx
1897spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1898{
1899 rtx inner;
1900 rtvec v;
1901 int units, i;
1902
1903 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1904 || GET_MODE_CLASS (mode) == MODE_FLOAT
1905 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1906 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1907
1908 if (GET_MODE_CLASS (mode) == MODE_INT)
1909 return immed_double_const (val, 0, mode);
1910
1911 /* val is the bit representation of the float */
1912 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1913 return hwint_to_const_double (mode, val);
1914
1915 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1916 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1917 else
1918 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1919
1920 units = GET_MODE_NUNITS (mode);
1921
1922 v = rtvec_alloc (units);
1923
1924 for (i = 0; i < units; ++i)
1925 RTVEC_ELT (v, i) = inner;
1926
1927 return gen_rtx_CONST_VECTOR (mode, v);
1928}
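/* Usage example (illustrative): spu_const (V4SImode, 1) yields the
   CONST_VECTOR {1, 1, 1, 1}, and spu_const (SFmode, 0x3f800000) yields a
   CONST_DOUBLE for 1.0f, since VAL is interpreted as the bit pattern for
   floating-point modes.  */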
644459d0 1929
5474166e 1930/* Create a MODE vector constant from 4 ints. */
1931rtx
1932spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1933{
1934 unsigned char arr[16];
1935 arr[0] = (a >> 24) & 0xff;
1936 arr[1] = (a >> 16) & 0xff;
1937 arr[2] = (a >> 8) & 0xff;
1938 arr[3] = (a >> 0) & 0xff;
1939 arr[4] = (b >> 24) & 0xff;
1940 arr[5] = (b >> 16) & 0xff;
1941 arr[6] = (b >> 8) & 0xff;
1942 arr[7] = (b >> 0) & 0xff;
1943 arr[8] = (c >> 24) & 0xff;
1944 arr[9] = (c >> 16) & 0xff;
1945 arr[10] = (c >> 8) & 0xff;
1946 arr[11] = (c >> 0) & 0xff;
1947 arr[12] = (d >> 24) & 0xff;
1948 arr[13] = (d >> 16) & 0xff;
1949 arr[14] = (d >> 8) & 0xff;
1950 arr[15] = (d >> 0) & 0xff;
1951 return array_to_constant(mode, arr);
1952}
5a976006 1953\f
1954/* branch hint stuff */
5474166e 1955
644459d0 1956/* An array of these is used to propagate hints to predecessor blocks. */
1957struct spu_bb_info
1958{
5a976006 1959 rtx prop_jump; /* propagated from another block */
1960 int bb_index; /* the original block. */
644459d0 1961};
5a976006 1962static struct spu_bb_info *spu_bb_info;
644459d0 1963
5a976006 1964#define STOP_HINT_P(INSN) \
1965 (GET_CODE(INSN) == CALL_INSN \
1966 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1967 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1968
1969/* 1 when RTX is a hinted branch or its target. We keep track of
1970 what has been hinted so the safe-hint code can test it easily. */
1971#define HINTED_P(RTX) \
1972 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1973
1974/* 1 when RTX is an insn that must be scheduled on an even boundary. */
1975#define SCHED_ON_EVEN_P(RTX) \
1976 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1977
1978/* Emit a nop for INSN such that the two will dual issue. This assumes
1979 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1980 We check for TImode to handle a MULTI1 insn which has dual issued its
1981 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
1982 ADDR_VEC insns. */
1983static void
1984emit_nop_for_insn (rtx insn)
644459d0 1985{
5a976006 1986 int p;
1987 rtx new_insn;
1988 p = get_pipe (insn);
1989 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1990 new_insn = emit_insn_after (gen_lnop (), insn);
1991 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 1992 {
5a976006 1993 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
1994 PUT_MODE (new_insn, TImode);
1995 PUT_MODE (insn, VOIDmode);
1996 }
1997 else
1998 new_insn = emit_insn_after (gen_lnop (), insn);
1999 recog_memoized (new_insn);
d53c050c 2000 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
5a976006 2001}
2002
2003/* Insert nops in basic blocks to meet dual issue alignment
2004 requirements. Also make sure hbrp and hint instructions are at least
2005 one cycle apart, possibly inserting a nop. */
2006static void
2007pad_bb(void)
2008{
2009 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2010 int length;
2011 int addr;
2012
2013 /* This sets up INSN_ADDRESSES. */
2014 shorten_branches (get_insns ());
2015
2016 /* Keep track of length added by nops. */
2017 length = 0;
2018
2019 prev_insn = 0;
2020 insn = get_insns ();
2021 if (!active_insn_p (insn))
2022 insn = next_active_insn (insn);
2023 for (; insn; insn = next_insn)
2024 {
2025 next_insn = next_active_insn (insn);
2026 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2027 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2028 {
5a976006 2029 if (hbr_insn)
2030 {
2031 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2032 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2033 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2034 || (a1 - a0 == 4))
2035 {
2036 prev_insn = emit_insn_before (gen_lnop (), insn);
2037 PUT_MODE (prev_insn, GET_MODE (insn));
2038 PUT_MODE (insn, TImode);
d53c050c 2039 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
5a976006 2040 length += 4;
2041 }
2042 }
2043 hbr_insn = insn;
2044 }
2045 if (INSN_CODE (insn) == CODE_FOR_blockage)
2046 {
2047 if (GET_MODE (insn) == TImode)
2048 PUT_MODE (next_insn, TImode);
2049 insn = next_insn;
2050 next_insn = next_active_insn (insn);
2051 }
2052 addr = INSN_ADDRESSES (INSN_UID (insn));
2053 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2054 {
2055 if (((addr + length) & 7) != 0)
2056 {
2057 emit_nop_for_insn (prev_insn);
2058 length += 4;
2059 }
644459d0 2060 }
5a976006 2061 else if (GET_MODE (insn) == TImode
2062 && ((next_insn && GET_MODE (next_insn) != TImode)
2063 || get_attr_type (insn) == TYPE_MULTI0)
2064 && ((addr + length) & 7) != 0)
2065 {
2066 /* prev_insn will always be set because the first insn is
2067 always 8-byte aligned. */
2068 emit_nop_for_insn (prev_insn);
2069 length += 4;
2070 }
2071 prev_insn = insn;
644459d0 2072 }
644459d0 2073}
2074
5a976006 2075\f
2076/* Routines for branch hints. */
2077
644459d0 2078static void
5a976006 2079spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2080 int distance, sbitmap blocks)
644459d0 2081{
5a976006 2082 rtx branch_label = 0;
2083 rtx hint;
2084 rtx insn;
2085 rtx table;
644459d0 2086
2087 if (before == 0 || branch == 0 || target == 0)
2088 return;
2089
5a976006 2090   /* While scheduling we require hints to be no further than 600 bytes, so
         2091      we need to enforce that here too.  */
644459d0 2092 if (distance > 600)
2093 return;
2094
5a976006 2095   /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2096 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2097 before = NEXT_INSN (before);
644459d0 2098
2099 branch_label = gen_label_rtx ();
2100 LABEL_NUSES (branch_label)++;
2101 LABEL_PRESERVE_P (branch_label) = 1;
2102 insn = emit_label_before (branch_label, branch);
2103 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
08b7917c 2104 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
5a976006 2105
2106 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2107 recog_memoized (hint);
d53c050c 2108 INSN_LOCATION (hint) = INSN_LOCATION (branch);
5a976006 2109 HINTED_P (branch) = 1;
644459d0 2110
5a976006 2111 if (GET_CODE (target) == LABEL_REF)
2112 HINTED_P (XEXP (target, 0)) = 1;
2113 else if (tablejump_p (branch, 0, &table))
644459d0 2114 {
5a976006 2115 rtvec vec;
2116 int j;
2117 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2118 vec = XVEC (PATTERN (table), 0);
2119 else
2120 vec = XVEC (PATTERN (table), 1);
2121 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2122 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2123 }
5a976006 2124
2125 if (distance >= 588)
644459d0 2126 {
5a976006 2127 /* Make sure the hint isn't scheduled any earlier than this point,
         2128      which could make it too far for the branch offset to fit.  */
2fbdf9ef 2129 insn = emit_insn_before (gen_blockage (), hint);
2130 recog_memoized (insn);
d53c050c 2131 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2132 }
2133 else if (distance <= 8 * 4)
2134 {
2135 /* To guarantee at least 8 insns between the hint and branch we
2136 insert nops. */
2137 int d;
2138 for (d = distance; d < 8 * 4; d += 4)
2139 {
2140 insn =
2141 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2142 recog_memoized (insn);
d53c050c 2143 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2144 }
2145
2146 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2147 insn = emit_insn_after (gen_blockage (), hint);
2148 recog_memoized (insn);
d53c050c 2149 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2150
2151 /* Make sure any nops inserted aren't scheduled after the call. */
2152 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2153 {
2154 insn = emit_insn_before (gen_blockage (), branch);
2155 recog_memoized (insn);
d53c050c 2156 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2fbdf9ef 2157 }
644459d0 2158 }
644459d0 2159}
2160
2161/* Returns 0 if we don't want a hint for this branch. Otherwise return
2162 the rtx for the branch target. */
2163static rtx
2164get_branch_target (rtx branch)
2165{
2166 if (GET_CODE (branch) == JUMP_INSN)
2167 {
2168 rtx set, src;
2169
2170 /* Return statements */
2171 if (GET_CODE (PATTERN (branch)) == RETURN)
2172 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2173
2174 /* jump table */
2175 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2176 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2177 return 0;
2178
fcc31b99 2179 /* ASM GOTOs. */
604157f6 2180 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2181 return NULL;
2182
644459d0 2183 set = single_set (branch);
2184 src = SET_SRC (set);
2185 if (GET_CODE (SET_DEST (set)) != PC)
2186 abort ();
2187
2188 if (GET_CODE (src) == IF_THEN_ELSE)
2189 {
2190 rtx lab = 0;
2191 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2192 if (note)
2193 {
2194 /* If the more probable case is not a fall through, then
2195 try a branch hint. */
2196 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2197 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2198 && GET_CODE (XEXP (src, 1)) != PC)
2199 lab = XEXP (src, 1);
2200 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2201 && GET_CODE (XEXP (src, 2)) != PC)
2202 lab = XEXP (src, 2);
2203 }
2204 if (lab)
2205 {
2206 if (GET_CODE (lab) == RETURN)
2207 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2208 return lab;
2209 }
2210 return 0;
2211 }
2212
2213 return src;
2214 }
2215 else if (GET_CODE (branch) == CALL_INSN)
2216 {
2217 rtx call;
2218 /* All of our call patterns are in a PARALLEL and the CALL is
2219 the first pattern in the PARALLEL. */
2220 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2221 abort ();
2222 call = XVECEXP (PATTERN (branch), 0, 0);
2223 if (GET_CODE (call) == SET)
2224 call = SET_SRC (call);
2225 if (GET_CODE (call) != CALL)
2226 abort ();
2227 return XEXP (XEXP (call, 0), 0);
2228 }
2229 return 0;
2230}
2231
5a976006 2232/* The special $hbr register is used to prevent the insn scheduler from
2233 moving hbr insns across instructions which invalidate them. It
2234 should only be used in a clobber, and this function searches for
2235 insns which clobber it. */
2236static bool
2237insn_clobbers_hbr (rtx insn)
2238{
2239 if (INSN_P (insn)
2240 && GET_CODE (PATTERN (insn)) == PARALLEL)
2241 {
2242 rtx parallel = PATTERN (insn);
2243 rtx clobber;
2244 int j;
2245 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2246 {
2247 clobber = XVECEXP (parallel, 0, j);
2248 if (GET_CODE (clobber) == CLOBBER
2249 && GET_CODE (XEXP (clobber, 0)) == REG
2250 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2251 return 1;
2252 }
2253 }
2254 return 0;
2255}
2256
2257/* Search up to 32 insns starting at FIRST:
2258 - at any kind of hinted branch, just return
2259 - at any unconditional branch in the first 15 insns, just return
2260 - at a call or indirect branch, after the first 15 insns, force it to
2261 an even address and return
2262 - at any unconditional branch, after the first 15 insns, force it to
2263 an even address.
         2264    At the end of the search, insert an hbrp within 4 insns of FIRST,
2265 and an hbrp within 16 instructions of FIRST.
2266 */
644459d0 2267static void
5a976006 2268insert_hbrp_for_ilb_runout (rtx first)
644459d0 2269{
5a976006 2270 rtx insn, before_4 = 0, before_16 = 0;
2271 int addr = 0, length, first_addr = -1;
2272 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2273 int insert_lnop_after = 0;
2274 for (insn = first; insn; insn = NEXT_INSN (insn))
2275 if (INSN_P (insn))
2276 {
2277 if (first_addr == -1)
2278 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2279 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2280 length = get_attr_length (insn);
2281
2282 if (before_4 == 0 && addr + length >= 4 * 4)
2283 before_4 = insn;
2284 /* We test for 14 instructions because the first hbrp will add
2285 up to 2 instructions. */
2286 if (before_16 == 0 && addr + length >= 14 * 4)
2287 before_16 = insn;
2288
2289 if (INSN_CODE (insn) == CODE_FOR_hbr)
2290 {
2291 /* Make sure an hbrp is at least 2 cycles away from a hint.
2292 Insert an lnop after the hbrp when necessary. */
2293 if (before_4 == 0 && addr > 0)
2294 {
2295 before_4 = insn;
2296 insert_lnop_after |= 1;
2297 }
2298 else if (before_4 && addr <= 4 * 4)
2299 insert_lnop_after |= 1;
2300 if (before_16 == 0 && addr > 10 * 4)
2301 {
2302 before_16 = insn;
2303 insert_lnop_after |= 2;
2304 }
2305 else if (before_16 && addr <= 14 * 4)
2306 insert_lnop_after |= 2;
2307 }
644459d0 2308
5a976006 2309 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2310 {
2311 if (addr < hbrp_addr0)
2312 hbrp_addr0 = addr;
2313 else if (addr < hbrp_addr1)
2314 hbrp_addr1 = addr;
2315 }
644459d0 2316
5a976006 2317 if (CALL_P (insn) || JUMP_P (insn))
2318 {
2319 if (HINTED_P (insn))
2320 return;
2321
2322 /* Any branch after the first 15 insns should be on an even
2323 address to avoid a special case branch. There might be
2324 some nops and/or hbrps inserted, so we test after 10
2325 insns. */
2326 if (addr > 10 * 4)
2327 SCHED_ON_EVEN_P (insn) = 1;
2328 }
644459d0 2329
5a976006 2330 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2331 return;
2332
2333
2334 if (addr + length >= 32 * 4)
644459d0 2335 {
5a976006 2336 gcc_assert (before_4 && before_16);
2337 if (hbrp_addr0 > 4 * 4)
644459d0 2338 {
5a976006 2339 insn =
2340 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2341 recog_memoized (insn);
d53c050c 2342 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2343 INSN_ADDRESSES_NEW (insn,
2344 INSN_ADDRESSES (INSN_UID (before_4)));
2345 PUT_MODE (insn, GET_MODE (before_4));
2346 PUT_MODE (before_4, TImode);
2347 if (insert_lnop_after & 1)
644459d0 2348 {
5a976006 2349 insn = emit_insn_before (gen_lnop (), before_4);
2350 recog_memoized (insn);
d53c050c 2351 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2352 INSN_ADDRESSES_NEW (insn,
2353 INSN_ADDRESSES (INSN_UID (before_4)));
2354 PUT_MODE (insn, TImode);
644459d0 2355 }
644459d0 2356 }
5a976006 2357 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2358 && hbrp_addr1 > 16 * 4)
644459d0 2359 {
5a976006 2360 insn =
2361 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2362 recog_memoized (insn);
d53c050c 2363 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2364 INSN_ADDRESSES_NEW (insn,
2365 INSN_ADDRESSES (INSN_UID (before_16)));
2366 PUT_MODE (insn, GET_MODE (before_16));
2367 PUT_MODE (before_16, TImode);
2368 if (insert_lnop_after & 2)
644459d0 2369 {
5a976006 2370 insn = emit_insn_before (gen_lnop (), before_16);
2371 recog_memoized (insn);
d53c050c 2372 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID
2375 (before_16)));
2376 PUT_MODE (insn, TImode);
644459d0 2377 }
2378 }
5a976006 2379 return;
644459d0 2380 }
644459d0 2381 }
5a976006 2382 else if (BARRIER_P (insn))
2383 return;
644459d0 2384
644459d0 2385}
5a976006 2386
2387/* The SPU might hang when it executes 48 inline instructions after a
2388 hinted branch jumps to its hinted target. The beginning of a
851d9296 2389 function and the return from a call might have been hinted, and
2390 must be handled as well. To prevent a hang we insert 2 hbrps. The
2391 first should be within 6 insns of the branch target. The second
2392 should be within 22 insns of the branch target. When determining
2393 if hbrps are necessary, we look for only 32 inline instructions,
         2394    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2395 when inserting new hbrps, we insert them within 4 and 16 insns of
2396 the target. */
644459d0 2397static void
5a976006 2398insert_hbrp (void)
644459d0 2399{
5a976006 2400 rtx insn;
2401 if (TARGET_SAFE_HINTS)
644459d0 2402 {
5a976006 2403 shorten_branches (get_insns ());
2404 /* Insert hbrp at beginning of function */
2405 insn = next_active_insn (get_insns ());
2406 if (insn)
2407 insert_hbrp_for_ilb_runout (insn);
2408 /* Insert hbrp after hinted targets. */
2409 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2410 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2411 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2412 }
644459d0 2413}
2414
5a976006 2415static int in_spu_reorg;
2416
8a42230a 2417static void
2418spu_var_tracking (void)
2419{
2420 if (flag_var_tracking)
2421 {
2422 df_analyze ();
2423 timevar_push (TV_VAR_TRACKING);
2424 variable_tracking_main ();
2425 timevar_pop (TV_VAR_TRACKING);
2426 df_finish_pass (false);
2427 }
2428}
2429
5a976006 2430/* Insert branch hints. There are no branch optimizations after this
2431 pass, so it's safe to set our branch hints now. */
644459d0 2432static void
5a976006 2433spu_machine_dependent_reorg (void)
644459d0 2434{
5a976006 2435 sbitmap blocks;
2436 basic_block bb;
2437 rtx branch, insn;
2438 rtx branch_target = 0;
2439 int branch_addr = 0, insn_addr, required_dist = 0;
2440 int i;
2441 unsigned int j;
644459d0 2442
5a976006 2443 if (!TARGET_BRANCH_HINTS || optimize == 0)
2444 {
2445 /* We still do it for unoptimized code because an external
2446 function might have hinted a call or return. */
a54ca889 2447 compute_bb_for_insn ();
5a976006 2448 insert_hbrp ();
2449 pad_bb ();
8a42230a 2450 spu_var_tracking ();
a54ca889 2451 free_bb_for_insn ();
5a976006 2452 return;
2453 }
644459d0 2454
5a976006 2455 blocks = sbitmap_alloc (last_basic_block);
53c5d9d4 2456 bitmap_clear (blocks);
644459d0 2457
5a976006 2458 in_spu_reorg = 1;
2459 compute_bb_for_insn ();
2460
a7a0184d 2461 /* (Re-)discover loops so that bb->loop_father can be used
2462 in the analysis below. */
2463 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2464
5a976006 2465 compact_blocks ();
2466
2467 spu_bb_info =
2468 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2469 sizeof (struct spu_bb_info));
2470
2471 /* We need exact insn addresses and lengths. */
2472 shorten_branches (get_insns ());
2473
2474 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2475 {
5a976006 2476 bb = BASIC_BLOCK (i);
2477 branch = 0;
2478 if (spu_bb_info[i].prop_jump)
644459d0 2479 {
5a976006 2480 branch = spu_bb_info[i].prop_jump;
2481 branch_target = get_branch_target (branch);
2482 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2483 required_dist = spu_hint_dist;
2484 }
2485 /* Search from end of a block to beginning. In this loop, find
         2486 	 jumps which need a hint and emit the hint only when:
2487 - it's an indirect branch and we're at the insn which sets
2488 the register
2489 - we're at an insn that will invalidate the hint. e.g., a
2490 call, another hint insn, inline asm that clobbers $hbr, and
2491 some inlined operations (divmodsi4). Don't consider jumps
2492 because they are only at the end of a block and are
2493 considered when we are deciding whether to propagate
2494 - we're getting too far away from the branch. The hbr insns
2495 only have a signed 10 bit offset
2496 We go back as far as possible so the branch will be considered
2497 for propagation when we get to the beginning of the block. */
2498 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2499 {
2500 if (INSN_P (insn))
2501 {
2502 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2503 if (branch
2504 && ((GET_CODE (branch_target) == REG
2505 && set_of (branch_target, insn) != NULL_RTX)
2506 || insn_clobbers_hbr (insn)
2507 || branch_addr - insn_addr > 600))
2508 {
2509 rtx next = NEXT_INSN (insn);
2510 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2511 if (insn != BB_END (bb)
2512 && branch_addr - next_addr >= required_dist)
2513 {
2514 if (dump_file)
2515 fprintf (dump_file,
2516 "hint for %i in block %i before %i\n",
2517 INSN_UID (branch), bb->index,
2518 INSN_UID (next));
2519 spu_emit_branch_hint (next, branch, branch_target,
2520 branch_addr - next_addr, blocks);
2521 }
2522 branch = 0;
2523 }
2524
2525 /* JUMP_P will only be true at the end of a block. When
2526 branch is already set it means we've previously decided
2527 to propagate a hint for that branch into this block. */
2528 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2529 {
2530 branch = 0;
2531 if ((branch_target = get_branch_target (insn)))
2532 {
2533 branch = insn;
2534 branch_addr = insn_addr;
2535 required_dist = spu_hint_dist;
2536 }
2537 }
2538 }
2539 if (insn == BB_HEAD (bb))
2540 break;
2541 }
2542
2543 if (branch)
2544 {
2545 /* If we haven't emitted a hint for this branch yet, it might
2546 be profitable to emit it in one of the predecessor blocks,
2547 especially for loops. */
2548 rtx bbend;
2549 basic_block prev = 0, prop = 0, prev2 = 0;
2550 int loop_exit = 0, simple_loop = 0;
2551 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2552
2553 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2554 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2555 prev = EDGE_PRED (bb, j)->src;
2556 else
2557 prev2 = EDGE_PRED (bb, j)->src;
2558
2559 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2560 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2561 loop_exit = 1;
2562 else if (EDGE_SUCC (bb, j)->dest == bb)
2563 simple_loop = 1;
2564
2565 /* If this branch is a loop exit then propagate to previous
2566 fallthru block. This catches the cases when it is a simple
2567 loop or when there is an initial branch into the loop. */
2568 if (prev && (loop_exit || simple_loop)
a7a0184d 2569 && bb_loop_depth (prev) <= bb_loop_depth (bb))
5a976006 2570 prop = prev;
2571
         2572 	  /* If there is only one adjacent predecessor, don't propagate
a7a0184d 2573 outside this loop. */
5a976006 2574 else if (prev && single_pred_p (bb)
a7a0184d 2575 && prev->loop_father == bb->loop_father)
5a976006 2576 prop = prev;
2577
2578 /* If this is the JOIN block of a simple IF-THEN then
9d75589a 2579 propagate the hint to the HEADER block. */
5a976006 2580 else if (prev && prev2
2581 && EDGE_COUNT (bb->preds) == 2
2582 && EDGE_COUNT (prev->preds) == 1
2583 && EDGE_PRED (prev, 0)->src == prev2
a7a0184d 2584 && prev2->loop_father == bb->loop_father
5a976006 2585 && GET_CODE (branch_target) != REG)
2586 prop = prev;
2587
2588 /* Don't propagate when:
2589 - this is a simple loop and the hint would be too far
2590 - this is not a simple loop and there are 16 insns in
2591 this block already
2592 - the predecessor block ends in a branch that will be
2593 hinted
2594 - the predecessor block ends in an insn that invalidates
2595 the hint */
2596 if (prop
2597 && prop->index >= 0
2598 && (bbend = BB_END (prop))
2599 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2600 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2601 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2602 {
2603 if (dump_file)
2604 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2605 "for %i (loop_exit %i simple_loop %i dist %i)\n",
a7a0184d 2606 bb->index, prop->index, bb_loop_depth (bb),
5a976006 2607 INSN_UID (branch), loop_exit, simple_loop,
2608 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2609
2610 spu_bb_info[prop->index].prop_jump = branch;
2611 spu_bb_info[prop->index].bb_index = i;
2612 }
2613 else if (branch_addr - next_addr >= required_dist)
2614 {
2615 if (dump_file)
2616 fprintf (dump_file, "hint for %i in block %i before %i\n",
2617 INSN_UID (branch), bb->index,
2618 INSN_UID (NEXT_INSN (insn)));
2619 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2620 branch_addr - next_addr, blocks);
2621 }
2622 branch = 0;
644459d0 2623 }
644459d0 2624 }
5a976006 2625 free (spu_bb_info);
644459d0 2626
53c5d9d4 2627 if (!bitmap_empty_p (blocks))
5a976006 2628 find_many_sub_basic_blocks (blocks);
2629
2630 /* We have to schedule to make sure alignment is ok. */
2631 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2632
2633 /* The hints need to be scheduled, so call it again. */
2634 schedule_insns ();
2fbdf9ef 2635 df_finish_pass (true);
5a976006 2636
2637 insert_hbrp ();
2638
2639 pad_bb ();
2640
8f1d58ad 2641 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2642 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2643 {
2644 /* Adjust the LABEL_REF in a hint when we have inserted a nop
         2645 	   between its branch label and the branch.  We don't move the
2646 label because GCC expects it at the beginning of the block. */
2647 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2648 rtx label_ref = XVECEXP (unspec, 0, 0);
2649 rtx label = XEXP (label_ref, 0);
2650 rtx branch;
2651 int offset = 0;
2652 for (branch = NEXT_INSN (label);
2653 !JUMP_P (branch) && !CALL_P (branch);
2654 branch = NEXT_INSN (branch))
2655 if (NONJUMP_INSN_P (branch))
2656 offset += get_attr_length (branch);
2657 if (offset > 0)
29c05e22 2658 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
8f1d58ad 2659 }
5a976006 2660
8a42230a 2661 spu_var_tracking ();
5a976006 2662
a7a0184d 2663 loop_optimizer_finalize ();
2664
5a976006 2665 free_bb_for_insn ();
2666
2667 in_spu_reorg = 0;
644459d0 2668}
2669\f
2670
2671/* Insn scheduling routines, primarily for dual issue. */
2672static int
2673spu_sched_issue_rate (void)
2674{
2675 return 2;
2676}
2677
2678static int
5a976006 2679uses_ls_unit(rtx insn)
644459d0 2680{
5a976006 2681 rtx set = single_set (insn);
2682 if (set != 0
2683 && (GET_CODE (SET_DEST (set)) == MEM
2684 || GET_CODE (SET_SRC (set)) == MEM))
2685 return 1;
2686 return 0;
644459d0 2687}
2688
2689static int
2690get_pipe (rtx insn)
2691{
2692 enum attr_type t;
2693 /* Handle inline asm */
2694 if (INSN_CODE (insn) == -1)
2695 return -1;
2696 t = get_attr_type (insn);
2697 switch (t)
2698 {
2699 case TYPE_CONVERT:
2700 return -2;
2701 case TYPE_MULTI0:
2702 return -1;
2703
2704 case TYPE_FX2:
2705 case TYPE_FX3:
2706 case TYPE_SPR:
2707 case TYPE_NOP:
2708 case TYPE_FXB:
2709 case TYPE_FPD:
2710 case TYPE_FP6:
2711 case TYPE_FP7:
644459d0 2712 return 0;
2713
2714 case TYPE_LNOP:
2715 case TYPE_SHUF:
2716 case TYPE_LOAD:
2717 case TYPE_STORE:
2718 case TYPE_BR:
2719 case TYPE_MULTI1:
2720 case TYPE_HBR:
5a976006 2721 case TYPE_IPREFETCH:
644459d0 2722 return 1;
2723 default:
2724 abort ();
2725 }
2726}
2727
5a976006 2728
2729/* haifa-sched.c has a static variable that keeps track of the current
2730 cycle. It is passed to spu_sched_reorder, and we record it here for
2731 use by spu_sched_variable_issue. It won't be accurate if the
         2732    scheduler updates its clock_var between the two calls.  */
2733static int clock_var;
2734
2735/* This is used to keep track of insn alignment. Set to 0 at the
2736 beginning of each block and increased by the "length" attr of each
2737 insn scheduled. */
2738static int spu_sched_length;
2739
2740/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2741 ready list appropriately in spu_sched_reorder(). */
2742static int pipe0_clock;
2743static int pipe1_clock;
2744
2745static int prev_clock_var;
2746
2747static int prev_priority;
2748
2749/* The SPU needs to load the next ilb sometime during the execution of
2750 the previous ilb. There is a potential conflict if every cycle has a
2751 load or store. To avoid the conflict we make sure the load/store
2752 unit is free for at least one cycle during the execution of insns in
2753 the previous ilb. */
2754static int spu_ls_first;
2755static int prev_ls_clock;
2756
2757static void
2758spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2759 int max_ready ATTRIBUTE_UNUSED)
2760{
2761 spu_sched_length = 0;
2762}
2763
2764static void
2765spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2766 int max_ready ATTRIBUTE_UNUSED)
2767{
2768 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2769 {
2770 /* When any block might be at least 8-byte aligned, assume they
2771 will all be at least 8-byte aligned to make sure dual issue
2772 works out correctly. */
2773 spu_sched_length = 0;
2774 }
2775 spu_ls_first = INT_MAX;
2776 clock_var = -1;
2777 prev_ls_clock = -1;
2778 pipe0_clock = -1;
2779 pipe1_clock = -1;
2780 prev_clock_var = -1;
2781 prev_priority = -1;
2782}
2783
644459d0 2784static int
5a976006 2785spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2786 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2787{
5a976006 2788 int len;
2789 int p;
644459d0 2790 if (GET_CODE (PATTERN (insn)) == USE
2791 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2792 || (len = get_attr_length (insn)) == 0)
2793 return more;
2794
2795 spu_sched_length += len;
2796
2797 /* Reset on inline asm */
2798 if (INSN_CODE (insn) == -1)
2799 {
2800 spu_ls_first = INT_MAX;
2801 pipe0_clock = -1;
2802 pipe1_clock = -1;
2803 return 0;
2804 }
2805 p = get_pipe (insn);
2806 if (p == 0)
2807 pipe0_clock = clock_var;
2808 else
2809 pipe1_clock = clock_var;
2810
2811 if (in_spu_reorg)
2812 {
2813 if (clock_var - prev_ls_clock > 1
2814 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2815 spu_ls_first = INT_MAX;
2816 if (uses_ls_unit (insn))
2817 {
2818 if (spu_ls_first == INT_MAX)
2819 spu_ls_first = spu_sched_length;
2820 prev_ls_clock = clock_var;
2821 }
2822
2823 /* The scheduler hasn't inserted the nop, but we will later on.
2824 Include those nops in spu_sched_length. */
2825 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2826 spu_sched_length += 4;
2827 prev_clock_var = clock_var;
2828
2829 /* more is -1 when called from spu_sched_reorder for new insns
2830 that don't have INSN_PRIORITY */
2831 if (more >= 0)
2832 prev_priority = INSN_PRIORITY (insn);
2833 }
2834
9d75589a 2835 /* Always try issuing more insns. spu_sched_reorder will decide
5a976006 2836 when the cycle should be advanced. */
2837 return 1;
2838}
2839
2840/* This function is called for both TARGET_SCHED_REORDER and
2841 TARGET_SCHED_REORDER2. */
2842static int
2843spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2844 rtx *ready, int *nreadyp, int clock)
2845{
2846 int i, nready = *nreadyp;
2847 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2848 rtx insn;
2849
2850 clock_var = clock;
2851
2852 if (nready <= 0 || pipe1_clock >= clock)
2853 return 0;
2854
2855 /* Find any rtl insns that don't generate assembly insns and schedule
2856 them first. */
2857 for (i = nready - 1; i >= 0; i--)
2858 {
2859 insn = ready[i];
2860 if (INSN_CODE (insn) == -1
2861 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 2862 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 2863 {
2864 ready[i] = ready[nready - 1];
2865 ready[nready - 1] = insn;
2866 return 1;
2867 }
2868 }
2869
2870 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2871 for (i = 0; i < nready; i++)
2872 if (INSN_CODE (ready[i]) != -1)
2873 {
2874 insn = ready[i];
2875 switch (get_attr_type (insn))
2876 {
2877 default:
2878 case TYPE_MULTI0:
2879 case TYPE_CONVERT:
2880 case TYPE_FX2:
2881 case TYPE_FX3:
2882 case TYPE_SPR:
2883 case TYPE_NOP:
2884 case TYPE_FXB:
2885 case TYPE_FPD:
2886 case TYPE_FP6:
2887 case TYPE_FP7:
2888 pipe_0 = i;
2889 break;
2890 case TYPE_LOAD:
2891 case TYPE_STORE:
2892 pipe_ls = i;
2893 case TYPE_LNOP:
2894 case TYPE_SHUF:
2895 case TYPE_BR:
2896 case TYPE_MULTI1:
2897 case TYPE_HBR:
2898 pipe_1 = i;
2899 break;
2900 case TYPE_IPREFETCH:
2901 pipe_hbrp = i;
2902 break;
2903 }
2904 }
2905
2906 /* In the first scheduling phase, schedule loads and stores together
2907 to increase the chance they will get merged during postreload CSE. */
2908 if (!reload_completed && pipe_ls >= 0)
2909 {
2910 insn = ready[pipe_ls];
2911 ready[pipe_ls] = ready[nready - 1];
2912 ready[nready - 1] = insn;
2913 return 1;
2914 }
2915
2916 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2917 if (pipe_hbrp >= 0)
2918 pipe_1 = pipe_hbrp;
2919
2920 /* When we have loads/stores in every cycle of the last 15 insns and
2921 we are about to schedule another load/store, emit an hbrp insn
2922 instead. */
2923 if (in_spu_reorg
2924 && spu_sched_length - spu_ls_first >= 4 * 15
2925 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2926 {
2927 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2928 recog_memoized (insn);
2929 if (pipe0_clock < clock)
2930 PUT_MODE (insn, TImode);
2931 spu_sched_variable_issue (file, verbose, insn, -1);
2932 return 0;
2933 }
2934
2935 /* In general, we want to emit nops to increase dual issue, but dual
2936 issue isn't faster when one of the insns could be scheduled later
         2937      without affecting the critical path.  We look at INSN_PRIORITY to
         2938      make a good guess, but it isn't perfect so -mdual-nops=n can be
         2939      used to adjust it.  */
2940 if (in_spu_reorg && spu_dual_nops < 10)
2941 {
9d75589a 2942 /* When we are at an even address and we are not issuing nops to
5a976006 2943 improve scheduling then we need to advance the cycle. */
2944 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2945 && (spu_dual_nops == 0
2946 || (pipe_1 != -1
2947 && prev_priority >
2948 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2949 return 0;
2950
2951 /* When at an odd address, schedule the highest priority insn
2952 without considering pipeline. */
2953 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2954 && (spu_dual_nops == 0
2955 || (prev_priority >
2956 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2957 return 1;
2958 }
2959
2960
2961 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2962 pipe0 insn in the ready list, schedule it. */
2963 if (pipe0_clock < clock && pipe_0 >= 0)
2964 schedule_i = pipe_0;
2965
2966 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2967 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2968 else
2969 schedule_i = pipe_1;
2970
2971 if (schedule_i > -1)
2972 {
2973 insn = ready[schedule_i];
2974 ready[schedule_i] = ready[nready - 1];
2975 ready[nready - 1] = insn;
2976 return 1;
2977 }
2978 return 0;
644459d0 2979}
2980
2981/* INSN is dependent on DEP_INSN. */
2982static int
5a976006 2983spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 2984{
5a976006 2985 rtx set;
2986
2987 /* The blockage pattern is used to prevent instructions from being
2988 moved across it and has no cost. */
2989 if (INSN_CODE (insn) == CODE_FOR_blockage
2990 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2991 return 0;
2992
9d98604b 2993 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2994 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 2995 return 0;
2996
2997 /* Make sure hbrps are spread out. */
2998 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2999 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3000 return 8;
3001
3002 /* Make sure hints and hbrps are 2 cycles apart. */
3003 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3004 || INSN_CODE (insn) == CODE_FOR_hbr)
3005 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3006 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3007 return 2;
3008
3009 /* An hbrp has no real dependency on other insns. */
3010 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3012 return 0;
3013
3014 /* Assuming that it is unlikely an argument register will be used in
3015 the first cycle of the called function, we reduce the cost for
3016 slightly better scheduling of dep_insn. When not hinted, the
3017 mispredicted branch would hide the cost as well. */
3018 if (CALL_P (insn))
3019 {
3020 rtx target = get_branch_target (insn);
3021 if (GET_CODE (target) != REG || !set_of (target, insn))
3022 return cost - 2;
3023 return cost;
3024 }
3025
3026 /* And when returning from a function, let's assume the return values
3027 are completed sooner too. */
3028 if (CALL_P (dep_insn))
644459d0 3029 return cost - 2;
5a976006 3030
         3031   /* Make sure an instruction that loads from the back chain is scheduled
3032 away from the return instruction so a hint is more likely to get
3033 issued. */
3034 if (INSN_CODE (insn) == CODE_FOR__return
3035 && (set = single_set (dep_insn))
3036 && GET_CODE (SET_DEST (set)) == REG
3037 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3038 return 20;
3039
644459d0 3040 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3041 scheduler makes every insn in a block anti-dependent on the final
3042 jump_insn. We adjust here so higher cost insns will get scheduled
3043 earlier. */
5a976006 3044 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3045 return insn_cost (dep_insn) - 3;
5a976006 3046
644459d0 3047 return cost;
3048}
3049\f
3050/* Create a CONST_DOUBLE from a string. */
842ae815 3051rtx
644459d0 3052spu_float_const (const char *string, enum machine_mode mode)
3053{
3054 REAL_VALUE_TYPE value;
3055 value = REAL_VALUE_ATOF (string, mode);
3056 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3057}
3058
644459d0 3059int
3060spu_constant_address_p (rtx x)
3061{
3062 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3063 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3064 || GET_CODE (x) == HIGH);
3065}
3066
3067static enum spu_immediate
3068which_immediate_load (HOST_WIDE_INT val)
3069{
3070 gcc_assert (val == trunc_int_for_mode (val, SImode));
3071
3072 if (val >= -0x8000 && val <= 0x7fff)
3073 return SPU_IL;
3074 if (val >= 0 && val <= 0x3ffff)
3075 return SPU_ILA;
3076 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3077 return SPU_ILH;
3078 if ((val & 0xffff) == 0)
3079 return SPU_ILHU;
3080
3081 return SPU_NONE;
3082}
3083
dea01258 3084/* Return true when OP can be loaded by one of the il instructions, or
3085 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3086int
3087immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3088{
3089 if (CONSTANT_P (op))
3090 {
3091 enum immediate_class c = classify_immediate (op, mode);
5df189be 3092 return c == IC_IL1 || c == IC_IL1s
3072d30e 3093 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3094 }
3095 return 0;
3096}
3097
         3098 /* Return true if the first SIZE bytes of ARR form a constant that can be
3099 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3100 represent the size and offset of the instruction to use. */
3101static int
3102cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3103{
3104 int cpat, run, i, start;
3105 cpat = 1;
3106 run = 0;
3107 start = -1;
3108 for (i = 0; i < size && cpat; i++)
3109 if (arr[i] != i+16)
3110 {
3111 if (!run)
3112 {
3113 start = i;
3114 if (arr[i] == 3)
3115 run = 1;
3116 else if (arr[i] == 2 && arr[i+1] == 3)
3117 run = 2;
3118 else if (arr[i] == 0)
3119 {
3120 while (arr[i+run] == run && i+run < 16)
3121 run++;
3122 if (run != 4 && run != 8)
3123 cpat = 0;
3124 }
3125 else
3126 cpat = 0;
3127 if ((i & (run-1)) != 0)
3128 cpat = 0;
3129 i += run;
3130 }
3131 else
3132 cpat = 0;
3133 }
b01a6dc3 3134 if (cpat && (run || size < 16))
dea01258 3135 {
3136 if (run == 0)
3137 run = 1;
3138 if (prun)
3139 *prun = run;
3140 if (pstart)
3141 *pstart = start == -1 ? 16-run : start;
3142 return 1;
3143 }
3144 return 0;
3145}
3146
3147/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3148 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3149static enum immediate_class
3150classify_immediate (rtx op, enum machine_mode mode)
644459d0 3151{
3152 HOST_WIDE_INT val;
3153 unsigned char arr[16];
5df189be 3154 int i, j, repeated, fsmbi, repeat;
dea01258 3155
3156 gcc_assert (CONSTANT_P (op));
3157
644459d0 3158 if (GET_MODE (op) != VOIDmode)
3159 mode = GET_MODE (op);
3160
dea01258 3161 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3162 if (!flag_pic
3163 && mode == V4SImode
dea01258 3164 && GET_CODE (op) == CONST_VECTOR
3165 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3166 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3167 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3168 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3169 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3170 op = CONST_VECTOR_ELT (op, 0);
644459d0 3171
dea01258 3172 switch (GET_CODE (op))
3173 {
3174 case SYMBOL_REF:
3175 case LABEL_REF:
3176 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3177
dea01258 3178 case CONST:
0cfc65d4 3179 /* We can never know if the resulting address fits in 18 bits and can be
3180 loaded with ila. For now, assume the address will not overflow if
3181 the displacement is "small" (fits 'K' constraint). */
3182 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3183 {
3184 rtx sym = XEXP (XEXP (op, 0), 0);
3185 rtx cst = XEXP (XEXP (op, 0), 1);
3186
3187 if (GET_CODE (sym) == SYMBOL_REF
3188 && GET_CODE (cst) == CONST_INT
3189 && satisfies_constraint_K (cst))
3190 return IC_IL1s;
3191 }
3192 return IC_IL2s;
644459d0 3193
dea01258 3194 case HIGH:
3195 return IC_IL1s;
3196
3197 case CONST_VECTOR:
3198 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3199 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3200 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3201 return IC_POOL;
3202 /* Fall through. */
3203
3204 case CONST_INT:
3205 case CONST_DOUBLE:
3206 constant_to_array (mode, op, arr);
644459d0 3207
dea01258 3208 /* Check that each 4-byte slot is identical. */
3209 repeated = 1;
3210 for (i = 4; i < 16; i += 4)
3211 for (j = 0; j < 4; j++)
3212 if (arr[j] != arr[i + j])
3213 repeated = 0;
3214
3215 if (repeated)
3216 {
3217 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3218 val = trunc_int_for_mode (val, SImode);
3219
3220 if (which_immediate_load (val) != SPU_NONE)
3221 return IC_IL1;
3222 }
3223
3224 /* Any mode of 2 bytes or smaller can be loaded with an il
3225 instruction. */
3226 gcc_assert (GET_MODE_SIZE (mode) > 2);
3227
3228 fsmbi = 1;
5df189be 3229 repeat = 0;
dea01258 3230 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3231 if (arr[i] != 0 && repeat == 0)
3232 repeat = arr[i];
3233 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3234 fsmbi = 0;
3235 if (fsmbi)
5df189be 3236 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3237
3238 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3239 return IC_CPAT;
3240
3241 if (repeated)
3242 return IC_IL2;
3243
3244 return IC_POOL;
3245 default:
3246 break;
3247 }
3248 gcc_unreachable ();
644459d0 3249}
3250
3251static enum spu_immediate
3252which_logical_immediate (HOST_WIDE_INT val)
3253{
3254 gcc_assert (val == trunc_int_for_mode (val, SImode));
3255
3256 if (val >= -0x200 && val <= 0x1ff)
3257 return SPU_ORI;
3258 if (val >= 0 && val <= 0xffff)
3259 return SPU_IOHL;
3260 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3261 {
3262 val = trunc_int_for_mode (val, HImode);
3263 if (val >= -0x200 && val <= 0x1ff)
3264 return SPU_ORHI;
3265 if ((val & 0xff) == ((val >> 8) & 0xff))
3266 {
3267 val = trunc_int_for_mode (val, QImode);
3268 if (val >= -0x200 && val <= 0x1ff)
3269 return SPU_ORBI;
3270 }
3271 }
3272 return SPU_NONE;
3273}
3274
5df189be 3275/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3276 CONST_DOUBLEs. */
3277static int
3278const_vector_immediate_p (rtx x)
3279{
3280 int i;
3281 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3282 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3283 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3284 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3285 return 0;
3286 return 1;
3287}
3288
644459d0 3289int
3290logical_immediate_p (rtx op, enum machine_mode mode)
3291{
3292 HOST_WIDE_INT val;
3293 unsigned char arr[16];
3294 int i, j;
3295
3296 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3297 || GET_CODE (op) == CONST_VECTOR);
3298
5df189be 3299 if (GET_CODE (op) == CONST_VECTOR
3300 && !const_vector_immediate_p (op))
3301 return 0;
3302
644459d0 3303 if (GET_MODE (op) != VOIDmode)
3304 mode = GET_MODE (op);
3305
3306 constant_to_array (mode, op, arr);
3307
3308 /* Check that bytes are repeated. */
3309 for (i = 4; i < 16; i += 4)
3310 for (j = 0; j < 4; j++)
3311 if (arr[j] != arr[i + j])
3312 return 0;
3313
3314 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3315 val = trunc_int_for_mode (val, SImode);
3316
3317 i = which_logical_immediate (val);
3318 return i != SPU_NONE && i != SPU_IOHL;
3319}
3320
3321int
3322iohl_immediate_p (rtx op, enum machine_mode mode)
3323{
3324 HOST_WIDE_INT val;
3325 unsigned char arr[16];
3326 int i, j;
3327
3328 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3329 || GET_CODE (op) == CONST_VECTOR);
3330
5df189be 3331 if (GET_CODE (op) == CONST_VECTOR
3332 && !const_vector_immediate_p (op))
3333 return 0;
3334
644459d0 3335 if (GET_MODE (op) != VOIDmode)
3336 mode = GET_MODE (op);
3337
3338 constant_to_array (mode, op, arr);
3339
3340 /* Check that bytes are repeated. */
3341 for (i = 4; i < 16; i += 4)
3342 for (j = 0; j < 4; j++)
3343 if (arr[j] != arr[i + j])
3344 return 0;
3345
3346 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3347 val = trunc_int_for_mode (val, SImode);
3348
3349 return val >= 0 && val <= 0xffff;
3350}
3351
3352int
3353arith_immediate_p (rtx op, enum machine_mode mode,
3354 HOST_WIDE_INT low, HOST_WIDE_INT high)
3355{
3356 HOST_WIDE_INT val;
3357 unsigned char arr[16];
3358 int bytes, i, j;
3359
3360 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3361 || GET_CODE (op) == CONST_VECTOR);
3362
5df189be 3363 if (GET_CODE (op) == CONST_VECTOR
3364 && !const_vector_immediate_p (op))
3365 return 0;
3366
644459d0 3367 if (GET_MODE (op) != VOIDmode)
3368 mode = GET_MODE (op);
3369
3370 constant_to_array (mode, op, arr);
3371
3372 if (VECTOR_MODE_P (mode))
3373 mode = GET_MODE_INNER (mode);
3374
3375 bytes = GET_MODE_SIZE (mode);
3376 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3377
3378 /* Check that bytes are repeated. */
3379 for (i = bytes; i < 16; i += bytes)
3380 for (j = 0; j < bytes; j++)
3381 if (arr[j] != arr[i + j])
3382 return 0;
3383
3384 val = arr[0];
3385 for (j = 1; j < bytes; j++)
3386 val = (val << 8) | arr[j];
3387
3388 val = trunc_int_for_mode (val, mode);
3389
3390 return val >= low && val <= high;
3391}
3392
56c7bfc2 3393 /* TRUE when OP is an immediate and an exact power of 2.  Writing OP as
         3394    2^scale, we require scale >= LOW && scale <= HIGH.  When OP is a vector,
3395 all entries must be the same. */
3396bool
3397exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3398{
3399 enum machine_mode int_mode;
3400 HOST_WIDE_INT val;
3401 unsigned char arr[16];
3402 int bytes, i, j;
3403
3404 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3405 || GET_CODE (op) == CONST_VECTOR);
3406
3407 if (GET_CODE (op) == CONST_VECTOR
3408 && !const_vector_immediate_p (op))
3409 return 0;
3410
3411 if (GET_MODE (op) != VOIDmode)
3412 mode = GET_MODE (op);
3413
3414 constant_to_array (mode, op, arr);
3415
3416 if (VECTOR_MODE_P (mode))
3417 mode = GET_MODE_INNER (mode);
3418
3419 bytes = GET_MODE_SIZE (mode);
3420 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3421
3422 /* Check that bytes are repeated. */
3423 for (i = bytes; i < 16; i += bytes)
3424 for (j = 0; j < bytes; j++)
3425 if (arr[j] != arr[i + j])
3426 return 0;
3427
3428 val = arr[0];
3429 for (j = 1; j < bytes; j++)
3430 val = (val << 8) | arr[j];
3431
3432 val = trunc_int_for_mode (val, int_mode);
3433
3434 /* Currently, we only handle SFmode */
3435 gcc_assert (mode == SFmode);
3436 if (mode == SFmode)
3437 {
3438 int exp = (val >> 23) - 127;
3439 return val > 0 && (val & 0x007fffff) == 0
3440 && exp >= low && exp <= high;
3441 }
3442 return FALSE;
3443}
3444
6cf5579e 3445/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3446
3447static int
3448ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3449{
3450 rtx x = *px;
3451 tree decl;
3452
3453 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3454 {
3455 rtx plus = XEXP (x, 0);
3456 rtx op0 = XEXP (plus, 0);
3457 rtx op1 = XEXP (plus, 1);
3458 if (GET_CODE (op1) == CONST_INT)
3459 x = op0;
3460 }
3461
3462 return (GET_CODE (x) == SYMBOL_REF
3463 && (decl = SYMBOL_REF_DECL (x)) != 0
3464 && TREE_CODE (decl) == VAR_DECL
3465 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3466}
3467
644459d0 3468/* We accept:
5b865faf 3469 - any 32-bit constant (SImode, SFmode)
644459d0 3470 - any constant that can be generated with fsmbi (any mode)
5b865faf 3471 - a 64-bit constant where the high and low bits are identical
644459d0 3472 (DImode, DFmode)
5b865faf 3473 - a 128-bit constant where the four 32-bit words match. */
ca316360 3474bool
3475spu_legitimate_constant_p (enum machine_mode mode, rtx x)
644459d0 3476{
5df189be 3477 if (GET_CODE (x) == HIGH)
3478 x = XEXP (x, 0);
6cf5579e 3479
3480 /* Reject any __ea qualified reference. These can't appear in
3481 instructions but must be forced to the constant pool. */
3482 if (for_each_rtx (&x, ea_symbol_ref, 0))
3483 return 0;
3484
644459d0 3485 /* V4SI with all identical symbols is valid. */
5df189be 3486 if (!flag_pic
ca316360 3487 && mode == V4SImode
644459d0 3488 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3489 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3490 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3491 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3492 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3493 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3494
5df189be 3495 if (GET_CODE (x) == CONST_VECTOR
3496 && !const_vector_immediate_p (x))
3497 return 0;
644459d0 3498 return 1;
3499}
3500
         3501 /* Valid addresses are:
3502 - symbol_ref, label_ref, const
3503 - reg
9d98604b 3504 - reg + const_int, where const_int is 16 byte aligned
644459d0 3505 - reg + reg, alignment doesn't matter
3506 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3507 ignore the 4 least significant bits of the const. We only care about
3508 16 byte modes because the expand phase will change all smaller MEM
3509 references to TImode. */
3510static bool
3511spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3512 rtx x, bool reg_ok_strict)
644459d0 3513{
9d98604b 3514 int aligned = GET_MODE_SIZE (mode) >= 16;
3515 if (aligned
3516 && GET_CODE (x) == AND
644459d0 3517 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3518 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3519 x = XEXP (x, 0);
3520 switch (GET_CODE (x))
3521 {
644459d0 3522 case LABEL_REF:
6cf5579e 3523 return !TARGET_LARGE_MEM;
3524
9d98604b 3525 case SYMBOL_REF:
644459d0 3526 case CONST:
6cf5579e 3527 /* Keep __ea references until reload so that spu_expand_mov can see them
3528 in MEMs. */
3529 if (ea_symbol_ref (&x, 0))
3530 return !reload_in_progress && !reload_completed;
9d98604b 3531 return !TARGET_LARGE_MEM;
644459d0 3532
3533 case CONST_INT:
3534 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3535
3536 case SUBREG:
3537 x = XEXP (x, 0);
9d98604b 3538 if (REG_P (x))
3539 return 0;
644459d0 3540
3541 case REG:
3542 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3543
3544 case PLUS:
3545 case LO_SUM:
3546 {
3547 rtx op0 = XEXP (x, 0);
3548 rtx op1 = XEXP (x, 1);
3549 if (GET_CODE (op0) == SUBREG)
3550 op0 = XEXP (op0, 0);
3551 if (GET_CODE (op1) == SUBREG)
3552 op1 = XEXP (op1, 0);
644459d0 3553 if (GET_CODE (op0) == REG
3554 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3555 && GET_CODE (op1) == CONST_INT
fa695424 3556 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3557 /* If virtual registers are involved, the displacement will
3558 change later on anyway, so checking would be premature.
3559 Reload will make sure the final displacement after
3560 register elimination is OK. */
3561 || op0 == arg_pointer_rtx
3562 || op0 == frame_pointer_rtx
3563 || op0 == virtual_stack_vars_rtx)
9d98604b 3564 && (!aligned || (INTVAL (op1) & 15) == 0))
3565 return TRUE;
644459d0 3566 if (GET_CODE (op0) == REG
3567 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3568 && GET_CODE (op1) == REG
3569 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3570 return TRUE;
644459d0 3571 }
3572 break;
3573
3574 default:
3575 break;
3576 }
9d98604b 3577 return FALSE;
644459d0 3578}
3579
6cf5579e 3580/* Like spu_legitimate_address_p, except with named addresses. */
3581static bool
3582spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3583 bool reg_ok_strict, addr_space_t as)
3584{
3585 if (as == ADDR_SPACE_EA)
3586 return (REG_P (x) && (GET_MODE (x) == EAmode));
3587
3588 else if (as != ADDR_SPACE_GENERIC)
3589 gcc_unreachable ();
3590
3591 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3592}
3593
644459d0 3594/* When the address is reg + const_int, force the const_int into a
fa7637bd 3595 register. */
3defb88e 3596static rtx
644459d0 3597spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3598 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3599{
3600 rtx op0, op1;
3601 /* Make sure both operands are registers. */
3602 if (GET_CODE (x) == PLUS)
3603 {
3604 op0 = XEXP (x, 0);
3605 op1 = XEXP (x, 1);
3606 if (ALIGNED_SYMBOL_REF_P (op0))
3607 {
3608 op0 = force_reg (Pmode, op0);
3609 mark_reg_pointer (op0, 128);
3610 }
3611 else if (GET_CODE (op0) != REG)
3612 op0 = force_reg (Pmode, op0);
3613 if (ALIGNED_SYMBOL_REF_P (op1))
3614 {
3615 op1 = force_reg (Pmode, op1);
3616 mark_reg_pointer (op1, 128);
3617 }
3618 else if (GET_CODE (op1) != REG)
3619 op1 = force_reg (Pmode, op1);
3620 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3621 }
41e3a0c7 3622 return x;
644459d0 3623}
3624
6cf5579e 3625 /* Like spu_legitimize_address, except with named address support.  */
3626static rtx
3627spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3628 addr_space_t as)
3629{
3630 if (as != ADDR_SPACE_GENERIC)
3631 return x;
3632
3633 return spu_legitimize_address (x, oldx, mode);
3634}
3635
fa695424 3636/* Reload reg + const_int for out-of-range displacements. */
3637rtx
3638spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3639 int opnum, int type)
3640{
3641 bool removed_and = false;
3642
3643 if (GET_CODE (ad) == AND
3644 && CONST_INT_P (XEXP (ad, 1))
3645 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3646 {
3647 ad = XEXP (ad, 0);
3648 removed_and = true;
3649 }
3650
3651 if (GET_CODE (ad) == PLUS
3652 && REG_P (XEXP (ad, 0))
3653 && CONST_INT_P (XEXP (ad, 1))
3654 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3655 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3656 {
3657 /* Unshare the sum. */
3658 ad = copy_rtx (ad);
3659
3660 /* Reload the displacement. */
3661 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3662 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3663 opnum, (enum reload_type) type);
3664
3665 /* Add back AND for alignment if we stripped it. */
3666 if (removed_and)
3667 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3668
3669 return ad;
3670 }
3671
3672 return NULL_RTX;
3673}
3674
644459d0 3675/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3676 struct attribute_spec.handler. */
3677static tree
3678spu_handle_fndecl_attribute (tree * node,
3679 tree name,
3680 tree args ATTRIBUTE_UNUSED,
3681 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3682{
3683 if (TREE_CODE (*node) != FUNCTION_DECL)
3684 {
67a779df 3685 warning (0, "%qE attribute only applies to functions",
3686 name);
644459d0 3687 *no_add_attrs = true;
3688 }
3689
3690 return NULL_TREE;
3691}
3692
3693/* Handle the "vector" attribute. */
3694static tree
3695spu_handle_vector_attribute (tree * node, tree name,
3696 tree args ATTRIBUTE_UNUSED,
3697 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3698{
3699 tree type = *node, result = NULL_TREE;
3700 enum machine_mode mode;
3701 int unsigned_p;
3702
3703 while (POINTER_TYPE_P (type)
3704 || TREE_CODE (type) == FUNCTION_TYPE
3705 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3706 type = TREE_TYPE (type);
3707
3708 mode = TYPE_MODE (type);
3709
3710 unsigned_p = TYPE_UNSIGNED (type);
3711 switch (mode)
3712 {
3713 case DImode:
3714 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3715 break;
3716 case SImode:
3717 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3718 break;
3719 case HImode:
3720 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3721 break;
3722 case QImode:
3723 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3724 break;
3725 case SFmode:
3726 result = V4SF_type_node;
3727 break;
3728 case DFmode:
3729 result = V2DF_type_node;
3730 break;
3731 default:
3732 break;
3733 }
3734
3735 /* Propagate qualifiers attached to the element type
3736 onto the vector type. */
3737 if (result && result != type && TYPE_QUALS (type))
3738 result = build_qualified_type (result, TYPE_QUALS (type));
3739
3740 *no_add_attrs = true; /* No need to hang on to the attribute. */
3741
3742 if (!result)
67a779df 3743 warning (0, "%qE attribute ignored", name);
644459d0 3744 else
d991e6e8 3745 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3746
3747 return NULL_TREE;
3748}
3749
f2b32076 3750/* Return nonzero if FUNC is a naked function. */
644459d0 3751static int
3752spu_naked_function_p (tree func)
3753{
3754 tree a;
3755
3756 if (TREE_CODE (func) != FUNCTION_DECL)
3757 abort ();
3758
3759 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3760 return a != NULL_TREE;
3761}
3762
3763int
3764spu_initial_elimination_offset (int from, int to)
3765{
3766 int saved_regs_size = spu_saved_regs_size ();
3767 int sp_offset = 0;
d5bf7b64 3768 if (!crtl->is_leaf || crtl->outgoing_args_size
644459d0 3769 || get_frame_size () || saved_regs_size)
3770 sp_offset = STACK_POINTER_OFFSET;
3771 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3772 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3773 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3774 return get_frame_size ();
644459d0 3775 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3776 return sp_offset + crtl->outgoing_args_size
644459d0 3777 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3778 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3780 else
3781 gcc_unreachable ();
644459d0 3782}
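/* Worked example (illustrative numbers): for a non-leaf function with
   a 48-byte frame, 32 bytes of outgoing arguments and 16 bytes of
   saved registers, sp_offset is STACK_POINTER_OFFSET and the offsets
   returned above are

     FRAME_POINTER -> STACK_POINTER      : 48 + 32 + STACK_POINTER_OFFSET
     FRAME_POINTER -> HARD_FRAME_POINTER : 48
     ARG_POINTER   -> STACK_POINTER      : 48 + 32 + 16 + 2 * STACK_POINTER_OFFSET
     ARG_POINTER   -> HARD_FRAME_POINTER : 48 + 16 + STACK_POINTER_OFFSET  */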
3783
3784rtx
fb80456a 3785spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3786{
3787 enum machine_mode mode = TYPE_MODE (type);
3788 int byte_size = ((mode == BLKmode)
3789 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3790
3791 /* Make sure small structs are left justified in a register. */
3792 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3793 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3794 {
3795 enum machine_mode smode;
3796 rtvec v;
3797 int i;
3798 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3799 int n = byte_size / UNITS_PER_WORD;
3800 v = rtvec_alloc (nregs);
3801 for (i = 0; i < n; i++)
3802 {
3803 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3804 gen_rtx_REG (TImode,
3805 FIRST_RETURN_REGNUM
3806 + i),
3807 GEN_INT (UNITS_PER_WORD * i));
3808 byte_size -= UNITS_PER_WORD;
3809 }
3810
3811 if (n < nregs)
3812 {
3813 if (byte_size < 4)
3814 byte_size = 4;
3815 smode =
3816 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3817 RTVEC_ELT (v, n) =
3818 gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3820 GEN_INT (UNITS_PER_WORD * n));
3821 }
3822 return gen_rtx_PARALLEL (mode, v);
3823 }
3824 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3825}
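/* For example (UNITS_PER_WORD is 16 on SPU), a 20-byte BLKmode struct
   is returned as

     (parallel [(expr_list (reg:TI FIRST_RETURN_REGNUM)     (const_int 0))
                (expr_list (reg:SI FIRST_RETURN_REGNUM + 1) (const_int 16))])

   i.e. one full 16-byte register followed by the remaining 4 bytes
   left-justified in the next register.  */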
3826
ee9034d4 3827static rtx
39cba157 3828spu_function_arg (cumulative_args_t cum_v,
644459d0 3829 enum machine_mode mode,
ee9034d4 3830 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3831{
39cba157 3832 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 3833 int byte_size;
3834
a08c5dd0 3835 if (*cum >= MAX_REGISTER_ARGS)
644459d0 3836 return 0;
3837
3838 byte_size = ((mode == BLKmode)
3839 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3840
3841 /* The ABI does not allow parameters to be passed partially in
3842 reg and partially in stack. */
a08c5dd0 3843 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 3844 return 0;
3845
3846 /* Make sure small structs are left justified in a register. */
3847 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3848 && byte_size < UNITS_PER_WORD && byte_size > 0)
3849 {
3850 enum machine_mode smode;
3851 rtx gr_reg;
3852 if (byte_size < 4)
3853 byte_size = 4;
3854 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3855 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 3856 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 3857 const0_rtx);
3858 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3859 }
3860 else
a08c5dd0 3861 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 3862}
3863
ee9034d4 3864static void
39cba157 3865spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
ee9034d4 3866 const_tree type, bool named ATTRIBUTE_UNUSED)
3867{
39cba157 3868 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3869
ee9034d4 3870 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3871 ? 1
3872 : mode == BLKmode
3873 ? ((int_size_in_bytes (type) + 15) / 16)
3874 : mode == VOIDmode
3875 ? 1
3876 : HARD_REGNO_NREGS (cum, mode));
3877}
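/* E.g. a 4-byte int and a 16-byte vector each advance *cum by one
   slot (every argument register is a full 16-byte register), a
   40-byte BLKmode struct advances it by (40 + 15) / 16 = 3, and a
   variable-sized type advances it by 1 because only its address is
   passed (see spu_pass_by_reference below).  */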
3878
644459d0 3879/* Variable sized types are passed by reference. */
3880static bool
39cba157 3881spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
644459d0 3882 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3883 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3884{
3885 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3886}
3887\f
3888
3889/* Var args. */
3890
3891/* Create and return the va_list datatype.
3892
3893 On SPU, va_list is an array type equivalent to
3894
3895 typedef struct __va_list_tag
3896 {
3897 void *__args __attribute__((__aligned(16)));
3898 void *__skip __attribute__((__aligned(16)));
3899
3900 } va_list[1];
3901
fa7637bd 3902 where __args points to the arg that will be returned by the next
644459d0 3903 va_arg(), and __skip points to the previous stack frame such that
3904 when __args == __skip we should advance __args by 32 bytes. */
3905static tree
3906spu_build_builtin_va_list (void)
3907{
3908 tree f_args, f_skip, record, type_decl;
3909 bool owp;
3910
3911 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3912
3913 type_decl =
54e46243 3914 build_decl (BUILTINS_LOCATION,
3915 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3916
54e46243 3917 f_args = build_decl (BUILTINS_LOCATION,
3918 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3919 f_skip = build_decl (BUILTINS_LOCATION,
3920 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3921
3922 DECL_FIELD_CONTEXT (f_args) = record;
3923 DECL_ALIGN (f_args) = 128;
3924 DECL_USER_ALIGN (f_args) = 1;
3925
3926 DECL_FIELD_CONTEXT (f_skip) = record;
3927 DECL_ALIGN (f_skip) = 128;
3928 DECL_USER_ALIGN (f_skip) = 1;
3929
bc907808 3930 TYPE_STUB_DECL (record) = type_decl;
644459d0 3931 TYPE_NAME (record) = type_decl;
3932 TYPE_FIELDS (record) = f_args;
1767a056 3933 DECL_CHAIN (f_args) = f_skip;
644459d0 3934
 3935 /* We know this is being padded and we want it that way. It is an
 3936 internal type, so hide the warnings from the user. */
3937 owp = warn_padded;
3938 warn_padded = false;
3939
3940 layout_type (record);
3941
3942 warn_padded = owp;
3943
3944 /* The correct type is an array type of one element. */
3945 return build_array_type (record, build_index_type (size_zero_node));
3946}
3947
3948/* Implement va_start by filling the va_list structure VALIST.
3949 NEXTARG points to the first anonymous stack argument.
3950
3951 The following global variables are used to initialize
3952 the va_list structure:
3953
abe32cce 3954 crtl->args.info;
644459d0 3955 the CUMULATIVE_ARGS for this function
3956
abe32cce 3957 crtl->args.arg_offset_rtx:
644459d0 3958 holds the offset of the first anonymous stack argument
3959 (relative to the virtual arg pointer). */
3960
8a58ed0a 3961static void
644459d0 3962spu_va_start (tree valist, rtx nextarg)
3963{
3964 tree f_args, f_skip;
3965 tree args, skip, t;
3966
3967 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 3968 f_skip = DECL_CHAIN (f_args);
644459d0 3969
170efcd4 3970 valist = build_simple_mem_ref (valist);
644459d0 3971 args =
3972 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3973 skip =
3974 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3975
3976 /* Find the __args area. */
3977 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3978 if (crtl->args.pretend_args_size > 0)
2cc66f2a 3979 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 3980 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3981 TREE_SIDE_EFFECTS (t) = 1;
3982 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3983
3984 /* Find the __skip area. */
3985 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 3986 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3987 - STACK_POINTER_OFFSET));
75a70cf9 3988 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3989 TREE_SIDE_EFFECTS (t) = 1;
3990 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3991}
3992
3993/* Gimplify va_arg by updating the va_list structure
3994 VALIST as required to retrieve an argument of type
3995 TYPE, and returning that argument.
3996
3997 ret = va_arg(VALIST, TYPE);
3998
3999 generates code equivalent to:
4000
4001 paddedsize = (sizeof(TYPE) + 15) & -16;
4002 if (VALIST.__args + paddedsize > VALIST.__skip
4003 && VALIST.__args <= VALIST.__skip)
4004 addr = VALIST.__skip + 32;
4005 else
4006 addr = VALIST.__args;
4007 VALIST.__args = addr + paddedsize;
4008 ret = *(TYPE *)addr;
4009 */
4010static tree
75a70cf9 4011spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4012 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4013{
4014 tree f_args, f_skip;
4015 tree args, skip;
4016 HOST_WIDE_INT size, rsize;
2cc66f2a 4017 tree addr, tmp;
644459d0 4018 bool pass_by_reference_p;
4019
4020 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4021 f_skip = DECL_CHAIN (f_args);
644459d0 4022
182cf5a9 4023 valist = build_simple_mem_ref (valist);
644459d0 4024 args =
4025 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4026 skip =
4027 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4028
4029 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4030
4031 /* if an object is dynamically sized, a pointer to it is passed
4032 instead of the object itself. */
27a82950 4033 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4034 false);
644459d0 4035 if (pass_by_reference_p)
4036 type = build_pointer_type (type);
4037 size = int_size_in_bytes (type);
4038 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4039
4040 /* build conditional expression to calculate addr. The expression
4041 will be gimplified later. */
2cc66f2a 4042 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4043 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4044 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4045 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4046 unshare_expr (skip)));
644459d0 4047
4048 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4049 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4050 unshare_expr (args));
644459d0 4051
75a70cf9 4052 gimplify_assign (addr, tmp, pre_p);
644459d0 4053
4054 /* update VALIST.__args */
2cc66f2a 4055 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4056 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4057
8115f0af 4058 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4059 addr);
644459d0 4060
4061 if (pass_by_reference_p)
4062 addr = build_va_arg_indirect_ref (addr);
4063
4064 return build_va_arg_indirect_ref (addr);
4065}
4066
4067/* Save parameter registers starting with the register that corresponds
4068 to the first unnamed parameters. If the first unnamed parameter is
4069 in the stack then save no registers. Set pretend_args_size to the
4070 amount of space needed to save the registers. */
39cba157 4071static void
4072spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
644459d0 4073 tree type, int *pretend_size, int no_rtl)
4074{
4075 if (!no_rtl)
4076 {
4077 rtx tmp;
4078 int regno;
4079 int offset;
39cba157 4080 int ncum = *get_cumulative_args (cum);
644459d0 4081
4082 /* cum currently points to the last named argument, we want to
4083 start at the next argument. */
39cba157 4084 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4085
4086 offset = -STACK_POINTER_OFFSET;
4087 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4088 {
4089 tmp = gen_frame_mem (V4SImode,
29c05e22 4090 plus_constant (Pmode, virtual_incoming_args_rtx,
644459d0 4091 offset));
4092 emit_move_insn (tmp,
4093 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4094 offset += 16;
4095 }
4096 *pretend_size = offset + STACK_POINTER_OFFSET;
4097 }
4098}
4099\f
b2d7ede1 4100static void
644459d0 4101spu_conditional_register_usage (void)
4102{
4103 if (flag_pic)
4104 {
4105 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4106 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4107 }
644459d0 4108}
4109
9d98604b 4110/* This is called any time we inspect the alignment of a register for
4111 addresses. */
644459d0 4112static int
9d98604b 4113reg_aligned_for_addr (rtx x)
644459d0 4114{
9d98604b 4115 int regno =
4116 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4117 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4118}
4119
69ced2d6 4120/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4121 into its SYMBOL_REF_FLAGS. */
4122static void
4123spu_encode_section_info (tree decl, rtx rtl, int first)
4124{
4125 default_encode_section_info (decl, rtl, first);
4126
4127 /* If a variable has a forced alignment to < 16 bytes, mark it with
4128 SYMBOL_FLAG_ALIGN1. */
4129 if (TREE_CODE (decl) == VAR_DECL
4130 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4131 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4132}
4133
644459d0 4134/* Return TRUE if we are certain the mem refers to a complete object
4135 which is both 16-byte aligned and padded to a 16-byte boundary. This
4136 would make it safe to store with a single instruction.
4137 We guarantee the alignment and padding for static objects by aligning
4138 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4139 FIXME: We currently cannot guarantee this for objects on the stack
4140 because assign_parm_setup_stack calls assign_stack_local with the
4141 alignment of the parameter mode and in that case the alignment never
4142 gets adjusted by LOCAL_ALIGNMENT. */
4143static int
4144store_with_one_insn_p (rtx mem)
4145{
9d98604b 4146 enum machine_mode mode = GET_MODE (mem);
644459d0 4147 rtx addr = XEXP (mem, 0);
9d98604b 4148 if (mode == BLKmode)
644459d0 4149 return 0;
9d98604b 4150 if (GET_MODE_SIZE (mode) >= 16)
4151 return 1;
644459d0 4152 /* Only static objects. */
4153 if (GET_CODE (addr) == SYMBOL_REF)
4154 {
4155 /* We use the associated declaration to make sure the access is
fa7637bd 4156 referring to the whole object.
851d9296 4157 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4158 if it is necessary. Will there be cases where one exists, and
4159 the other does not? Will there be cases where both exist, but
4160 have different types? */
4161 tree decl = MEM_EXPR (mem);
4162 if (decl
4163 && TREE_CODE (decl) == VAR_DECL
4164 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4165 return 1;
4166 decl = SYMBOL_REF_DECL (addr);
4167 if (decl
4168 && TREE_CODE (decl) == VAR_DECL
4169 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4170 return 1;
4171 }
4172 return 0;
4173}
4174
9d98604b 4175/* Return 1 when the address is not valid for a simple load and store as
4176 required by the '_mov*' patterns. We could make this less strict
4177 for loads, but we prefer mem's to look the same so they are more
4178 likely to be merged. */
4179static int
4180address_needs_split (rtx mem)
4181{
4182 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4183 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4184 || !(store_with_one_insn_p (mem)
4185 || mem_is_padded_component_ref (mem))))
4186 return 1;
4187
4188 return 0;
4189}
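/* For instance, a 1- or 2-byte access always needs to be split, while
   a 4-byte store to an aligned static variable whose declared type
   has exactly that mode (accepted by store_with_one_insn_p above) can
   be emitted as a single instruction.  */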
4190
6cf5579e 4191static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4192static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4193static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4194
4195/* MEM is known to be an __ea qualified memory access. Emit a call to
 4196 fetch the PPU memory to local store, and return its address in local
4197 store. */
4198
4199static void
4200ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4201{
4202 if (is_store)
4203 {
4204 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4205 if (!cache_fetch_dirty)
4206 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4207 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4208 2, ea_addr, EAmode, ndirty, SImode);
4209 }
4210 else
4211 {
4212 if (!cache_fetch)
4213 cache_fetch = init_one_libfunc ("__cache_fetch");
4214 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4215 1, ea_addr, EAmode);
4216 }
4217}
4218
4219/* Like ea_load_store, but do the cache tag comparison and, for stores,
4220 dirty bit marking, inline.
4221
4222 The cache control data structure is an array of
4223
4224 struct __cache_tag_array
4225 {
4226 unsigned int tag_lo[4];
4227 unsigned int tag_hi[4];
4228 void *data_pointer[4];
4229 int reserved[4];
4230 vector unsigned short dirty_bits[4];
4231 } */
4232
4233static void
4234ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4235{
4236 rtx ea_addr_si;
4237 HOST_WIDE_INT v;
4238 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4239 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4240 rtx index_mask = gen_reg_rtx (SImode);
4241 rtx tag_arr = gen_reg_rtx (Pmode);
4242 rtx splat_mask = gen_reg_rtx (TImode);
4243 rtx splat = gen_reg_rtx (V4SImode);
4244 rtx splat_hi = NULL_RTX;
4245 rtx tag_index = gen_reg_rtx (Pmode);
4246 rtx block_off = gen_reg_rtx (SImode);
4247 rtx tag_addr = gen_reg_rtx (Pmode);
4248 rtx tag = gen_reg_rtx (V4SImode);
4249 rtx cache_tag = gen_reg_rtx (V4SImode);
4250 rtx cache_tag_hi = NULL_RTX;
4251 rtx cache_ptrs = gen_reg_rtx (TImode);
4252 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4253 rtx tag_equal = gen_reg_rtx (V4SImode);
4254 rtx tag_equal_hi = NULL_RTX;
4255 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4256 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4257 rtx eq_index = gen_reg_rtx (SImode);
4258 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4259
4260 if (spu_ea_model != 32)
4261 {
4262 splat_hi = gen_reg_rtx (V4SImode);
4263 cache_tag_hi = gen_reg_rtx (V4SImode);
4264 tag_equal_hi = gen_reg_rtx (V4SImode);
4265 }
4266
29c05e22 4267 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
6cf5579e 4268 emit_move_insn (tag_arr, tag_arr_sym);
4269 v = 0x0001020300010203LL;
4270 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4271 ea_addr_si = ea_addr;
4272 if (spu_ea_model != 32)
4273 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4274
4275 /* tag_index = ea_addr & (tag_array_size - 128) */
4276 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4277
4278 /* splat ea_addr to all 4 slots. */
4279 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4280 /* Similarly for high 32 bits of ea_addr. */
4281 if (spu_ea_model != 32)
4282 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4283
4284 /* block_off = ea_addr & 127 */
4285 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4286
4287 /* tag_addr = tag_arr + tag_index */
4288 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4289
4290 /* Read cache tags. */
4291 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4292 if (spu_ea_model != 32)
4293 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
29c05e22 4294 plus_constant (Pmode,
4295 tag_addr, 16)));
6cf5579e 4296
4297 /* tag = ea_addr & -128 */
4298 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4299
4300 /* Read all four cache data pointers. */
4301 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
29c05e22 4302 plus_constant (Pmode,
4303 tag_addr, 32)));
6cf5579e 4304
4305 /* Compare tags. */
4306 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4307 if (spu_ea_model != 32)
4308 {
4309 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4310 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4311 }
4312
 4313 /* At most one of the tags compares equal, so tag_equal has one
4314 32-bit slot set to all 1's, with the other slots all zero.
4315 gbb picks off low bit from each byte in the 128-bit registers,
4316 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4317 we have a hit. */
4318 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4319 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4320
4321 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4322 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4323
 4324 /* This allows us to rotate the corresponding cache data pointer to
 4325 slot 0 (rotating by eq_index mod 16 bytes). */
4326 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4327 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
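  /* Worked example: a hit in slot 1 gives tag_equal = {0, -1, 0, 0},
     gbb packs that to tag_eq_pack_si = 0x0f00, eq_index = clz (0x0f00)
     = 20, and the rotqby above rotates cache_ptrs left by 20 mod 16 = 4
     bytes, bringing data_pointer[1] into the preferred slot extracted
     by spu_convert.  */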
4328
4329 /* Add block offset to form final data address. */
4330 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4331
4332 /* Check that we did hit. */
4333 hit_label = gen_label_rtx ();
4334 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4335 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4336 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4337 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4338 hit_ref, pc_rtx)));
4339 /* Say that this branch is very likely to happen. */
4340 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4341 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4342
4343 ea_load_store (mem, is_store, ea_addr, data_addr);
4344 cont_label = gen_label_rtx ();
4345 emit_jump_insn (gen_jump (cont_label));
4346 emit_barrier ();
4347
4348 emit_label (hit_label);
4349
4350 if (is_store)
4351 {
4352 HOST_WIDE_INT v_hi;
4353 rtx dirty_bits = gen_reg_rtx (TImode);
4354 rtx dirty_off = gen_reg_rtx (SImode);
4355 rtx dirty_128 = gen_reg_rtx (TImode);
4356 rtx neg_block_off = gen_reg_rtx (SImode);
4357
4358 /* Set up mask with one dirty bit per byte of the mem we are
4359 writing, starting from top bit. */
4360 v_hi = v = -1;
4361 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4362 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4363 {
4364 v_hi = v;
4365 v = 0;
4366 }
4367 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4368
4369 /* Form index into cache dirty_bits. eq_index is one of
4370 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4371 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4372 offset to each of the four dirty_bits elements. */
4373 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4374
4375 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4376
4377 /* Rotate bit mask to proper bit. */
4378 emit_insn (gen_negsi2 (neg_block_off, block_off));
4379 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4380 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4381
4382 /* Or in the new dirty bits. */
4383 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4384
4385 /* Store. */
4386 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4387 }
4388
4389 emit_label (cont_label);
4390}
4391
4392static rtx
4393expand_ea_mem (rtx mem, bool is_store)
4394{
4395 rtx ea_addr;
4396 rtx data_addr = gen_reg_rtx (Pmode);
4397 rtx new_mem;
4398
4399 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4400 if (optimize_size || optimize == 0)
4401 ea_load_store (mem, is_store, ea_addr, data_addr);
4402 else
4403 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4404
4405 if (ea_alias_set == -1)
4406 ea_alias_set = new_alias_set ();
4407
4408 /* We generate a new MEM RTX to refer to the copy of the data
4409 in the cache. We do not copy memory attributes (except the
4410 alignment) from the original MEM, as they may no longer apply
4411 to the cache copy. */
4412 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4413 set_mem_alias_set (new_mem, ea_alias_set);
4414 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4415
4416 return new_mem;
4417}
4418
644459d0 4419int
4420spu_expand_mov (rtx * ops, enum machine_mode mode)
4421{
4422 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4423 {
4424 /* Perform the move in the destination SUBREG's inner mode. */
4425 ops[0] = SUBREG_REG (ops[0]);
4426 mode = GET_MODE (ops[0]);
4427 ops[1] = gen_lowpart_common (mode, ops[1]);
4428 gcc_assert (ops[1]);
4429 }
644459d0 4430
4431 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4432 {
4433 rtx from = SUBREG_REG (ops[1]);
8d72495d 4434 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4435
4436 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4437 && GET_MODE_CLASS (imode) == MODE_INT
4438 && subreg_lowpart_p (ops[1]));
4439
4440 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4441 imode = SImode;
4442 if (imode != GET_MODE (from))
4443 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4444
4445 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4446 {
d6bf3b14 4447 enum insn_code icode = convert_optab_handler (trunc_optab,
4448 mode, imode);
644459d0 4449 emit_insn (GEN_FCN (icode) (ops[0], from));
4450 }
4451 else
4452 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4453 return 1;
4454 }
4455
4456 /* At least one of the operands needs to be a register. */
4457 if ((reload_in_progress | reload_completed) == 0
4458 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4459 {
4460 rtx temp = force_reg (mode, ops[1]);
4461 emit_move_insn (ops[0], temp);
4462 return 1;
4463 }
4464 if (reload_in_progress || reload_completed)
4465 {
dea01258 4466 if (CONSTANT_P (ops[1]))
4467 return spu_split_immediate (ops);
644459d0 4468 return 0;
4469 }
9d98604b 4470
4471 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4472 extend them. */
4473 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4474 {
9d98604b 4475 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4476 if (val != INTVAL (ops[1]))
644459d0 4477 {
9d98604b 4478 emit_move_insn (ops[0], GEN_INT (val));
4479 return 1;
644459d0 4480 }
4481 }
9d98604b 4482 if (MEM_P (ops[0]))
6cf5579e 4483 {
4484 if (MEM_ADDR_SPACE (ops[0]))
4485 ops[0] = expand_ea_mem (ops[0], true);
4486 return spu_split_store (ops);
4487 }
9d98604b 4488 if (MEM_P (ops[1]))
6cf5579e 4489 {
4490 if (MEM_ADDR_SPACE (ops[1]))
4491 ops[1] = expand_ea_mem (ops[1], false);
4492 return spu_split_load (ops);
4493 }
9d98604b 4494
644459d0 4495 return 0;
4496}
4497
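/* Move the value loaded into the TImode register SRC into DST, whose
   mode is narrower than TImode.  After the load (and any rotation)
   the value sits in its preferred slot, so a logical right shift by
   96 bits (64 bits for 8-byte modes) followed by a TRUNCATE leaves it
   in the low-order bits; the result goes through an integer mode and
   a lowpart subreg when DST's mode is not integral (e.g. SFmode).  */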
9d98604b 4498static void
4499spu_convert_move (rtx dst, rtx src)
644459d0 4500{
9d98604b 4501 enum machine_mode mode = GET_MODE (dst);
4502 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4503 rtx reg;
4504 gcc_assert (GET_MODE (src) == TImode);
4505 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4506 emit_insn (gen_rtx_SET (VOIDmode, reg,
4507 gen_rtx_TRUNCATE (int_mode,
4508 gen_rtx_LSHIFTRT (TImode, src,
4509 GEN_INT (int_mode == DImode ? 64 : 96)))));
4510 if (int_mode != mode)
4511 {
4512 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4513 emit_move_insn (dst, reg);
4514 }
4515}
644459d0 4516
9d98604b 4517/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4518 the address from SRC and SRC+16. Return a REG or CONST_INT that
4519 specifies how many bytes to rotate the loaded registers, plus any
4520 extra from EXTRA_ROTQBY. The address and rotate amounts are
4521 normalized to improve merging of loads and rotate computations. */
4522static rtx
4523spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4524{
4525 rtx addr = XEXP (src, 0);
4526 rtx p0, p1, rot, addr0, addr1;
4527 int rot_amt;
644459d0 4528
4529 rot = 0;
4530 rot_amt = 0;
9d98604b 4531
4532 if (MEM_ALIGN (src) >= 128)
4533 /* Address is already aligned; simply perform a TImode load. */ ;
4534 else if (GET_CODE (addr) == PLUS)
644459d0 4535 {
4536 /* 8 cases:
4537 aligned reg + aligned reg => lqx
4538 aligned reg + unaligned reg => lqx, rotqby
4539 aligned reg + aligned const => lqd
4540 aligned reg + unaligned const => lqd, rotqbyi
4541 unaligned reg + aligned reg => lqx, rotqby
4542 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4543 unaligned reg + aligned const => lqd, rotqby
4544 unaligned reg + unaligned const -> not allowed by legitimate address
4545 */
4546 p0 = XEXP (addr, 0);
4547 p1 = XEXP (addr, 1);
9d98604b 4548 if (!reg_aligned_for_addr (p0))
644459d0 4549 {
9d98604b 4550 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4551 {
9d98604b 4552 rot = gen_reg_rtx (SImode);
4553 emit_insn (gen_addsi3 (rot, p0, p1));
4554 }
4555 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4556 {
4557 if (INTVAL (p1) > 0
4558 && REG_POINTER (p0)
4559 && INTVAL (p1) * BITS_PER_UNIT
4560 < REGNO_POINTER_ALIGN (REGNO (p0)))
4561 {
4562 rot = gen_reg_rtx (SImode);
4563 emit_insn (gen_addsi3 (rot, p0, p1));
4564 addr = p0;
4565 }
4566 else
4567 {
4568 rtx x = gen_reg_rtx (SImode);
4569 emit_move_insn (x, p1);
4570 if (!spu_arith_operand (p1, SImode))
4571 p1 = x;
4572 rot = gen_reg_rtx (SImode);
4573 emit_insn (gen_addsi3 (rot, p0, p1));
4574 addr = gen_rtx_PLUS (Pmode, p0, x);
4575 }
644459d0 4576 }
4577 else
4578 rot = p0;
4579 }
4580 else
4581 {
4582 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4583 {
4584 rot_amt = INTVAL (p1) & 15;
9d98604b 4585 if (INTVAL (p1) & -16)
4586 {
4587 p1 = GEN_INT (INTVAL (p1) & -16);
4588 addr = gen_rtx_PLUS (SImode, p0, p1);
4589 }
4590 else
4591 addr = p0;
644459d0 4592 }
9d98604b 4593 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4594 rot = p1;
4595 }
4596 }
9d98604b 4597 else if (REG_P (addr))
644459d0 4598 {
9d98604b 4599 if (!reg_aligned_for_addr (addr))
644459d0 4600 rot = addr;
4601 }
4602 else if (GET_CODE (addr) == CONST)
4603 {
4604 if (GET_CODE (XEXP (addr, 0)) == PLUS
4605 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4606 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4607 {
4608 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4609 if (rot_amt & -16)
4610 addr = gen_rtx_CONST (Pmode,
4611 gen_rtx_PLUS (Pmode,
4612 XEXP (XEXP (addr, 0), 0),
4613 GEN_INT (rot_amt & -16)));
4614 else
4615 addr = XEXP (XEXP (addr, 0), 0);
4616 }
4617 else
9d98604b 4618 {
4619 rot = gen_reg_rtx (Pmode);
4620 emit_move_insn (rot, addr);
4621 }
644459d0 4622 }
4623 else if (GET_CODE (addr) == CONST_INT)
4624 {
4625 rot_amt = INTVAL (addr);
4626 addr = GEN_INT (rot_amt & -16);
4627 }
4628 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4629 {
4630 rot = gen_reg_rtx (Pmode);
4631 emit_move_insn (rot, addr);
4632 }
644459d0 4633
9d98604b 4634 rot_amt += extra_rotby;
644459d0 4635
4636 rot_amt &= 15;
4637
4638 if (rot && rot_amt)
4639 {
9d98604b 4640 rtx x = gen_reg_rtx (SImode);
4641 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4642 rot = x;
644459d0 4643 rot_amt = 0;
4644 }
9d98604b 4645 if (!rot && rot_amt)
4646 rot = GEN_INT (rot_amt);
4647
4648 addr0 = copy_rtx (addr);
4649 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4650 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4651
4652 if (dst1)
4653 {
29c05e22 4654 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
9d98604b 4655 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4656 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4657 }
644459d0 4658
9d98604b 4659 return rot;
4660}
4661
4662int
4663spu_split_load (rtx * ops)
4664{
4665 enum machine_mode mode = GET_MODE (ops[0]);
4666 rtx addr, load, rot;
4667 int rot_amt;
644459d0 4668
9d98604b 4669 if (GET_MODE_SIZE (mode) >= 16)
4670 return 0;
644459d0 4671
9d98604b 4672 addr = XEXP (ops[1], 0);
4673 gcc_assert (GET_CODE (addr) != AND);
4674
4675 if (!address_needs_split (ops[1]))
4676 {
4677 ops[1] = change_address (ops[1], TImode, addr);
4678 load = gen_reg_rtx (TImode);
4679 emit_insn (gen__movti (load, ops[1]));
4680 spu_convert_move (ops[0], load);
4681 return 1;
4682 }
4683
4684 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4685
4686 load = gen_reg_rtx (TImode);
4687 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4688
4689 if (rot)
4690 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4691
9d98604b 4692 spu_convert_move (ops[0], load);
4693 return 1;
644459d0 4694}
4695
9d98604b 4696int
644459d0 4697spu_split_store (rtx * ops)
4698{
4699 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4700 rtx reg;
644459d0 4701 rtx addr, p0, p1, p1_lo, smem;
4702 int aform;
4703 int scalar;
4704
9d98604b 4705 if (GET_MODE_SIZE (mode) >= 16)
4706 return 0;
4707
644459d0 4708 addr = XEXP (ops[0], 0);
9d98604b 4709 gcc_assert (GET_CODE (addr) != AND);
4710
4711 if (!address_needs_split (ops[0]))
4712 {
4713 reg = gen_reg_rtx (TImode);
4714 emit_insn (gen_spu_convert (reg, ops[1]));
4715 ops[0] = change_address (ops[0], TImode, addr);
4716 emit_move_insn (ops[0], reg);
4717 return 1;
4718 }
644459d0 4719
4720 if (GET_CODE (addr) == PLUS)
4721 {
4722 /* 8 cases:
4723 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4724 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4725 aligned reg + aligned const => lqd, c?d, shuf, stqx
4726 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4727 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4728 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4729 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4730 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4731 */
4732 aform = 0;
4733 p0 = XEXP (addr, 0);
4734 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4735 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4736 {
4737 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4738 if (reg_aligned_for_addr (p0))
4739 {
4740 p1 = GEN_INT (INTVAL (p1) & -16);
4741 if (p1 == const0_rtx)
4742 addr = p0;
4743 else
4744 addr = gen_rtx_PLUS (SImode, p0, p1);
4745 }
4746 else
4747 {
4748 rtx x = gen_reg_rtx (SImode);
4749 emit_move_insn (x, p1);
4750 addr = gen_rtx_PLUS (SImode, p0, x);
4751 }
644459d0 4752 }
4753 }
9d98604b 4754 else if (REG_P (addr))
644459d0 4755 {
4756 aform = 0;
4757 p0 = addr;
4758 p1 = p1_lo = const0_rtx;
4759 }
4760 else
4761 {
4762 aform = 1;
4763 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4764 p1 = 0; /* aform doesn't use p1 */
4765 p1_lo = addr;
4766 if (ALIGNED_SYMBOL_REF_P (addr))
4767 p1_lo = const0_rtx;
9d98604b 4768 else if (GET_CODE (addr) == CONST
4769 && GET_CODE (XEXP (addr, 0)) == PLUS
4770 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4771 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4772 {
9d98604b 4773 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4774 if ((v & -16) != 0)
4775 addr = gen_rtx_CONST (Pmode,
4776 gen_rtx_PLUS (Pmode,
4777 XEXP (XEXP (addr, 0), 0),
4778 GEN_INT (v & -16)));
4779 else
4780 addr = XEXP (XEXP (addr, 0), 0);
4781 p1_lo = GEN_INT (v & 15);
644459d0 4782 }
4783 else if (GET_CODE (addr) == CONST_INT)
4784 {
4785 p1_lo = GEN_INT (INTVAL (addr) & 15);
4786 addr = GEN_INT (INTVAL (addr) & -16);
4787 }
9d98604b 4788 else
4789 {
4790 p1_lo = gen_reg_rtx (SImode);
4791 emit_move_insn (p1_lo, addr);
4792 }
644459d0 4793 }
4794
4cbad5bb 4795 gcc_assert (aform == 0 || aform == 1);
9d98604b 4796 reg = gen_reg_rtx (TImode);
e04cf423 4797
644459d0 4798 scalar = store_with_one_insn_p (ops[0]);
4799 if (!scalar)
4800 {
 4801 /* We could copy the flags from the ops[0] MEM to lmem here, but
 4802 we don't because we want this load to be optimized away if
4803 possible, and copying the flags will prevent that in certain
4804 cases, e.g. consider the volatile flag. */
4805
9d98604b 4806 rtx pat = gen_reg_rtx (TImode);
e04cf423 4807 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4808 set_mem_alias_set (lmem, 0);
4809 emit_insn (gen_movti (reg, lmem));
644459d0 4810
9d98604b 4811 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4812 p0 = stack_pointer_rtx;
4813 if (!p1_lo)
4814 p1_lo = const0_rtx;
4815
4816 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4817 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4818 }
644459d0 4819 else
4820 {
4821 if (GET_CODE (ops[1]) == REG)
4822 emit_insn (gen_spu_convert (reg, ops[1]));
4823 else if (GET_CODE (ops[1]) == SUBREG)
4824 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4825 else
4826 abort ();
4827 }
4828
4829 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4830 emit_insn (gen_ashlti3
4831 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4832
9d98604b 4833 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4834 /* We can't use the previous alias set because the memory has changed
4835 size and can potentially overlap objects of other types. */
4836 set_mem_alias_set (smem, 0);
4837
e04cf423 4838 emit_insn (gen_movti (smem, reg));
9d98604b 4839 return 1;
644459d0 4840}
4841
4842/* Return TRUE if X is MEM which is a struct member reference
4843 and the member can safely be loaded and stored with a single
4844 instruction because it is padded. */
4845static int
4846mem_is_padded_component_ref (rtx x)
4847{
4848 tree t = MEM_EXPR (x);
4849 tree r;
4850 if (!t || TREE_CODE (t) != COMPONENT_REF)
4851 return 0;
4852 t = TREE_OPERAND (t, 1);
4853 if (!t || TREE_CODE (t) != FIELD_DECL
4854 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4855 return 0;
4856 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4857 r = DECL_FIELD_CONTEXT (t);
4858 if (!r || TREE_CODE (r) != RECORD_TYPE)
4859 return 0;
4860 /* Make sure they are the same mode */
4861 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4862 return 0;
 4863 /* If there are no following fields, then the field alignment ensures
fa7637bd 4864 the structure is padded to that alignment, which means this field
 4865 is padded too. */
644459d0 4866 if (TREE_CHAIN (t) == 0)
4867 return 1;
4868 /* If the following field is also aligned then this field will be
4869 padded. */
4870 t = TREE_CHAIN (t);
4871 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4872 return 1;
4873 return 0;
4874}
4875
c7b91b14 4876/* Parse the -mfixed-range= option string. */
4877static void
4878fix_range (const char *const_str)
4879{
4880 int i, first, last;
4881 char *str, *dash, *comma;
4882
 4883 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4884 REG2 are either register names or register numbers. The effect
4885 of this option is to mark the registers in the range from REG1 to
4886 REG2 as ``fixed'' so they won't be used by the compiler. */
4887
4888 i = strlen (const_str);
4889 str = (char *) alloca (i + 1);
4890 memcpy (str, const_str, i + 1);
4891
4892 while (1)
4893 {
4894 dash = strchr (str, '-');
4895 if (!dash)
4896 {
4897 warning (0, "value of -mfixed-range must have form REG1-REG2");
4898 return;
4899 }
4900 *dash = '\0';
4901 comma = strchr (dash + 1, ',');
4902 if (comma)
4903 *comma = '\0';
4904
4905 first = decode_reg_name (str);
4906 if (first < 0)
4907 {
4908 warning (0, "unknown register name: %s", str);
4909 return;
4910 }
4911
4912 last = decode_reg_name (dash + 1);
4913 if (last < 0)
4914 {
4915 warning (0, "unknown register name: %s", dash + 1);
4916 return;
4917 }
4918
4919 *dash = '-';
4920
4921 if (first > last)
4922 {
4923 warning (0, "%s-%s is an empty range", str, dash + 1);
4924 return;
4925 }
4926
4927 for (i = first; i <= last; ++i)
4928 fixed_regs[i] = call_used_regs[i] = 1;
4929
4930 if (!comma)
4931 break;
4932
4933 *comma = ',';
4934 str = comma + 1;
4935 }
4936}
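/* Usage sketch (hypothetical option value):

     -mfixed-range=80-85,127-127

   marks registers 80 through 85 and register 127 as fixed and
   call-used, so the register allocator will never use them.  Each
   endpoint may be written as a register number or as any register
   name understood by decode_reg_name.  */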
4937
644459d0 4938/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4939 can be generated using the fsmbi instruction. */
4940int
4941fsmbi_const_p (rtx x)
4942{
dea01258 4943 if (CONSTANT_P (x))
4944 {
5df189be 4945 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4946 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4947 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4948 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4949 }
4950 return 0;
4951}
4952
4953/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4954 can be generated using the cbd, chd, cwd or cdd instruction. */
4955int
4956cpat_const_p (rtx x, enum machine_mode mode)
4957{
4958 if (CONSTANT_P (x))
4959 {
4960 enum immediate_class c = classify_immediate (x, mode);
4961 return c == IC_CPAT;
4962 }
4963 return 0;
4964}
644459d0 4965
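/* Build the TImode shuffle-pattern constant that a cbd/chd/cwd/cdd
   instruction would produce.  ops[1] is the base (a CONST_INT, or a
   REG known to be 16-byte aligned), ops[2] the constant offset and
   ops[3] the element size in bytes.  The pattern passes the original
   quadword through unchanged except that the element-sized group of
   bytes at the insertion offset selects the element from its
   preferred slot.  Returns 0 when no constant pattern can be built.  */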
dea01258 4966rtx
4967gen_cpat_const (rtx * ops)
4968{
4969 unsigned char dst[16];
4970 int i, offset, shift, isize;
4971 if (GET_CODE (ops[3]) != CONST_INT
4972 || GET_CODE (ops[2]) != CONST_INT
4973 || (GET_CODE (ops[1]) != CONST_INT
4974 && GET_CODE (ops[1]) != REG))
4975 return 0;
4976 if (GET_CODE (ops[1]) == REG
4977 && (!REG_POINTER (ops[1])
4978 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4979 return 0;
644459d0 4980
4981 for (i = 0; i < 16; i++)
dea01258 4982 dst[i] = i + 16;
4983 isize = INTVAL (ops[3]);
4984 if (isize == 1)
4985 shift = 3;
4986 else if (isize == 2)
4987 shift = 2;
4988 else
4989 shift = 0;
4990 offset = (INTVAL (ops[2]) +
4991 (GET_CODE (ops[1]) ==
4992 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4993 for (i = 0; i < isize; i++)
4994 dst[offset + i] = i + shift;
4995 return array_to_constant (TImode, dst);
644459d0 4996}
4997
4998/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4999 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5000 than 16 bytes, the value is repeated across the rest of the array. */
5001void
5002constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5003{
5004 HOST_WIDE_INT val;
5005 int i, j, first;
5006
5007 memset (arr, 0, 16);
5008 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5009 if (GET_CODE (x) == CONST_INT
5010 || (GET_CODE (x) == CONST_DOUBLE
5011 && (mode == SFmode || mode == DFmode)))
5012 {
5013 gcc_assert (mode != VOIDmode && mode != BLKmode);
5014
5015 if (GET_CODE (x) == CONST_DOUBLE)
5016 val = const_double_to_hwint (x);
5017 else
5018 val = INTVAL (x);
5019 first = GET_MODE_SIZE (mode) - 1;
5020 for (i = first; i >= 0; i--)
5021 {
5022 arr[i] = val & 0xff;
5023 val >>= 8;
5024 }
5025 /* Splat the constant across the whole array. */
5026 for (j = 0, i = first + 1; i < 16; i++)
5027 {
5028 arr[i] = arr[j];
5029 j = (j == first) ? 0 : j + 1;
5030 }
5031 }
5032 else if (GET_CODE (x) == CONST_DOUBLE)
5033 {
5034 val = CONST_DOUBLE_LOW (x);
5035 for (i = 15; i >= 8; i--)
5036 {
5037 arr[i] = val & 0xff;
5038 val >>= 8;
5039 }
5040 val = CONST_DOUBLE_HIGH (x);
5041 for (i = 7; i >= 0; i--)
5042 {
5043 arr[i] = val & 0xff;
5044 val >>= 8;
5045 }
5046 }
5047 else if (GET_CODE (x) == CONST_VECTOR)
5048 {
5049 int units;
5050 rtx elt;
5051 mode = GET_MODE_INNER (mode);
5052 units = CONST_VECTOR_NUNITS (x);
5053 for (i = 0; i < units; i++)
5054 {
5055 elt = CONST_VECTOR_ELT (x, i);
5056 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5057 {
5058 if (GET_CODE (elt) == CONST_DOUBLE)
5059 val = const_double_to_hwint (elt);
5060 else
5061 val = INTVAL (elt);
5062 first = GET_MODE_SIZE (mode) - 1;
5063 if (first + i * GET_MODE_SIZE (mode) > 16)
5064 abort ();
5065 for (j = first; j >= 0; j--)
5066 {
5067 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5068 val >>= 8;
5069 }
5070 }
5071 }
5072 }
5073 else
5074 gcc_unreachable();
5075}
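/* Worked example: for the SImode constant 0x01020304 the first four
   bytes become 01 02 03 04 and are then splatted, giving

     01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04

   i.e. the value repeated in every word of the 16-byte array.  */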
5076
5077/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5078 smaller than 16 bytes, use the bytes that would represent that value
5079 in a register, e.g., for QImode return the value of arr[3]. */
5080rtx
e96f2783 5081array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5082{
5083 enum machine_mode inner_mode;
5084 rtvec v;
5085 int units, size, i, j, k;
5086 HOST_WIDE_INT val;
5087
5088 if (GET_MODE_CLASS (mode) == MODE_INT
5089 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5090 {
5091 j = GET_MODE_SIZE (mode);
5092 i = j < 4 ? 4 - j : 0;
5093 for (val = 0; i < j; i++)
5094 val = (val << 8) | arr[i];
5095 val = trunc_int_for_mode (val, mode);
5096 return GEN_INT (val);
5097 }
5098
5099 if (mode == TImode)
5100 {
5101 HOST_WIDE_INT high;
5102 for (i = high = 0; i < 8; i++)
5103 high = (high << 8) | arr[i];
5104 for (i = 8, val = 0; i < 16; i++)
5105 val = (val << 8) | arr[i];
5106 return immed_double_const (val, high, TImode);
5107 }
5108 if (mode == SFmode)
5109 {
5110 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5111 val = trunc_int_for_mode (val, SImode);
171b6d22 5112 return hwint_to_const_double (SFmode, val);
644459d0 5113 }
5114 if (mode == DFmode)
5115 {
1f915911 5116 for (i = 0, val = 0; i < 8; i++)
5117 val = (val << 8) | arr[i];
171b6d22 5118 return hwint_to_const_double (DFmode, val);
644459d0 5119 }
5120
5121 if (!VECTOR_MODE_P (mode))
5122 abort ();
5123
5124 units = GET_MODE_NUNITS (mode);
5125 size = GET_MODE_UNIT_SIZE (mode);
5126 inner_mode = GET_MODE_INNER (mode);
5127 v = rtvec_alloc (units);
5128
5129 for (k = i = 0; i < units; ++i)
5130 {
5131 val = 0;
5132 for (j = 0; j < size; j++, k++)
5133 val = (val << 8) | arr[k];
5134
5135 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5136 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5137 else
5138 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5139 }
5140 if (k > 16)
5141 abort ();
5142
5143 return gen_rtx_CONST_VECTOR (mode, v);
5144}
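/* E.g. with the 01 02 03 04 ... array from the example above, QImode
   yields (const_int 4) (byte 3, the preferred slot), HImode yields
   0x0304, and SImode recovers the original 0x01020304.  */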
5145
5146static void
5147reloc_diagnostic (rtx x)
5148{
712d2297 5149 tree decl = 0;
644459d0 5150 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5151 return;
5152
5153 if (GET_CODE (x) == SYMBOL_REF)
5154 decl = SYMBOL_REF_DECL (x);
5155 else if (GET_CODE (x) == CONST
5156 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5157 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5158
5159 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5160 if (decl && !DECL_P (decl))
5161 decl = 0;
5162
644459d0 5163 /* The decl could be a string constant. */
5164 if (decl && DECL_P (decl))
712d2297 5165 {
5166 location_t loc;
5167 /* We use last_assemble_variable_decl to get line information. It's
5168 not always going to be right and might not even be close, but will
5169 be right for the more common cases. */
5170 if (!last_assemble_variable_decl || in_section == ctors_section)
5171 loc = DECL_SOURCE_LOCATION (decl);
5172 else
5173 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5174
712d2297 5175 if (TARGET_WARN_RELOC)
5176 warning_at (loc, 0,
5177 "creating run-time relocation for %qD", decl);
5178 else
5179 error_at (loc,
5180 "creating run-time relocation for %qD", decl);
5181 }
5182 else
5183 {
5184 if (TARGET_WARN_RELOC)
5185 warning_at (input_location, 0, "creating run-time relocation");
5186 else
5187 error_at (input_location, "creating run-time relocation");
5188 }
644459d0 5189}
5190
5191/* Hook into assemble_integer so we can generate an error for run-time
5192 relocations. The SPU ABI disallows them. */
5193static bool
5194spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5195{
5196 /* By default run-time relocations aren't supported, but we allow them
 5197 in case users support them in their own run-time loader.  And we
 5198 provide a warning for those users who don't. */
5199 if ((GET_CODE (x) == SYMBOL_REF)
5200 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5201 reloc_diagnostic (x);
5202
5203 return default_assemble_integer (x, size, aligned_p);
5204}
5205
5206static void
5207spu_asm_globalize_label (FILE * file, const char *name)
5208{
5209 fputs ("\t.global\t", file);
5210 assemble_name (file, name);
5211 fputs ("\n", file);
5212}
5213
5214static bool
20d892d1 5215spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5216 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5217 bool speed ATTRIBUTE_UNUSED)
644459d0 5218{
5219 enum machine_mode mode = GET_MODE (x);
5220 int cost = COSTS_N_INSNS (2);
5221
5222 /* Folding to a CONST_VECTOR will use extra space but there might
5223 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5224 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5225 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5226 because this cost will only be compared against a single insn.
5227 if (code == CONST_VECTOR)
ca316360 5228 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5229 */
5230
5231 /* Use defaults for float operations. Not accurate but good enough. */
5232 if (mode == DFmode)
5233 {
5234 *total = COSTS_N_INSNS (13);
5235 return true;
5236 }
5237 if (mode == SFmode)
5238 {
5239 *total = COSTS_N_INSNS (6);
5240 return true;
5241 }
5242 switch (code)
5243 {
5244 case CONST_INT:
5245 if (satisfies_constraint_K (x))
5246 *total = 0;
5247 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5248 *total = COSTS_N_INSNS (1);
5249 else
5250 *total = COSTS_N_INSNS (3);
5251 return true;
5252
5253 case CONST:
5254 *total = COSTS_N_INSNS (3);
5255 return true;
5256
5257 case LABEL_REF:
5258 case SYMBOL_REF:
5259 *total = COSTS_N_INSNS (0);
5260 return true;
5261
5262 case CONST_DOUBLE:
5263 *total = COSTS_N_INSNS (5);
5264 return true;
5265
5266 case FLOAT_EXTEND:
5267 case FLOAT_TRUNCATE:
5268 case FLOAT:
5269 case UNSIGNED_FLOAT:
5270 case FIX:
5271 case UNSIGNED_FIX:
5272 *total = COSTS_N_INSNS (7);
5273 return true;
5274
5275 case PLUS:
5276 if (mode == TImode)
5277 {
5278 *total = COSTS_N_INSNS (9);
5279 return true;
5280 }
5281 break;
5282
5283 case MULT:
5284 cost =
5285 GET_CODE (XEXP (x, 0)) ==
5286 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5287 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5288 {
5289 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5290 {
5291 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5292 cost = COSTS_N_INSNS (14);
5293 if ((val & 0xffff) == 0)
5294 cost = COSTS_N_INSNS (9);
5295 else if (val > 0 && val < 0x10000)
5296 cost = COSTS_N_INSNS (11);
5297 }
5298 }
5299 *total = cost;
5300 return true;
5301 case DIV:
5302 case UDIV:
5303 case MOD:
5304 case UMOD:
5305 *total = COSTS_N_INSNS (20);
5306 return true;
5307 case ROTATE:
5308 case ROTATERT:
5309 case ASHIFT:
5310 case ASHIFTRT:
5311 case LSHIFTRT:
5312 *total = COSTS_N_INSNS (4);
5313 return true;
5314 case UNSPEC:
5315 if (XINT (x, 1) == UNSPEC_CONVERT)
5316 *total = COSTS_N_INSNS (0);
5317 else
5318 *total = COSTS_N_INSNS (4);
5319 return true;
5320 }
5321 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5322 if (GET_MODE_CLASS (mode) == MODE_INT
5323 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5324 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5325 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5326 *total = cost;
5327 return true;
5328}
5329
1bd43494 5330static enum machine_mode
5331spu_unwind_word_mode (void)
644459d0 5332{
1bd43494 5333 return SImode;
644459d0 5334}
5335
5336/* Decide whether we can make a sibling call to a function. DECL is the
5337 declaration of the function being targeted by the call and EXP is the
5338 CALL_EXPR representing the call. */
5339static bool
5340spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5341{
5342 return decl && !TARGET_LARGE_MEM;
5343}
5344
5345/* We need to correctly update the back chain pointer and the Available
 5346 Stack Size (which is in the second slot of the sp register). */
5347void
5348spu_allocate_stack (rtx op0, rtx op1)
5349{
5350 HOST_WIDE_INT v;
5351 rtx chain = gen_reg_rtx (V4SImode);
5352 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5353 rtx sp = gen_reg_rtx (V4SImode);
5354 rtx splatted = gen_reg_rtx (V4SImode);
5355 rtx pat = gen_reg_rtx (TImode);
5356
5357 /* copy the back chain so we can save it back again. */
5358 emit_move_insn (chain, stack_bot);
5359
5360 op1 = force_reg (SImode, op1);
5361
5362 v = 0x1020300010203ll;
5363 emit_move_insn (pat, immed_double_const (v, v, TImode));
5364 emit_insn (gen_shufb (splatted, op1, op1, pat));
5365
5366 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5367 emit_insn (gen_subv4si3 (sp, sp, splatted));
5368
5369 if (flag_stack_check)
5370 {
5371 rtx avail = gen_reg_rtx(SImode);
5372 rtx result = gen_reg_rtx(SImode);
5373 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5374 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5375 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5376 }
5377
5378 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5379
5380 emit_move_insn (stack_bot, chain);
5381
5382 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5383}
5384
5385void
5386spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5387{
5388 static unsigned char arr[16] =
5389 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5390 rtx temp = gen_reg_rtx (SImode);
5391 rtx temp2 = gen_reg_rtx (SImode);
5392 rtx temp3 = gen_reg_rtx (V4SImode);
5393 rtx temp4 = gen_reg_rtx (V4SImode);
5394 rtx pat = gen_reg_rtx (TImode);
5395 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5396
5397 /* Restore the backchain from the first word, sp from the second. */
5398 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5399 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5400
5401 emit_move_insn (pat, array_to_constant (TImode, arr));
5402
5403 /* Compute Available Stack Size for sp */
5404 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5405 emit_insn (gen_shufb (temp3, temp, temp, pat));
5406
5407 /* Compute Available Stack Size for back chain */
5408 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5409 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5410 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5411
5412 emit_insn (gen_addv4si3 (sp, sp, temp3));
5413 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5414}
5415
5416static void
5417spu_init_libfuncs (void)
5418{
5419 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5420 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5421 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5422 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5423 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5424 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5425 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5426 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5427 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5428 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5429 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5430 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5431
5432 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5433 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5434
5825ec3f 5435 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5436 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5437 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5438 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5439 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5440 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5441 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5442 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5443 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5444 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5445 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5446 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5447
19a53068 5448 set_optab_libfunc (smul_optab, TImode, "__multi3");
5449 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5450 set_optab_libfunc (smod_optab, TImode, "__modti3");
5451 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5452 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5453 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5454}
5455
5456/* Make a subreg, stripping any existing subreg. We could possibly just
5457 call simplify_subreg, but in this case we know what we want. */
5458rtx
5459spu_gen_subreg (enum machine_mode mode, rtx x)
5460{
5461 if (GET_CODE (x) == SUBREG)
5462 x = SUBREG_REG (x);
5463 if (GET_MODE (x) == mode)
5464 return x;
5465 return gen_rtx_SUBREG (mode, x, 0);
5466}
5467
5468static bool
fb80456a 5469spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5470{
5471 return (TYPE_MODE (type) == BLKmode
5472 && ((type) == 0
5473 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5474 || int_size_in_bytes (type) >
5475 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5476}
5477\f
5478/* Create the built-in types and functions */
5479
c2233b46 5480enum spu_function_code
5481{
5482#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5483#include "spu-builtins.def"
5484#undef DEF_BUILTIN
5485 NUM_SPU_BUILTINS
5486};
5487
5488extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5489
644459d0 5490struct spu_builtin_description spu_builtins[] = {
5491#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5492 {fcode, icode, name, type, params},
644459d0 5493#include "spu-builtins.def"
5494#undef DEF_BUILTIN
5495};
5496
0c5c4d59 5497static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5498
5499/* Returns the spu builtin decl for CODE. */
e6925042 5500
5501static tree
5502spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5503{
5504 if (code >= NUM_SPU_BUILTINS)
5505 return error_mark_node;
5506
0c5c4d59 5507 return spu_builtin_decls[code];
e6925042 5508}
5509
5510
644459d0 5511static void
5512spu_init_builtins (void)
5513{
5514 struct spu_builtin_description *d;
5515 unsigned int i;
5516
5517 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5518 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5519 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5520 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5521 V4SF_type_node = build_vector_type (float_type_node, 4);
5522 V2DF_type_node = build_vector_type (double_type_node, 2);
5523
5524 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5525 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5526 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5527 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5528
c4ecce0c 5529 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5530
5531 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5532 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5533 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5534 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5535 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5536 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5538 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5539 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5540 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5541 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5542 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5543
5544 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5545 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5546 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5548 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5549 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5550 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5552
5553 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5554 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5555
5556 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5557
5558 spu_builtin_types[SPU_BTI_PTR] =
5559 build_pointer_type (build_qualified_type
5560 (void_type_node,
5561 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5562
5563 /* For each builtin we build a new prototype. The tree code will make
5564 sure nodes are shared. */
5565 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5566 {
5567 tree p;
5568 char name[64]; /* build_function will make a copy. */
5569 int parm;
5570
5571 if (d->name == 0)
5572 continue;
5573
5dfbd18f 5574 /* Find last parm. */
644459d0 5575 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5576 ;
644459d0 5577
5578 p = void_list_node;
5579 while (parm > 1)
5580 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5581
5582 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5583
5584 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5585 spu_builtin_decls[i] =
3726fe5e 5586 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5587 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5588 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5589
5590 /* These builtins don't throw. */
0c5c4d59 5591 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5592 }
5593}
5594
cf31d486 5595void
5596spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5597{
5598 static unsigned char arr[16] =
5599 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5600
5601 rtx temp = gen_reg_rtx (Pmode);
5602 rtx temp2 = gen_reg_rtx (V4SImode);
5603 rtx temp3 = gen_reg_rtx (V4SImode);
5604 rtx pat = gen_reg_rtx (TImode);
5605 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5606
5607 emit_move_insn (pat, array_to_constant (TImode, arr));
5608
5609 /* Restore the sp. */
5610 emit_move_insn (temp, op1);
5611 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5612
5613 /* Compute available stack size for sp. */
5614 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5615 emit_insn (gen_shufb (temp3, temp, temp, pat));
5616
5617 emit_insn (gen_addv4si3 (sp, sp, temp3));
5618 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5619}
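
/* Added commentary (illustrative): the shuffle pattern { 0,1,2,3 }
   repeated four times replicates the first word of TEMP -- the
   difference between the new and old stack pointers -- into every word
   slot of TEMP3, so the single addv4si3 updates both the stack pointer
   and the available-stack-size word that the SPU ABI keeps in the same
   128-bit register.  */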
5620
644459d0 5621int
5622spu_safe_dma (HOST_WIDE_INT channel)
5623{
006e4b96 5624 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5625}
5626
5627void
5628spu_builtin_splats (rtx ops[])
5629{
5630 enum machine_mode mode = GET_MODE (ops[0]);
5631 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5632 {
5633 unsigned char arr[16];
5634 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5635 emit_move_insn (ops[0], array_to_constant (mode, arr));
5636 }
644459d0 5637 else
5638 {
5639 rtx reg = gen_reg_rtx (TImode);
5640 rtx shuf;
5641 if (GET_CODE (ops[1]) != REG
5642 && GET_CODE (ops[1]) != SUBREG)
5643 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5644 switch (mode)
5645 {
5646 case V2DImode:
5647 case V2DFmode:
5648 shuf =
5649 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5650 TImode);
5651 break;
5652 case V4SImode:
5653 case V4SFmode:
5654 shuf =
5655 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5656 TImode);
5657 break;
5658 case V8HImode:
5659 shuf =
5660 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5661 TImode);
5662 break;
5663 case V16QImode:
5664 shuf =
5665 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5666 TImode);
5667 break;
5668 default:
5669 abort ();
5670 }
5671 emit_move_insn (reg, shuf);
5672 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5673 }
5674}
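
/* Worked example (illustrative): for V4SImode the shuffle constant is
   the byte pattern 00 01 02 03 repeated four times.  With ops[1] used
   as both shufb sources, every word of the result is assembled from
   bytes 0..3 of ops[1], i.e. the scalar in the preferred slot is
   replicated into all four elements.  */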
5675
5676void
5677spu_builtin_extract (rtx ops[])
5678{
5679 enum machine_mode mode;
5680 rtx rot, from, tmp;
5681
5682 mode = GET_MODE (ops[1]);
5683
5684 if (GET_CODE (ops[2]) == CONST_INT)
5685 {
5686 switch (mode)
5687 {
5688 case V16QImode:
5689 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5690 break;
5691 case V8HImode:
5692 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5693 break;
5694 case V4SFmode:
5695 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5696 break;
5697 case V4SImode:
5698 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5699 break;
5700 case V2DImode:
5701 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5702 break;
5703 case V2DFmode:
5704 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5705 break;
5706 default:
5707 abort ();
5708 }
5709 return;
5710 }
5711
5712 from = spu_gen_subreg (TImode, ops[1]);
5713 rot = gen_reg_rtx (TImode);
5714 tmp = gen_reg_rtx (SImode);
5715
5716 switch (mode)
5717 {
5718 case V16QImode:
5719 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5720 break;
5721 case V8HImode:
5722 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5723 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5724 break;
5725 case V4SFmode:
5726 case V4SImode:
5727 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5728 break;
5729 case V2DImode:
5730 case V2DFmode:
5731 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5732 break;
5733 default:
5734 abort ();
5735 }
5736 emit_insn (gen_rotqby_ti (rot, from, tmp));
5737
5738 emit_insn (gen_spu_convert (ops[0], rot));
5739}
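
/* Worked example for the non-constant path (illustrative): extracting
   element I from a V4SImode vector computes TMP = I << 2, the byte
   offset of the element, and rotqby rotates the quadword left by that
   many bytes, leaving the requested word in the preferred slot where
   spu_convert can pick up the scalar result.  */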
5740
5741void
5742spu_builtin_insert (rtx ops[])
5743{
5744 enum machine_mode mode = GET_MODE (ops[0]);
5745 enum machine_mode imode = GET_MODE_INNER (mode);
5746 rtx mask = gen_reg_rtx (TImode);
5747 rtx offset;
5748
5749 if (GET_CODE (ops[3]) == CONST_INT)
5750 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5751 else
5752 {
5753 offset = gen_reg_rtx (SImode);
5754 emit_insn (gen_mulsi3
5755 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5756 }
5757 emit_insn (gen_cpat
5758 (mask, stack_pointer_rtx, offset,
5759 GEN_INT (GET_MODE_SIZE (imode))));
5760 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5761}
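
/* Added commentary (hedged): gen_cpat expands to the SPU "generate
   controls for insertion" family (cbd/chd/cwd/cdd) for the byte offset
   computed above, and the final shufb uses that mask to merge a single
   element into the destination vector while leaving the remaining bytes
   unchanged.  */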
5762
5763void
5764spu_builtin_promote (rtx ops[])
5765{
5766 enum machine_mode mode, imode;
5767 rtx rot, from, offset;
5768 HOST_WIDE_INT pos;
5769
5770 mode = GET_MODE (ops[0]);
5771 imode = GET_MODE_INNER (mode);
5772
5773 from = gen_reg_rtx (TImode);
5774 rot = spu_gen_subreg (TImode, ops[0]);
5775
5776 emit_insn (gen_spu_convert (from, ops[1]));
5777
5778 if (GET_CODE (ops[2]) == CONST_INT)
5779 {
5780 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5781 if (GET_MODE_SIZE (imode) < 4)
5782 pos += 4 - GET_MODE_SIZE (imode);
5783 offset = GEN_INT (pos & 15);
5784 }
5785 else
5786 {
5787 offset = gen_reg_rtx (SImode);
5788 switch (mode)
5789 {
5790 case V16QImode:
5791 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5792 break;
5793 case V8HImode:
5794 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5795 emit_insn (gen_addsi3 (offset, offset, offset));
5796 break;
5797 case V4SFmode:
5798 case V4SImode:
5799 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5800 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5801 break;
5802 case V2DImode:
5803 case V2DFmode:
5804 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5805 break;
5806 default:
5807 abort ();
5808 }
5809 }
5810 emit_insn (gen_rotqby_ti (rot, from, offset));
5811}
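
/* Worked example (illustrative): promoting a scalar into element 1 of a
   V4SImode vector computes POS = -4 * 1 = -4, so OFFSET = -4 & 15 = 12,
   and the rotqby by 12 bytes moves the scalar from the preferred slot
   (bytes 0..3) into the byte positions of element 1 (bytes 4..7).  */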
5812
e96f2783 5813static void
5814spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5815{
e96f2783 5816 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5817 rtx shuf = gen_reg_rtx (V4SImode);
5818 rtx insn = gen_reg_rtx (V4SImode);
5819 rtx shufc;
5820 rtx insnc;
5821 rtx mem;
5822
5823 fnaddr = force_reg (SImode, fnaddr);
5824 cxt = force_reg (SImode, cxt);
5825
5826 if (TARGET_LARGE_MEM)
5827 {
5828 rtx rotl = gen_reg_rtx (V4SImode);
5829 rtx mask = gen_reg_rtx (V4SImode);
5830 rtx bi = gen_reg_rtx (SImode);
e96f2783 5831 static unsigned char const shufa[16] = {
644459d0 5832 2, 3, 0, 1, 18, 19, 16, 17,
5833 0, 1, 2, 3, 16, 17, 18, 19
5834 };
e96f2783 5835 static unsigned char const insna[16] = {
644459d0 5836 0x41, 0, 0, 79,
5837 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5838 0x60, 0x80, 0, 79,
5839 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5840 };
5841
5842 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5843 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5844
5845 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5846 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5847 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5848 emit_insn (gen_selb (insn, insnc, rotl, mask));
5849
e96f2783 5850 mem = adjust_address (m_tramp, V4SImode, 0);
5851 emit_move_insn (mem, insn);
644459d0 5852
5853 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5854 mem = adjust_address (m_tramp, Pmode, 16);
5855 emit_move_insn (mem, bi);
644459d0 5856 }
5857 else
5858 {
5859 rtx scxt = gen_reg_rtx (SImode);
5860 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5861 static unsigned char const insna[16] = {
644459d0 5862 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5863 0x30, 0, 0, 0,
5864 0, 0, 0, 0,
5865 0, 0, 0, 0
5866 };
5867
5868 shufc = gen_reg_rtx (TImode);
5869 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5870
 5871	      /* By OR'ing all of cxt with the ila opcode we are assuming cxt
 5872	         fits in 18 bits and the last 4 bits are zeros.  This will be true if
 5873	         the stack pointer is initialized to 0x3fff0 at program start;
 5874	         otherwise the ila instruction will be garbage. */
5875
5876 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5877 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5878 emit_insn (gen_cpat
5879 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5880 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5881 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5882
e96f2783 5883 mem = adjust_address (m_tramp, V4SImode, 0);
5884 emit_move_insn (mem, insn);
644459d0 5885 }
5886 emit_insn (gen_sync ());
5887}
5888
08c6cbd2 5889static bool
5890spu_warn_func_return (tree decl)
5891{
5892 /* Naked functions are implemented entirely in assembly, including the
5893 return sequence, so suppress warnings about this. */
5894 return !spu_naked_function_p (decl);
5895}
5896
644459d0 5897void
5898spu_expand_sign_extend (rtx ops[])
5899{
5900 unsigned char arr[16];
5901 rtx pat = gen_reg_rtx (TImode);
5902 rtx sign, c;
5903 int i, last;
5904 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5905 if (GET_MODE (ops[1]) == QImode)
5906 {
5907 sign = gen_reg_rtx (HImode);
5908 emit_insn (gen_extendqihi2 (sign, ops[1]));
5909 for (i = 0; i < 16; i++)
5910 arr[i] = 0x12;
5911 arr[last] = 0x13;
5912 }
5913 else
5914 {
5915 for (i = 0; i < 16; i++)
5916 arr[i] = 0x10;
5917 switch (GET_MODE (ops[1]))
5918 {
5919 case HImode:
5920 sign = gen_reg_rtx (SImode);
5921 emit_insn (gen_extendhisi2 (sign, ops[1]));
5922 arr[last] = 0x03;
5923 arr[last - 1] = 0x02;
5924 break;
5925 case SImode:
5926 sign = gen_reg_rtx (SImode);
5927 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5928 for (i = 0; i < 4; i++)
5929 arr[last - i] = 3 - i;
5930 break;
5931 case DImode:
5932 sign = gen_reg_rtx (SImode);
5933 c = gen_reg_rtx (SImode);
5934 emit_insn (gen_spu_convert (c, ops[1]));
5935 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5936 for (i = 0; i < 8; i++)
5937 arr[last - i] = 7 - i;
5938 break;
5939 default:
5940 abort ();
5941 }
5942 }
5943 emit_move_insn (pat, array_to_constant (TImode, arr));
5944 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5945}
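
/* Worked example (illustrative): sign-extending an SImode value to
   TImode builds a control pattern of 0x10 in bytes 0..11 and 00 01 02 03
   in bytes 12..15.  Control bytes 0x10..0x1f select from the second
   shufb source (the replicated sign word), 0x00..0x0f from the first
   (the value), so the result is twelve copies of the sign byte followed
   by the four original value bytes.  */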
5946
 5947	/* Expand vector initialization.  If there are any constant parts,
 5948	   load the constant parts first, then load any non-constant parts.  */
5949void
5950spu_expand_vector_init (rtx target, rtx vals)
5951{
5952 enum machine_mode mode = GET_MODE (target);
5953 int n_elts = GET_MODE_NUNITS (mode);
5954 int n_var = 0;
5955 bool all_same = true;
790c536c 5956 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5957 int i;
5958
5959 first = XVECEXP (vals, 0, 0);
5960 for (i = 0; i < n_elts; ++i)
5961 {
5962 x = XVECEXP (vals, 0, i);
e442af0b 5963 if (!(CONST_INT_P (x)
5964 || GET_CODE (x) == CONST_DOUBLE
5965 || GET_CODE (x) == CONST_FIXED))
644459d0 5966 ++n_var;
5967 else
5968 {
5969 if (first_constant == NULL_RTX)
5970 first_constant = x;
5971 }
5972 if (i > 0 && !rtx_equal_p (x, first))
5973 all_same = false;
5974 }
5975
5976 /* if all elements are the same, use splats to repeat elements */
5977 if (all_same)
5978 {
5979 if (!CONSTANT_P (first)
5980 && !register_operand (first, GET_MODE (x)))
5981 first = force_reg (GET_MODE (first), first);
5982 emit_insn (gen_spu_splats (target, first));
5983 return;
5984 }
5985
5986 /* load constant parts */
5987 if (n_var != n_elts)
5988 {
5989 if (n_var == 0)
5990 {
5991 emit_move_insn (target,
5992 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5993 }
5994 else
5995 {
5996 rtx constant_parts_rtx = copy_rtx (vals);
5997
5998 gcc_assert (first_constant != NULL_RTX);
 5999	          /* Fill empty slots with the first constant; this increases
 6000	             our chance of using splats in the recursive call below. */
6001 for (i = 0; i < n_elts; ++i)
e442af0b 6002 {
6003 x = XVECEXP (constant_parts_rtx, 0, i);
6004 if (!(CONST_INT_P (x)
6005 || GET_CODE (x) == CONST_DOUBLE
6006 || GET_CODE (x) == CONST_FIXED))
6007 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6008 }
644459d0 6009
6010 spu_expand_vector_init (target, constant_parts_rtx);
6011 }
6012 }
6013
6014 /* load variable parts */
6015 if (n_var != 0)
6016 {
6017 rtx insert_operands[4];
6018
6019 insert_operands[0] = target;
6020 insert_operands[2] = target;
6021 for (i = 0; i < n_elts; ++i)
6022 {
6023 x = XVECEXP (vals, 0, i);
e442af0b 6024 if (!(CONST_INT_P (x)
6025 || GET_CODE (x) == CONST_DOUBLE
6026 || GET_CODE (x) == CONST_FIXED))
644459d0 6027 {
6028 if (!register_operand (x, GET_MODE (x)))
6029 x = force_reg (GET_MODE (x), x);
6030 insert_operands[1] = x;
6031 insert_operands[3] = GEN_INT (i);
6032 spu_builtin_insert (insert_operands);
6033 }
6034 }
6035 }
6036}
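
/* Worked example (illustrative): initializing a V4SImode vector from
   { x, 1, 2, 3 } with X in a register first recurses on the constant
   vector { 1, 1, 2, 3 } (the variable slot padded with the first
   constant so a splat becomes more likely), and then emits one
   spu_builtin_insert to place X into element 0.  */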
6352eedf 6037
5474166e 6038/* Return the insn index for the vector compare instruction for the given
 6039	   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6040
6041static int
6042get_vec_cmp_insn (enum rtx_code code,
6043 enum machine_mode dest_mode,
6044 enum machine_mode op_mode)
6045
6046{
6047 switch (code)
6048 {
6049 case EQ:
6050 if (dest_mode == V16QImode && op_mode == V16QImode)
6051 return CODE_FOR_ceq_v16qi;
6052 if (dest_mode == V8HImode && op_mode == V8HImode)
6053 return CODE_FOR_ceq_v8hi;
6054 if (dest_mode == V4SImode && op_mode == V4SImode)
6055 return CODE_FOR_ceq_v4si;
6056 if (dest_mode == V4SImode && op_mode == V4SFmode)
6057 return CODE_FOR_ceq_v4sf;
6058 if (dest_mode == V2DImode && op_mode == V2DFmode)
6059 return CODE_FOR_ceq_v2df;
6060 break;
6061 case GT:
6062 if (dest_mode == V16QImode && op_mode == V16QImode)
6063 return CODE_FOR_cgt_v16qi;
6064 if (dest_mode == V8HImode && op_mode == V8HImode)
6065 return CODE_FOR_cgt_v8hi;
6066 if (dest_mode == V4SImode && op_mode == V4SImode)
6067 return CODE_FOR_cgt_v4si;
6068 if (dest_mode == V4SImode && op_mode == V4SFmode)
6069 return CODE_FOR_cgt_v4sf;
6070 if (dest_mode == V2DImode && op_mode == V2DFmode)
6071 return CODE_FOR_cgt_v2df;
6072 break;
6073 case GTU:
6074 if (dest_mode == V16QImode && op_mode == V16QImode)
6075 return CODE_FOR_clgt_v16qi;
6076 if (dest_mode == V8HImode && op_mode == V8HImode)
6077 return CODE_FOR_clgt_v8hi;
6078 if (dest_mode == V4SImode && op_mode == V4SImode)
6079 return CODE_FOR_clgt_v4si;
6080 break;
6081 default:
6082 break;
6083 }
6084 return -1;
6085}
6086
6087/* Emit vector compare for operands OP0 and OP1 using code RCODE.
 6088	   DMODE is the expected destination mode.  This is a recursive function.  */
6089
6090static rtx
6091spu_emit_vector_compare (enum rtx_code rcode,
6092 rtx op0, rtx op1,
6093 enum machine_mode dmode)
6094{
6095 int vec_cmp_insn;
6096 rtx mask;
6097 enum machine_mode dest_mode;
6098 enum machine_mode op_mode = GET_MODE (op1);
6099
6100 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6101
 6102	  /* Floating point vector compare instructions use destination V4SImode.
 6103	     Double floating point vector compare instructions use destination V2DImode.
 6104	     Move the destination to the appropriate mode later.  */
6105 if (dmode == V4SFmode)
6106 dest_mode = V4SImode;
6107 else if (dmode == V2DFmode)
6108 dest_mode = V2DImode;
6109 else
6110 dest_mode = dmode;
6111
6112 mask = gen_reg_rtx (dest_mode);
6113 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6114
6115 if (vec_cmp_insn == -1)
6116 {
6117 bool swap_operands = false;
6118 bool try_again = false;
6119 switch (rcode)
6120 {
6121 case LT:
6122 rcode = GT;
6123 swap_operands = true;
6124 try_again = true;
6125 break;
6126 case LTU:
6127 rcode = GTU;
6128 swap_operands = true;
6129 try_again = true;
6130 break;
6131 case NE:
e20943d4 6132 case UNEQ:
6133 case UNLE:
6134 case UNLT:
6135 case UNGE:
6136 case UNGT:
6137 case UNORDERED:
5474166e 6138	  /* Implement these as the complement of the reversed comparison, e.g. A != B as ~(A==B).  */
6139 {
e20943d4 6140 enum rtx_code rev_code;
5474166e 6141 enum insn_code nor_code;
e20943d4 6142 rtx rev_mask;
6143
6144 rev_code = reverse_condition_maybe_unordered (rcode);
6145 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6146
d6bf3b14 6147 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6148 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6149 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6150 if (dmode != dest_mode)
6151 {
6152 rtx temp = gen_reg_rtx (dest_mode);
6153 convert_move (temp, mask, 0);
6154 return temp;
6155 }
6156 return mask;
6157 }
6158 break;
6159 case GE:
6160 case GEU:
6161 case LE:
6162 case LEU:
6163 /* Try GT/GTU/LT/LTU OR EQ */
6164 {
6165 rtx c_rtx, eq_rtx;
6166 enum insn_code ior_code;
6167 enum rtx_code new_code;
6168
6169 switch (rcode)
6170 {
6171 case GE: new_code = GT; break;
6172 case GEU: new_code = GTU; break;
6173 case LE: new_code = LT; break;
6174 case LEU: new_code = LTU; break;
6175 default:
6176 gcc_unreachable ();
6177 }
6178
6179 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6180 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6181
d6bf3b14 6182 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6183 gcc_assert (ior_code != CODE_FOR_nothing);
6184 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6185 if (dmode != dest_mode)
6186 {
6187 rtx temp = gen_reg_rtx (dest_mode);
6188 convert_move (temp, mask, 0);
6189 return temp;
6190 }
6191 return mask;
6192 }
6193 break;
e20943d4 6194 case LTGT:
6195 /* Try LT OR GT */
6196 {
6197 rtx lt_rtx, gt_rtx;
6198 enum insn_code ior_code;
6199
6200 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6201 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6202
6203 ior_code = optab_handler (ior_optab, dest_mode);
6204 gcc_assert (ior_code != CODE_FOR_nothing);
6205 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6206 if (dmode != dest_mode)
6207 {
6208 rtx temp = gen_reg_rtx (dest_mode);
6209 convert_move (temp, mask, 0);
6210 return temp;
6211 }
6212 return mask;
6213 }
6214 break;
6215 case ORDERED:
6216 /* Implement as (A==A) & (B==B) */
6217 {
6218 rtx a_rtx, b_rtx;
6219 enum insn_code and_code;
6220
6221 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6222 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6223
6224 and_code = optab_handler (and_optab, dest_mode);
6225 gcc_assert (and_code != CODE_FOR_nothing);
6226 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6227 if (dmode != dest_mode)
6228 {
6229 rtx temp = gen_reg_rtx (dest_mode);
6230 convert_move (temp, mask, 0);
6231 return temp;
6232 }
6233 return mask;
6234 }
6235 break;
5474166e 6236 default:
6237 gcc_unreachable ();
6238 }
6239
6240 /* You only get two chances. */
6241 if (try_again)
6242 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6243
6244 gcc_assert (vec_cmp_insn != -1);
6245
6246 if (swap_operands)
6247 {
6248 rtx tmp;
6249 tmp = op0;
6250 op0 = op1;
6251 op1 = tmp;
6252 }
6253 }
6254
6255 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6256 if (dmode != dest_mode)
6257 {
6258 rtx temp = gen_reg_rtx (dest_mode);
6259 convert_move (temp, mask, 0);
6260 return temp;
6261 }
6262 return mask;
6263}
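
/* Added sketch (illustrative): a V4SImode "a >= b" has no direct compare
   insn, so the code above emits

     c    = (a > b)      cgt
     eq   = (a == b)     ceq
     mask = c | eq

   while "a < b" is rewritten to GT with the two operands swapped before
   the insn lookup is retried.  */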
6264
6265
6266/* Emit vector conditional expression.
6267 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6268 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6269
6270int
6271spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6272 rtx cond, rtx cc_op0, rtx cc_op1)
6273{
6274 enum machine_mode dest_mode = GET_MODE (dest);
6275 enum rtx_code rcode = GET_CODE (cond);
6276 rtx mask;
6277
6278 /* Get the vector mask for the given relational operations. */
6279 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6280
6281 emit_insn(gen_selb (dest, op2, op1, mask));
6282
6283 return 1;
6284}
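
/* Added commentary (hedged): the value operands of selb are deliberately
   reversed -- selb takes bits from its second value operand where the
   mask is 1 -- so lanes where the comparison holds receive OP1 and the
   remaining lanes receive OP2, matching VEC_COND_EXPR semantics.  */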
6285
6352eedf 6286static rtx
6287spu_force_reg (enum machine_mode mode, rtx op)
6288{
6289 rtx x, r;
6290 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6291 {
6292 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6293 || GET_MODE (op) == BLKmode)
6294 return force_reg (mode, convert_to_mode (mode, op, 0));
6295 abort ();
6296 }
6297
6298 r = force_reg (GET_MODE (op), op);
6299 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6300 {
6301 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6302 if (x)
6303 return x;
6304 }
6305
6306 x = gen_reg_rtx (mode);
6307 emit_insn (gen_spu_convert (x, r));
6308 return x;
6309}
6310
6311static void
6312spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6313{
6314 HOST_WIDE_INT v = 0;
6315 int lsbits;
6316 /* Check the range of immediate operands. */
6317 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6318 {
6319 int range = p - SPU_BTI_7;
5df189be 6320
6321 if (!CONSTANT_P (op))
bf776685 6322 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6323 d->name,
6324 spu_builtin_range[range].low, spu_builtin_range[range].high);
6325
6326 if (GET_CODE (op) == CONST
6327 && (GET_CODE (XEXP (op, 0)) == PLUS
6328 || GET_CODE (XEXP (op, 0)) == MINUS))
6329 {
6330 v = INTVAL (XEXP (XEXP (op, 0), 1));
6331 op = XEXP (XEXP (op, 0), 0);
6332 }
6333 else if (GET_CODE (op) == CONST_INT)
6334 v = INTVAL (op);
5df189be 6335 else if (GET_CODE (op) == CONST_VECTOR
6336 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6337 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6338
 6339	      /* The default for v is 0, which is valid in every range. */
6340 if (v < spu_builtin_range[range].low
6341 || v > spu_builtin_range[range].high)
bf776685 6342 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6343 d->name,
6344 spu_builtin_range[range].low, spu_builtin_range[range].high,
6345 v);
6352eedf 6346
6347 switch (p)
6348 {
6349 case SPU_BTI_S10_4:
6350 lsbits = 4;
6351 break;
6352 case SPU_BTI_U16_2:
 6353	  /* This is only used in lqa and stqa.  Even though the insns
 6354	     encode 16 bits of the address (all but the 2 least
 6355	     significant), only 14 bits are used because it is masked to
 6356	     be 16-byte aligned. */
6357 lsbits = 4;
6358 break;
6359 case SPU_BTI_S16_2:
6360 /* This is used for lqr and stqr. */
6361 lsbits = 2;
6362 break;
6363 default:
6364 lsbits = 0;
6365 }
6366
6367 if (GET_CODE (op) == LABEL_REF
6368 || (GET_CODE (op) == SYMBOL_REF
6369 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6370 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6371 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6372 d->name);
6373 }
6374}
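
/* Example of the diagnostics above (illustrative): passing the literal
   17 to a builtin whose operand is typed SPU_BTI_S10_4 triggers the
   "4 least significant bits ... are ignored" warning (assuming 17 lies
   within the operand's accepted range), since lsbits is 4 and
   17 & 15 is nonzero.  */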
6375
6376
70ca06f8 6377static int
5df189be 6378expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6379 rtx target, rtx ops[])
6380{
bc620c5c 6381 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6382 int i = 0, a;
6352eedf 6383
6384 /* Expand the arguments into rtl. */
6385
6386 if (d->parm[0] != SPU_BTI_VOID)
6387 ops[i++] = target;
6388
70ca06f8 6389 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6390 {
5df189be 6391 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6392 if (arg == 0)
6393 abort ();
b9c74b4d 6394 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6395 }
70ca06f8 6396
32f79657 6397 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6398 return i;
6352eedf 6399}
6400
6401static rtx
6402spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6403 tree exp, rtx target)
6352eedf 6404{
6405 rtx pat;
6406 rtx ops[8];
bc620c5c 6407 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6408 enum machine_mode mode, tmode;
6409 int i, p;
70ca06f8 6410 int n_operands;
6352eedf 6411 tree return_type;
6412
6413 /* Set up ops[] with values from arglist. */
70ca06f8 6414 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6415
6416 /* Handle the target operand which must be operand 0. */
6417 i = 0;
6418 if (d->parm[0] != SPU_BTI_VOID)
6419 {
6420
 6421	      /* We prefer the mode specified for the match_operand; otherwise
 6422	         use the mode from the builtin function prototype. */
6423 tmode = insn_data[d->icode].operand[0].mode;
6424 if (tmode == VOIDmode)
6425 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6426
 6427	      /* Try to use target, because not using it can lead to extra copies,
 6428	         and when we are using all of the registers, extra copies lead
 6429	         to extra spills. */
6430 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6431 ops[0] = target;
6432 else
6433 target = ops[0] = gen_reg_rtx (tmode);
6434
6435 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6436 abort ();
6437
6438 i++;
6439 }
6440
a76866d3 6441 if (d->fcode == SPU_MASK_FOR_LOAD)
6442 {
6443 enum machine_mode mode = insn_data[icode].operand[1].mode;
6444 tree arg;
6445 rtx addr, op, pat;
6446
6447 /* get addr */
5df189be 6448 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6449 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6450 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6451 addr = memory_address (mode, op);
6452
6453 /* negate addr */
6454 op = gen_reg_rtx (GET_MODE (addr));
6455 emit_insn (gen_rtx_SET (VOIDmode, op,
6456 gen_rtx_NEG (GET_MODE (addr), addr)));
6457 op = gen_rtx_MEM (mode, op);
6458
6459 pat = GEN_FCN (icode) (target, op);
6460 if (!pat)
6461 return 0;
6462 emit_insn (pat);
6463 return target;
6464 }
6465
6352eedf 6466	  /* Ignore align_hint, but still expand its args in case they have
 6467	     side effects. */
6468 if (icode == CODE_FOR_spu_align_hint)
6469 return 0;
6470
6471 /* Handle the rest of the operands. */
70ca06f8 6472 for (p = 1; i < n_operands; i++, p++)
6352eedf 6473 {
6474 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6475 mode = insn_data[d->icode].operand[i].mode;
6476 else
6477 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6478
6479 /* mode can be VOIDmode here for labels */
6480
6481 /* For specific intrinsics with an immediate operand, e.g.,
6482 si_ai(), we sometimes need to convert the scalar argument to a
6483 vector argument by splatting the scalar. */
6484 if (VECTOR_MODE_P (mode)
6485 && (GET_CODE (ops[i]) == CONST_INT
6486 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6487 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6488 {
6489 if (GET_CODE (ops[i]) == CONST_INT)
6490 ops[i] = spu_const (mode, INTVAL (ops[i]));
6491 else
6492 {
6493 rtx reg = gen_reg_rtx (mode);
6494 enum machine_mode imode = GET_MODE_INNER (mode);
6495 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6496 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6497 if (imode != GET_MODE (ops[i]))
6498 ops[i] = convert_to_mode (imode, ops[i],
6499 TYPE_UNSIGNED (spu_builtin_types
6500 [d->parm[i]]));
6501 emit_insn (gen_spu_splats (reg, ops[i]));
6502 ops[i] = reg;
6503 }
6504 }
6505
5df189be 6506 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6507
6352eedf 6508 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6509 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6510 }
6511
70ca06f8 6512 switch (n_operands)
6352eedf 6513 {
6514 case 0:
6515 pat = GEN_FCN (icode) (0);
6516 break;
6517 case 1:
6518 pat = GEN_FCN (icode) (ops[0]);
6519 break;
6520 case 2:
6521 pat = GEN_FCN (icode) (ops[0], ops[1]);
6522 break;
6523 case 3:
6524 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6525 break;
6526 case 4:
6527 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6528 break;
6529 case 5:
6530 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6531 break;
6532 case 6:
6533 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6534 break;
6535 default:
6536 abort ();
6537 }
6538
6539 if (!pat)
6540 abort ();
6541
6542 if (d->type == B_CALL || d->type == B_BISLED)
6543 emit_call_insn (pat);
6544 else if (d->type == B_JUMP)
6545 {
6546 emit_jump_insn (pat);
6547 emit_barrier ();
6548 }
6549 else
6550 emit_insn (pat);
6551
6552 return_type = spu_builtin_types[d->parm[0]];
6553 if (d->parm[0] != SPU_BTI_VOID
6554 && GET_MODE (target) != TYPE_MODE (return_type))
6555 {
 6556	      /* target is the return value.  It should always have the mode of
 6557	         the builtin function prototype. */
6558 target = spu_force_reg (TYPE_MODE (return_type), target);
6559 }
6560
6561 return target;
6562}
6563
6564rtx
6565spu_expand_builtin (tree exp,
6566 rtx target,
6567 rtx subtarget ATTRIBUTE_UNUSED,
6568 enum machine_mode mode ATTRIBUTE_UNUSED,
6569 int ignore ATTRIBUTE_UNUSED)
6570{
5df189be 6571 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6572 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6573 struct spu_builtin_description *d;
6574
6575 if (fcode < NUM_SPU_BUILTINS)
6576 {
6577 d = &spu_builtins[fcode];
6578
5df189be 6579 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6580 }
6581 abort ();
6582}
6583
a76866d3 6584/* Implement targetm.vectorize.builtin_mask_for_load. */
6585static tree
6586spu_builtin_mask_for_load (void)
6587{
0c5c4d59 6588 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6589}
5df189be 6590
a28df51d 6591/* Implement targetm.vectorize.builtin_vectorization_cost. */
6592static int
0822b158 6593spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
d13adc77 6594 tree vectype,
0822b158 6595 int misalign ATTRIBUTE_UNUSED)
559093aa 6596{
d13adc77 6597 unsigned elements;
6598
559093aa 6599 switch (type_of_cost)
6600 {
6601 case scalar_stmt:
6602 case vector_stmt:
6603 case vector_load:
6604 case vector_store:
6605 case vec_to_scalar:
6606 case scalar_to_vec:
6607 case cond_branch_not_taken:
6608 case vec_perm:
5df2530b 6609 case vec_promote_demote:
559093aa 6610 return 1;
6611
6612 case scalar_store:
6613 return 10;
6614
6615 case scalar_load:
6616 /* Load + rotate. */
6617 return 2;
6618
6619 case unaligned_load:
6620 return 2;
6621
6622 case cond_branch_taken:
6623 return 6;
6624
d13adc77 6625 case vec_construct:
6626 elements = TYPE_VECTOR_SUBPARTS (vectype);
6627 return elements / 2 + 1;
6628
559093aa 6629 default:
6630 gcc_unreachable ();
6631 }
a28df51d 6632}
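
/* Worked example (illustrative): building a V4SImode vector from scalars
   (vec_construct) is costed as 4 / 2 + 1 = 3, a scalar load as 2 (load
   plus rotate into the preferred slot), and a scalar store as 10,
   presumably reflecting the read-modify-write sequence a sub-quadword
   store needs on SPU.  */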
6633
4db2b577 6634/* Implement targetm.vectorize.init_cost. */
6635
61b33788 6636static void *
4db2b577 6637spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6638{
f97dec81 6639 unsigned *cost = XNEWVEC (unsigned, 3);
6640 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
4db2b577 6641 return cost;
6642}
6643
6644/* Implement targetm.vectorize.add_stmt_cost. */
6645
61b33788 6646static unsigned
4db2b577 6647spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
f97dec81 6648 struct _stmt_vec_info *stmt_info, int misalign,
6649 enum vect_cost_model_location where)
4db2b577 6650{
6651 unsigned *cost = (unsigned *) data;
6652 unsigned retval = 0;
6653
6654 if (flag_vect_cost_model)
6655 {
f97dec81 6656 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4db2b577 6657 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6658
6659 /* Statements in an inner loop relative to the loop being
6660 vectorized are weighted more heavily. The value here is
6661 arbitrary and could potentially be improved with analysis. */
f97dec81 6662 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4db2b577 6663 count *= 50; /* FIXME. */
6664
6665 retval = (unsigned) (count * stmt_cost);
f97dec81 6666 cost[where] += retval;
4db2b577 6667 }
6668
6669 return retval;
6670}
6671
6672/* Implement targetm.vectorize.finish_cost. */
6673
f97dec81 6674static void
6675spu_finish_cost (void *data, unsigned *prologue_cost,
6676 unsigned *body_cost, unsigned *epilogue_cost)
4db2b577 6677{
f97dec81 6678 unsigned *cost = (unsigned *) data;
6679 *prologue_cost = cost[vect_prologue];
6680 *body_cost = cost[vect_body];
6681 *epilogue_cost = cost[vect_epilogue];
4db2b577 6682}
6683
6684/* Implement targetm.vectorize.destroy_cost_data. */
6685
61b33788 6686static void
4db2b577 6687spu_destroy_cost_data (void *data)
6688{
6689 free (data);
6690}
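
/* Added commentary: the four hooks above form the vectorizer's cost
   protocol -- spu_init_cost allocates the three accumulators (prologue,
   body, epilogue), spu_add_stmt_cost folds each statement's
   spu_builtin_vectorization_cost into the bucket named by WHERE,
   spu_finish_cost reads the totals back out, and spu_destroy_cost_data
   frees the buffer.  */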
6691
0e87db76 6692/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6693	   after applying N iterations.  This routine does not determine
 6694	   how many iterations are required to reach the desired alignment.  */
6695
6696static bool
a9f1838b 6697spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6698{
6699 if (is_packed)
6700 return false;
6701
6702 /* All other types are naturally aligned. */
6703 return true;
6704}
6705
6cf5579e 6706/* Return the appropriate mode for a named address pointer. */
6707static enum machine_mode
6708spu_addr_space_pointer_mode (addr_space_t addrspace)
6709{
6710 switch (addrspace)
6711 {
6712 case ADDR_SPACE_GENERIC:
6713 return ptr_mode;
6714 case ADDR_SPACE_EA:
6715 return EAmode;
6716 default:
6717 gcc_unreachable ();
6718 }
6719}
6720
6721/* Return the appropriate mode for a named address address. */
6722static enum machine_mode
6723spu_addr_space_address_mode (addr_space_t addrspace)
6724{
6725 switch (addrspace)
6726 {
6727 case ADDR_SPACE_GENERIC:
6728 return Pmode;
6729 case ADDR_SPACE_EA:
6730 return EAmode;
6731 default:
6732 gcc_unreachable ();
6733 }
6734}
6735
6736/* Determine if one named address space is a subset of another. */
6737
6738static bool
6739spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6740{
6741 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6742 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6743
6744 if (subset == superset)
6745 return true;
6746
6747 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6748 being subsets but instead as disjoint address spaces. */
6749 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6750 return false;
6751
6752 else
6753 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6754}
6755
6756/* Convert from one address space to another. */
6757static rtx
6758spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6759{
6760 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6761 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6762
6763 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6764 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6765
6766 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6767 {
6768 rtx result, ls;
6769
6770 ls = gen_const_mem (DImode,
6771 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6772 set_mem_align (ls, 128);
6773
6774 result = gen_reg_rtx (Pmode);
6775 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6776 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6777 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6778 ls, const0_rtx, Pmode, 1);
6779
6780 emit_insn (gen_subsi3 (result, op, ls));
6781
6782 return result;
6783 }
6784
6785 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6786 {
6787 rtx result, ls;
6788
6789 ls = gen_const_mem (DImode,
6790 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6791 set_mem_align (ls, 128);
6792
6793 result = gen_reg_rtx (EAmode);
6794 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6795 op = force_reg (Pmode, op);
6796 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6797 ls, const0_rtx, EAmode, 1);
6798 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6799
6800 if (EAmode == SImode)
6801 emit_insn (gen_addsi3 (result, op, ls));
6802 else
6803 emit_insn (gen_adddi3 (result, op, ls));
6804
6805 return result;
6806 }
6807
6808 else
6809 gcc_unreachable ();
6810}
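
/* Added commentary (hedged): converting an __ea pointer to a generic
   (local store) pointer subtracts the local store's effective address,
   loaded from __ea_local_store, and the reverse conversion adds it; the
   emit_conditional_move zeroes that bias when the source pointer is
   NULL, so NULL converts to NULL in both directions.  */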
6811
6812
d52fd16a 6813/* Count the total number of instructions in each pipe and return the
6814 maximum, which is used as the Minimum Iteration Interval (MII)
6815 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6816	   A value of -2 means the instruction can go in either pipe0 or pipe1.  */
6817static int
6818spu_sms_res_mii (struct ddg *g)
6819{
6820 int i;
6821 unsigned t[4] = {0, 0, 0, 0};
6822
6823 for (i = 0; i < g->num_nodes; i++)
6824 {
6825 rtx insn = g->nodes[i].insn;
6826 int p = get_pipe (insn) + 2;
6827
1e944a0b 6828 gcc_assert (p >= 0);
6829 gcc_assert (p < 4);
d52fd16a 6830
6831 t[p]++;
6832 if (dump_file && INSN_P (insn))
6833 fprintf (dump_file, "i%d %s %d %d\n",
6834 INSN_UID (insn),
6835 insn_data[INSN_CODE(insn)].name,
6836 p, t[p]);
6837 }
6838 if (dump_file)
6839 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6840
6841 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6842}
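
/* Worked example (illustrative): with t[0] = 6 instructions that can use
   either pipe, t[2] = 3 pipe0-only and t[3] = 2 pipe1-only, the MII is
   MAX ((6 + 3 + 2 + 1) / 2, MAX (3, 2)) = MAX (6, 3) = 6.  */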
6843
6844
5df189be 6845void
6846spu_init_expanders (void)
9d98604b 6847{
5df189be 6848 if (cfun)
9d98604b 6849 {
6850 rtx r0, r1;
 6851	      /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128-bit
 6852	         aligned when frame_pointer_needed is true.  We don't know that until we're
6853 expanding the prologue. */
6854 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6855
6856 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6857 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6858 to be treated as aligned, so generate them here. */
6859 r0 = gen_reg_rtx (SImode);
6860 r1 = gen_reg_rtx (SImode);
6861 mark_reg_pointer (r0, 128);
6862 mark_reg_pointer (r1, 128);
6863 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6864 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6865 }
ea32e033 6866}
6867
6868static enum machine_mode
6869spu_libgcc_cmp_return_mode (void)
6870{
6871
 6872/* For SPU, word mode is TImode, so it is better to use SImode
 6873   for compare returns.  */
6874 return SImode;
6875}
6876
6877static enum machine_mode
6878spu_libgcc_shift_count_mode (void)
6879{
 6880/* For SPU, word mode is TImode, so it is better to use SImode
 6881   for shift counts.  */
6882 return SImode;
6883}
5a976006 6884
a08dfd55 6885/* Implement targetm.section_type_flags. */
6886static unsigned int
6887spu_section_type_flags (tree decl, const char *name, int reloc)
6888{
6889 /* .toe needs to have type @nobits. */
6890 if (strcmp (name, ".toe") == 0)
6891 return SECTION_BSS;
6cf5579e 6892 /* Don't load _ea into the current address space. */
6893 if (strcmp (name, "._ea") == 0)
6894 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6895 return default_section_type_flags (decl, name, reloc);
6896}
c2233b46 6897
6cf5579e 6898/* Implement targetm.select_section. */
6899static section *
6900spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6901{
6902 /* Variables and constants defined in the __ea address space
6903 go into a special section named "._ea". */
6904 if (TREE_TYPE (decl) != error_mark_node
6905 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6906 {
6907 /* We might get called with string constants, but get_named_section
6908 doesn't like them as they are not DECLs. Also, we need to set
6909 flags in that case. */
6910 if (!DECL_P (decl))
6911 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6912
6913 return get_named_section (decl, "._ea", reloc);
6914 }
6915
6916 return default_elf_select_section (decl, reloc, align);
6917}
6918
6919/* Implement targetm.unique_section. */
6920static void
6921spu_unique_section (tree decl, int reloc)
6922{
6923 /* We don't support unique section names in the __ea address
6924 space for now. */
6925 if (TREE_TYPE (decl) != error_mark_node
6926 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6927 return;
6928
6929 default_unique_section (decl, reloc);
6930}
6931
56c7bfc2 6932/* Generate a constant or register which contains 2^SCALE. We assume
6933 the result is valid for MODE. Currently, MODE must be V4SFmode and
6934 SCALE must be SImode. */
6935rtx
6936spu_gen_exp2 (enum machine_mode mode, rtx scale)
6937{
6938 gcc_assert (mode == V4SFmode);
6939 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6940 if (GET_CODE (scale) != CONST_INT)
6941 {
6942 /* unsigned int exp = (127 + scale) << 23;
6943 __vector float m = (__vector float) spu_splats (exp); */
6944 rtx reg = force_reg (SImode, scale);
6945 rtx exp = gen_reg_rtx (SImode);
6946 rtx mul = gen_reg_rtx (mode);
6947 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6948 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6949 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6950 return mul;
6951 }
6952 else
6953 {
6954 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6955 unsigned char arr[16];
6956 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6957 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6958 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6959 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6960 return array_to_constant (mode, arr);
6961 }
6962}
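
/* Worked example (illustrative): for SCALE == 3 the constant path
   computes exp = 127 + 3 = 130 and packs (130 << 23) into each word,
   i.e. the bytes 0x41 0x00 0x00 0x00, which is the IEEE single-precision
   value 8.0f == 2^3 replicated across the V4SFmode result.  */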
6963
9d98604b 6964/* After reload, just change the convert into a move instruction
6965 or a dead instruction. */
6966void
6967spu_split_convert (rtx ops[])
6968{
6969 if (REGNO (ops[0]) == REGNO (ops[1]))
6970 emit_note (NOTE_INSN_DELETED);
6971 else
6972 {
6973 /* Use TImode always as this might help hard reg copyprop. */
6974 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6975 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6976 emit_insn (gen_move_insn (op0, op1));
6977 }
6978}
6979
b3878a6c 6980void
4cbad5bb 6981spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 6982{
6983 fprintf (file, "# profile\n");
6984 fprintf (file, "brsl $75, _mcount\n");
6985}
6986
329c1e4e 6987/* Implement targetm.ref_may_alias_errno. */
6988static bool
6989spu_ref_may_alias_errno (ao_ref *ref)
6990{
6991 tree base = ao_ref_base (ref);
6992
6993 /* With SPU newlib, errno is defined as something like
6994 _impure_data._errno
6995 The default implementation of this target macro does not
 6996	     recognize such expressions, so we special-case it here.  */
6997
6998 if (TREE_CODE (base) == VAR_DECL
6999 && !TREE_STATIC (base)
7000 && DECL_EXTERNAL (base)
7001 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7002 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7003 "_impure_data") == 0
7004 /* _errno is the first member of _impure_data. */
7005 && ref->offset == 0)
7006 return true;
7007
7008 return default_ref_may_alias_errno (ref);
7009}
7010
f17d2d13 7011/* Output thunk to FILE that implements a C++ virtual function call (with
7012 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7013 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7014 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7015 relative to the resulting this pointer. */
7016
7017static void
7018spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7019 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7020 tree function)
7021{
7022 rtx op[8];
7023
7024 /* Make sure unwind info is emitted for the thunk if needed. */
7025 final_start_function (emit_barrier (), file, 1);
7026
7027 /* Operand 0 is the target function. */
7028 op[0] = XEXP (DECL_RTL (function), 0);
7029
7030 /* Operand 1 is the 'this' pointer. */
7031 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7032 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7033 else
7034 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7035
7036 /* Operands 2/3 are the low/high halfwords of delta. */
7037 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7038 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7039
7040 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7041 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7042 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7043
7044 /* Operands 6/7 are temporary registers. */
7045 op[6] = gen_rtx_REG (Pmode, 79);
7046 op[7] = gen_rtx_REG (Pmode, 78);
7047
7048 /* Add DELTA to this pointer. */
7049 if (delta)
7050 {
7051 if (delta >= -0x200 && delta < 0x200)
7052 output_asm_insn ("ai\t%1,%1,%2", op);
7053 else if (delta >= -0x8000 && delta < 0x8000)
7054 {
7055 output_asm_insn ("il\t%6,%2", op);
7056 output_asm_insn ("a\t%1,%1,%6", op);
7057 }
7058 else
7059 {
7060 output_asm_insn ("ilhu\t%6,%3", op);
7061 output_asm_insn ("iohl\t%6,%2", op);
7062 output_asm_insn ("a\t%1,%1,%6", op);
7063 }
7064 }
7065
7066 /* Perform vcall adjustment. */
7067 if (vcall_offset)
7068 {
7069 output_asm_insn ("lqd\t%7,0(%1)", op);
7070 output_asm_insn ("rotqby\t%7,%7,%1", op);
7071
7072 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7073 output_asm_insn ("ai\t%7,%7,%4", op);
7074 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7075 {
7076 output_asm_insn ("il\t%6,%4", op);
7077 output_asm_insn ("a\t%7,%7,%6", op);
7078 }
7079 else
7080 {
7081 output_asm_insn ("ilhu\t%6,%5", op);
7082 output_asm_insn ("iohl\t%6,%4", op);
7083 output_asm_insn ("a\t%7,%7,%6", op);
7084 }
7085
7086 output_asm_insn ("lqd\t%6,0(%7)", op);
7087 output_asm_insn ("rotqby\t%6,%6,%7", op);
7088 output_asm_insn ("a\t%1,%1,%6", op);
7089 }
7090
7091 /* Jump to target. */
7092 output_asm_insn ("br\t%0", op);
7093
7094 final_end_function ();
7095}
7096
d5065e6e 7097/* Canonicalize a comparison from one we don't have to one we do have. */
7098static void
7099spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7100 bool op0_preserve_value)
7101{
7102 if (!op0_preserve_value
7103 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7104 {
7105 rtx tem = *op0;
7106 *op0 = *op1;
7107 *op1 = tem;
7108 *code = (int)swap_condition ((enum rtx_code)*code);
7109 }
7110}
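
/* Added sketch (illustrative): SPU only has equality and greater-than
   style compare insns, so when the first operand need not be preserved a
   request for (LT a b) is rewritten here to (GT b a); swap_condition
   likewise maps LE, LTU and LEU to GE, GTU and GEU.  */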
3defb88e 7111\f
7112/* Table of machine attributes. */
7113static const struct attribute_spec spu_attribute_table[] =
7114{
7115 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7116 affects_type_identity } */
7117 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7118 false },
7119 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7120 false },
7121 { NULL, 0, 0, false, false, false, NULL, false }
7122};
7123
7124/* TARGET overrides. */
7125
7126#undef TARGET_ADDR_SPACE_POINTER_MODE
7127#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7128
7129#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7130#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7131
7132#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7133#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7134 spu_addr_space_legitimate_address_p
7135
7136#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7137#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7138
7139#undef TARGET_ADDR_SPACE_SUBSET_P
7140#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7141
7142#undef TARGET_ADDR_SPACE_CONVERT
7143#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7144
7145#undef TARGET_INIT_BUILTINS
7146#define TARGET_INIT_BUILTINS spu_init_builtins
7147#undef TARGET_BUILTIN_DECL
7148#define TARGET_BUILTIN_DECL spu_builtin_decl
7149
7150#undef TARGET_EXPAND_BUILTIN
7151#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7152
7153#undef TARGET_UNWIND_WORD_MODE
7154#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7155
7156#undef TARGET_LEGITIMIZE_ADDRESS
7157#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7158
7159/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7160 and .quad for the debugger. When it is known that the assembler is fixed,
7161 these can be removed. */
7162#undef TARGET_ASM_UNALIGNED_SI_OP
7163#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7164
7165#undef TARGET_ASM_ALIGNED_DI_OP
7166#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7167
7168/* The .8byte directive doesn't seem to work well for a 32 bit
7169 architecture. */
7170#undef TARGET_ASM_UNALIGNED_DI_OP
7171#define TARGET_ASM_UNALIGNED_DI_OP NULL
7172
7173#undef TARGET_RTX_COSTS
7174#define TARGET_RTX_COSTS spu_rtx_costs
7175
7176#undef TARGET_ADDRESS_COST
d9c5e5f4 7177#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
3defb88e 7178
7179#undef TARGET_SCHED_ISSUE_RATE
7180#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7181
7182#undef TARGET_SCHED_INIT_GLOBAL
7183#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7184
7185#undef TARGET_SCHED_INIT
7186#define TARGET_SCHED_INIT spu_sched_init
7187
7188#undef TARGET_SCHED_VARIABLE_ISSUE
7189#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7190
7191#undef TARGET_SCHED_REORDER
7192#define TARGET_SCHED_REORDER spu_sched_reorder
7193
7194#undef TARGET_SCHED_REORDER2
7195#define TARGET_SCHED_REORDER2 spu_sched_reorder
7196
7197#undef TARGET_SCHED_ADJUST_COST
7198#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7199
7200#undef TARGET_ATTRIBUTE_TABLE
7201#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7202
7203#undef TARGET_ASM_INTEGER
7204#define TARGET_ASM_INTEGER spu_assemble_integer
7205
7206#undef TARGET_SCALAR_MODE_SUPPORTED_P
7207#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7208
7209#undef TARGET_VECTOR_MODE_SUPPORTED_P
7210#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7211
7212#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7213#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7214
7215#undef TARGET_ASM_GLOBALIZE_LABEL
7216#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7217
7218#undef TARGET_PASS_BY_REFERENCE
7219#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7220
7221#undef TARGET_FUNCTION_ARG
7222#define TARGET_FUNCTION_ARG spu_function_arg
7223
7224#undef TARGET_FUNCTION_ARG_ADVANCE
7225#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7226
7227#undef TARGET_MUST_PASS_IN_STACK
7228#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7229
7230#undef TARGET_BUILD_BUILTIN_VA_LIST
7231#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7232
7233#undef TARGET_EXPAND_BUILTIN_VA_START
7234#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7235
7236#undef TARGET_SETUP_INCOMING_VARARGS
7237#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7238
7239#undef TARGET_MACHINE_DEPENDENT_REORG
7240#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7241
7242#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7243#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7244
7245#undef TARGET_INIT_LIBFUNCS
7246#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7247
7248#undef TARGET_RETURN_IN_MEMORY
7249#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7250
7251#undef TARGET_ENCODE_SECTION_INFO
7252#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7253
7254#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7255#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7256
7257#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7258#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7259
7260#undef TARGET_VECTORIZE_INIT_COST
7261#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7262
7263#undef TARGET_VECTORIZE_ADD_STMT_COST
7264#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7265
7266#undef TARGET_VECTORIZE_FINISH_COST
7267#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7268
7269#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7270#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7271
7272#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7273#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7274
7275#undef TARGET_LIBGCC_CMP_RETURN_MODE
7276#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7277
7278#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7279#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7280
7281#undef TARGET_SCHED_SMS_RES_MII
7282#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7283
7284#undef TARGET_SECTION_TYPE_FLAGS
7285#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7286
7287#undef TARGET_ASM_SELECT_SECTION
7288#define TARGET_ASM_SELECT_SECTION spu_select_section
7289
7290#undef TARGET_ASM_UNIQUE_SECTION
7291#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7292
7293#undef TARGET_LEGITIMATE_ADDRESS_P
7294#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7295
7296#undef TARGET_LEGITIMATE_CONSTANT_P
7297#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7298
7299#undef TARGET_TRAMPOLINE_INIT
7300#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7301
08c6cbd2 7302#undef TARGET_WARN_FUNC_RETURN
7303#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7304
3defb88e 7305#undef TARGET_OPTION_OVERRIDE
7306#define TARGET_OPTION_OVERRIDE spu_option_override
7307
7308#undef TARGET_CONDITIONAL_REGISTER_USAGE
7309#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7310
7311#undef TARGET_REF_MAY_ALIAS_ERRNO
7312#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7313
7314#undef TARGET_ASM_OUTPUT_MI_THUNK
7315#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7316#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7317#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7318
7319/* Variable tracking should be run after all optimizations which
7320 change order of insns. It also needs a valid CFG. */
7321#undef TARGET_DELAY_VARTRACK
7322#define TARGET_DELAY_VARTRACK true
7323
d5065e6e 7324#undef TARGET_CANONICALIZE_COMPARISON
7325#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7326
3defb88e 7327struct gcc_target targetm = TARGET_INITIALIZER;
7328
c2233b46 7329#include "gt-spu.h"