gcc/config/spu/spu.c
711789cc 1/* Copyright (C) 2006-2013 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
9ed99284 31#include "stringpool.h"
32#include "stor-layout.h"
33#include "calls.h"
34#include "varasm.h"
644459d0 35#include "expr.h"
36#include "optabs.h"
37#include "except.h"
38#include "function.h"
39#include "output.h"
40#include "basic-block.h"
0b205f4c 41#include "diagnostic-core.h"
644459d0 42#include "ggc.h"
43#include "hashtab.h"
44#include "tm_p.h"
45#include "target.h"
46#include "target-def.h"
47#include "langhooks.h"
48#include "reload.h"
644459d0 49#include "sched-int.h"
50#include "params.h"
644459d0 51#include "machmode.h"
e795d6e1 52#include "gimple.h"
a8783bee 53#include "gimplify.h"
644459d0 54#include "tm-constrs.h"
d52fd16a 55#include "ddg.h"
5a976006 56#include "sbitmap.h"
57#include "timevar.h"
58#include "df.h"
b9ed1410 59#include "dumpfile.h"
a7a0184d 60#include "cfgloop.h"
6352eedf 61
62/* Builtin types, data and prototypes. */
c2233b46 63
64enum spu_builtin_type_index
65{
66 SPU_BTI_END_OF_PARAMS,
67
68 /* We create new type nodes for these. */
69 SPU_BTI_V16QI,
70 SPU_BTI_V8HI,
71 SPU_BTI_V4SI,
72 SPU_BTI_V2DI,
73 SPU_BTI_V4SF,
74 SPU_BTI_V2DF,
75 SPU_BTI_UV16QI,
76 SPU_BTI_UV8HI,
77 SPU_BTI_UV4SI,
78 SPU_BTI_UV2DI,
79
80 /* A 16-byte type. (Implemented with V16QI_type_node) */
81 SPU_BTI_QUADWORD,
82
83 /* These all correspond to intSI_type_node */
84 SPU_BTI_7,
85 SPU_BTI_S7,
86 SPU_BTI_U7,
87 SPU_BTI_S10,
88 SPU_BTI_S10_4,
89 SPU_BTI_U14,
90 SPU_BTI_16,
91 SPU_BTI_S16,
92 SPU_BTI_S16_2,
93 SPU_BTI_U16,
94 SPU_BTI_U16_2,
95 SPU_BTI_U18,
96
97 /* These correspond to the standard types */
98 SPU_BTI_INTQI,
99 SPU_BTI_INTHI,
100 SPU_BTI_INTSI,
101 SPU_BTI_INTDI,
102
103 SPU_BTI_UINTQI,
104 SPU_BTI_UINTHI,
105 SPU_BTI_UINTSI,
106 SPU_BTI_UINTDI,
107
108 SPU_BTI_FLOAT,
109 SPU_BTI_DOUBLE,
110
111 SPU_BTI_VOID,
112 SPU_BTI_PTR,
113
114 SPU_BTI_MAX
115};
116
117#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
118#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
119#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
120#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
121#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
122#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
123#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
124#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
125#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
126#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
127
128static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
129
6352eedf 130struct spu_builtin_range
131{
132 int low, high;
133};
134
135static struct spu_builtin_range spu_builtin_range[] = {
136 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
137 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
138 {0ll, 0x7fll}, /* SPU_BTI_U7 */
139 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
140 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
141 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
142 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
143 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
144 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
145 {0ll, 0xffffll}, /* SPU_BTI_U16 */
146 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
148};
149
644459d0 150\f
151/* Target specific attribute specifications. */
152char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
153
154/* Prototypes and external defs. */
644459d0 155static int get_pipe (rtx insn);
644459d0 156static int spu_naked_function_p (tree func);
644459d0 157static int mem_is_padded_component_ref (rtx x);
c7b91b14 158static void fix_range (const char *);
9d98604b 159static rtx spu_expand_load (rtx, rtx, rtx, int);
644459d0 160
5474166e 161/* Which instruction set architecture to use. */
162int spu_arch;
163/* Which cpu are we tuning for. */
164int spu_tune;
165
5a976006 166/* The hardware requires 8 insns between a hint and the branch it
 167   affects.  This variable describes how many rtl instructions the
168 compiler needs to see before inserting a hint, and then the compiler
169 will insert enough nops to make it at least 8 insns. The default is
 170   for the compiler to allow up to 2 nops to be emitted.  The nops are
171 inserted in pairs, so we round down. */
172int spu_hint_dist = (8*4) - (2*4);
173
644459d0 174enum spu_immediate {
175 SPU_NONE,
176 SPU_IL,
177 SPU_ILA,
178 SPU_ILH,
179 SPU_ILHU,
180 SPU_ORI,
181 SPU_ORHI,
182 SPU_ORBI,
99369027 183 SPU_IOHL
644459d0 184};
dea01258 185enum immediate_class
186{
187 IC_POOL, /* constant pool */
188 IC_IL1, /* one il* instruction */
189 IC_IL2, /* both ilhu and iohl instructions */
190 IC_IL1s, /* one il* instruction */
191 IC_IL2s, /* both ilhu and iohl instructions */
192 IC_FSMBI, /* the fsmbi instruction */
193 IC_CPAT, /* one of the c*d instructions */
5df189be 194 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 195};
644459d0 196
197static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
198static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 199static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
200static enum immediate_class classify_immediate (rtx op,
201 enum machine_mode mode);
644459d0 202
6cf5579e 203/* Pointer mode for __ea references. */
204#define EAmode (spu_ea_model != 32 ? DImode : SImode)
205
ef51d1e3 206\f
5eb28709 207/* Define the structure for the machine field in struct function. */
208struct GTY(()) machine_function
209{
210 /* Register to use for PIC accesses. */
211 rtx pic_reg;
212};
213
214/* How to allocate a 'struct machine_function'. */
215static struct machine_function *
216spu_init_machine_status (void)
217{
218 return ggc_alloc_cleared_machine_function ();
219}
220
4c834714 221/* Implement TARGET_OPTION_OVERRIDE. */
222static void
223spu_option_override (void)
644459d0 224{
5eb28709 225 /* Set up function hooks. */
226 init_machine_status = spu_init_machine_status;
227
14d408d9 228   /* Small loops will be completely peeled at -O3.  For SPU it is more important
229 to keep code small by default. */
686e2769 230 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 231 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 232 global_options.x_param_values,
233 global_options_set.x_param_values);
14d408d9 234
644459d0 235 flag_omit_frame_pointer = 1;
236
5a976006 237   /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
644459d0 238 if (align_functions < 8)
239 align_functions = 8;
c7b91b14 240
5a976006 241 spu_hint_dist = 8*4 - spu_max_nops*4;
242 if (spu_hint_dist < 0)
243 spu_hint_dist = 0;
244
c7b91b14 245 if (spu_fixed_range_string)
246 fix_range (spu_fixed_range_string);
5474166e 247
248 /* Determine processor architectural level. */
249 if (spu_arch_string)
250 {
251 if (strcmp (&spu_arch_string[0], "cell") == 0)
252 spu_arch = PROCESSOR_CELL;
253 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
254 spu_arch = PROCESSOR_CELLEDP;
255 else
8e181c9d 256 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 257 }
258
259 /* Determine processor to tune for. */
260 if (spu_tune_string)
261 {
262 if (strcmp (&spu_tune_string[0], "cell") == 0)
263 spu_tune = PROCESSOR_CELL;
264 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
265 spu_tune = PROCESSOR_CELLEDP;
266 else
8e181c9d 267 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 268 }
98bbec1e 269
13684256 270 /* Change defaults according to the processor architecture. */
271 if (spu_arch == PROCESSOR_CELLEDP)
272 {
273 /* If no command line option has been otherwise specified, change
274 the default to -mno-safe-hints on celledp -- only the original
275 Cell/B.E. processors require this workaround. */
276 if (!(target_flags_explicit & MASK_SAFE_HINTS))
277 target_flags &= ~MASK_SAFE_HINTS;
278 }
279
98bbec1e 280 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 281}
282\f
283/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
284 struct attribute_spec.handler. */
285
644459d0 286/* True if MODE is valid for the target. By "valid", we mean able to
287 be manipulated in non-trivial ways. In particular, this means all
288 the arithmetic is supported. */
289static bool
290spu_scalar_mode_supported_p (enum machine_mode mode)
291{
292 switch (mode)
293 {
294 case QImode:
295 case HImode:
296 case SImode:
297 case SFmode:
298 case DImode:
299 case TImode:
300 case DFmode:
301 return true;
302
303 default:
304 return false;
305 }
306}
307
308/* Similarly for vector modes. "Supported" here is less strict. At
309 least some operations are supported; need to check optabs or builtins
310 for further details. */
311static bool
312spu_vector_mode_supported_p (enum machine_mode mode)
313{
314 switch (mode)
315 {
316 case V16QImode:
317 case V8HImode:
318 case V4SImode:
319 case V2DImode:
320 case V4SFmode:
321 case V2DFmode:
322 return true;
323
324 default:
325 return false;
326 }
327}
328
329/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
330 least significant bytes of the outer mode. This function returns
 331   TRUE for the SUBREGs where this is correct. */
332int
333valid_subreg (rtx op)
334{
335 enum machine_mode om = GET_MODE (op);
336 enum machine_mode im = GET_MODE (SUBREG_REG (op));
337 return om != VOIDmode && im != VOIDmode
338 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 339 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
340 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 341}
342
 343/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 344 and adjust the start offset. */
644459d0 345static rtx
346adjust_operand (rtx op, HOST_WIDE_INT * start)
347{
348 enum machine_mode mode;
349 int op_size;
38aca5eb 350 /* Strip any paradoxical SUBREG. */
351 if (GET_CODE (op) == SUBREG
352 && (GET_MODE_BITSIZE (GET_MODE (op))
353 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 354 {
355 if (start)
356 *start -=
357 GET_MODE_BITSIZE (GET_MODE (op)) -
358 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
359 op = SUBREG_REG (op);
360 }
 361   /* If it is smaller than SI, ensure a SUBREG.  */
362 op_size = GET_MODE_BITSIZE (GET_MODE (op));
363 if (op_size < 32)
364 {
365 if (start)
366 *start += 32 - op_size;
367 op_size = 32;
368 }
369 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
370 mode = mode_for_size (op_size, MODE_INT, 0);
371 if (mode != GET_MODE (op))
372 op = gen_rtx_SUBREG (mode, op, 0);
373 return op;
374}
375
376void
377spu_expand_extv (rtx ops[], int unsignedp)
378{
9d98604b 379 rtx dst = ops[0], src = ops[1];
644459d0 380 HOST_WIDE_INT width = INTVAL (ops[2]);
381 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 382 HOST_WIDE_INT align_mask;
383 rtx s0, s1, mask, r0;
644459d0 384
9d98604b 385 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 386
9d98604b 387 if (MEM_P (src))
644459d0 388 {
9d98604b 389 /* First, determine if we need 1 TImode load or 2. We need only 1
390 if the bits being extracted do not cross the alignment boundary
391 as determined by the MEM and its address. */
392
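      /* MEM_ALIGN is in bits, so negating it yields a mask that rounds a
	 bit position down to the start of its aligned block; one load
	 suffices exactly when START and the field's last bit land in the
	 same block.  */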
393 align_mask = -MEM_ALIGN (src);
394 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 395 {
9d98604b 396 /* Alignment is sufficient for 1 load. */
397 s0 = gen_reg_rtx (TImode);
398 r0 = spu_expand_load (s0, 0, src, start / 8);
399 start &= 7;
400 if (r0)
401 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 402 }
9d98604b 403 else
404 {
405 /* Need 2 loads. */
406 s0 = gen_reg_rtx (TImode);
407 s1 = gen_reg_rtx (TImode);
408 r0 = spu_expand_load (s0, s1, src, start / 8);
409 start &= 7;
410
411 gcc_assert (start + width <= 128);
412 if (r0)
413 {
414 rtx r1 = gen_reg_rtx (SImode);
415 mask = gen_reg_rtx (TImode);
416 emit_move_insn (mask, GEN_INT (-1));
417 emit_insn (gen_rotqby_ti (s0, s0, r0));
418 emit_insn (gen_rotqby_ti (s1, s1, r0));
419 if (GET_CODE (r0) == CONST_INT)
420 r1 = GEN_INT (INTVAL (r0) & 15);
421 else
422 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
423 emit_insn (gen_shlqby_ti (mask, mask, r1));
424 emit_insn (gen_selb (s0, s1, s0, mask));
425 }
426 }
427
428 }
429 else if (GET_CODE (src) == SUBREG)
430 {
431 rtx r = SUBREG_REG (src);
432 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
433 s0 = gen_reg_rtx (TImode);
434 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
435 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
436 else
437 emit_move_insn (s0, src);
438 }
439 else
440 {
441 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
442 s0 = gen_reg_rtx (TImode);
443 emit_move_insn (s0, src);
644459d0 444 }
445
9d98604b 446 /* Now s0 is TImode and contains the bits to extract at start. */
447
448 if (start)
449 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
450
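  /* Rotating left by START moved the field to the most significant end
     of s0; shifting right by 128 - WIDTH right-justifies it, zero or
     sign extending according to UNSIGNEDP.  */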
451 if (128 - width)
f5ff0b21 452 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 453
9d98604b 454 emit_move_insn (dst, s0);
644459d0 455}
456
457void
458spu_expand_insv (rtx ops[])
459{
460 HOST_WIDE_INT width = INTVAL (ops[1]);
461 HOST_WIDE_INT start = INTVAL (ops[2]);
462 HOST_WIDE_INT maskbits;
4cbad5bb 463 enum machine_mode dst_mode;
644459d0 464 rtx dst = ops[0], src = ops[3];
4cbad5bb 465 int dst_size;
644459d0 466 rtx mask;
467 rtx shift_reg;
468 int shift;
469
470
471 if (GET_CODE (ops[0]) == MEM)
472 dst = gen_reg_rtx (TImode);
473 else
474 dst = adjust_operand (dst, &start);
475 dst_mode = GET_MODE (dst);
476 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
477
478 if (CONSTANT_P (src))
479 {
480 enum machine_mode m =
481 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
482 src = force_reg (m, convert_to_mode (m, src, 0));
483 }
484 src = adjust_operand (src, 0);
644459d0 485
486 mask = gen_reg_rtx (dst_mode);
487 shift_reg = gen_reg_rtx (dst_mode);
488 shift = dst_size - start - width;
489
490 /* It's not safe to use subreg here because the compiler assumes
491 that the SUBREG_REG is right justified in the SUBREG. */
492 convert_move (shift_reg, src, 1);
493
494 if (shift > 0)
495 {
496 switch (dst_mode)
497 {
498 case SImode:
499 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
500 break;
501 case DImode:
502 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
503 break;
504 case TImode:
505 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
506 break;
507 default:
508 abort ();
509 }
510 }
511 else if (shift < 0)
512 abort ();
513
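  /* Build a mask of WIDTH one bits starting START bits below the MSB of
     the destination.  For example, with dst_size 32, start 4 and width 8,
     -1 << 20 gives 0xfff00000 and adding 1 << 28 clears the top four
     bits, leaving 0x0ff00000.  */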
514 switch (dst_size)
515 {
516 case 32:
517 maskbits = (-1ll << (32 - width - start));
518 if (start)
519 maskbits += (1ll << (32 - start));
520 emit_move_insn (mask, GEN_INT (maskbits));
521 break;
522 case 64:
523 maskbits = (-1ll << (64 - width - start));
524 if (start)
525 maskbits += (1ll << (64 - start));
526 emit_move_insn (mask, GEN_INT (maskbits));
527 break;
528 case 128:
529 {
530 unsigned char arr[16];
531 int i = start / 8;
532 memset (arr, 0, sizeof (arr));
533 arr[i] = 0xff >> (start & 7);
534 for (i++; i <= (start + width - 1) / 8; i++)
535 arr[i] = 0xff;
536 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
537 emit_move_insn (mask, array_to_constant (TImode, arr));
538 }
539 break;
540 default:
541 abort ();
542 }
543 if (GET_CODE (ops[0]) == MEM)
544 {
644459d0 545 rtx low = gen_reg_rtx (SImode);
644459d0 546 rtx rotl = gen_reg_rtx (SImode);
547 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 548 rtx addr;
549 rtx addr0;
550 rtx addr1;
644459d0 551 rtx mem;
552
9d98604b 553 addr = force_reg (Pmode, XEXP (ops[0], 0));
554 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 555 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
556 emit_insn (gen_negsi2 (rotl, low));
557 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
558 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 559 mem = change_address (ops[0], TImode, addr0);
644459d0 560 set_mem_alias_set (mem, 0);
561 emit_move_insn (dst, mem);
562 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 563 if (start + width > MEM_ALIGN (ops[0]))
564 {
565 rtx shl = gen_reg_rtx (SImode);
566 rtx mask1 = gen_reg_rtx (TImode);
567 rtx dst1 = gen_reg_rtx (TImode);
568 rtx mem1;
29c05e22 569 addr1 = plus_constant (Pmode, addr, 16);
9d98604b 570 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 571 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
572 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 573 mem1 = change_address (ops[0], TImode, addr1);
644459d0 574 set_mem_alias_set (mem1, 0);
575 emit_move_insn (dst1, mem1);
576 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
577 emit_move_insn (mem1, dst1);
578 }
9d98604b 579 emit_move_insn (mem, dst);
644459d0 580 }
581 else
71cd778d 582 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 583}
584
585
586int
587spu_expand_block_move (rtx ops[])
588{
589 HOST_WIDE_INT bytes, align, offset;
590 rtx src, dst, sreg, dreg, target;
591 int i;
592 if (GET_CODE (ops[2]) != CONST_INT
593 || GET_CODE (ops[3]) != CONST_INT
48eb4342 594 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 595 return 0;
596
597 bytes = INTVAL (ops[2]);
598 align = INTVAL (ops[3]);
599
600 if (bytes <= 0)
601 return 1;
602
603 dst = ops[0];
604 src = ops[1];
605
606 if (align == 16)
607 {
608 for (offset = 0; offset + 16 <= bytes; offset += 16)
609 {
610 dst = adjust_address (ops[0], V16QImode, offset);
611 src = adjust_address (ops[1], V16QImode, offset);
612 emit_move_insn (dst, src);
613 }
614 if (offset < bytes)
615 {
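	  /* Fewer than 16 bytes remain.  Load both quadwords and merge
	     them with selb under a mask whose leading (bytes - offset)
	     bytes are 0xff, so only those bytes are taken from the
	     source.  */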
616 rtx mask;
617 unsigned char arr[16] = { 0 };
618 for (i = 0; i < bytes - offset; i++)
619 arr[i] = 0xff;
620 dst = adjust_address (ops[0], V16QImode, offset);
621 src = adjust_address (ops[1], V16QImode, offset);
622 mask = gen_reg_rtx (V16QImode);
623 sreg = gen_reg_rtx (V16QImode);
624 dreg = gen_reg_rtx (V16QImode);
625 target = gen_reg_rtx (V16QImode);
626 emit_move_insn (mask, array_to_constant (V16QImode, arr));
627 emit_move_insn (dreg, dst);
628 emit_move_insn (sreg, src);
629 emit_insn (gen_selb (target, dreg, sreg, mask));
630 emit_move_insn (dst, target);
631 }
632 return 1;
633 }
634 return 0;
635}
636
637enum spu_comp_code
638{ SPU_EQ, SPU_GT, SPU_GTU };
639
5474166e 640int spu_comp_icode[12][3] = {
641 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
642 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
643 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
644 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
645 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
646 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
647 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
648 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
649 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
650 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
651 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
652 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 653};
654
655/* Generate a compare for CODE. Return a brand-new rtx that represents
656 the result of the compare. GCC can figure this out too if we don't
 657   provide all variations of compares, but because GCC always wants to use
 658   WORD_MODE, we can generate better code in most cases if we do it
659 ourselves. */
660void
74f4459c 661spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 662{
663 int reverse_compare = 0;
664 int reverse_test = 0;
5d70b918 665 rtx compare_result, eq_result;
666 rtx comp_rtx, eq_rtx;
644459d0 667 enum machine_mode comp_mode;
668 enum machine_mode op_mode;
b9c74b4d 669 enum spu_comp_code scode, eq_code;
670 enum insn_code ior_code;
74f4459c 671 enum rtx_code code = GET_CODE (cmp);
672 rtx op0 = XEXP (cmp, 0);
673 rtx op1 = XEXP (cmp, 1);
644459d0 674 int index;
5d70b918 675 int eq_test = 0;
644459d0 676
74f4459c 677 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 678 and so on, to keep the constant in operand 1. */
74f4459c 679 if (GET_CODE (op1) == CONST_INT)
644459d0 680 {
74f4459c 681 HOST_WIDE_INT val = INTVAL (op1) - 1;
682 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 683 switch (code)
684 {
685 case GE:
74f4459c 686 op1 = GEN_INT (val);
644459d0 687 code = GT;
688 break;
689 case LT:
74f4459c 690 op1 = GEN_INT (val);
644459d0 691 code = LE;
692 break;
693 case GEU:
74f4459c 694 op1 = GEN_INT (val);
644459d0 695 code = GTU;
696 break;
697 case LTU:
74f4459c 698 op1 = GEN_INT (val);
644459d0 699 code = LEU;
700 break;
701 default:
702 break;
703 }
704 }
705
686195ea 706 /* However, if we generate an integer result, performing a reverse test
707 would require an extra negation, so avoid that where possible. */
708 if (GET_CODE (op1) == CONST_INT && is_set == 1)
709 {
710 HOST_WIDE_INT val = INTVAL (op1) + 1;
711 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
712 switch (code)
713 {
714 case LE:
715 op1 = GEN_INT (val);
716 code = LT;
717 break;
718 case LEU:
719 op1 = GEN_INT (val);
720 code = LTU;
721 break;
722 default:
723 break;
724 }
725 }
726
5d70b918 727 comp_mode = SImode;
74f4459c 728 op_mode = GET_MODE (op0);
5d70b918 729
644459d0 730 switch (code)
731 {
732 case GE:
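      /* When NaNs must be honored, a >= b is computed as (a > b) | (a == b);
	 the cheaper !(b > a) form would be true for unordered operands.  */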
644459d0 733 scode = SPU_GT;
07027691 734 if (HONOR_NANS (op_mode))
5d70b918 735 {
736 reverse_compare = 0;
737 reverse_test = 0;
738 eq_test = 1;
739 eq_code = SPU_EQ;
740 }
741 else
742 {
743 reverse_compare = 1;
744 reverse_test = 1;
745 }
644459d0 746 break;
747 case LE:
644459d0 748 scode = SPU_GT;
07027691 749 if (HONOR_NANS (op_mode))
5d70b918 750 {
751 reverse_compare = 1;
752 reverse_test = 0;
753 eq_test = 1;
754 eq_code = SPU_EQ;
755 }
756 else
757 {
758 reverse_compare = 0;
759 reverse_test = 1;
760 }
644459d0 761 break;
762 case LT:
763 reverse_compare = 1;
764 reverse_test = 0;
765 scode = SPU_GT;
766 break;
767 case GEU:
768 reverse_compare = 1;
769 reverse_test = 1;
770 scode = SPU_GTU;
771 break;
772 case LEU:
773 reverse_compare = 0;
774 reverse_test = 1;
775 scode = SPU_GTU;
776 break;
777 case LTU:
778 reverse_compare = 1;
779 reverse_test = 0;
780 scode = SPU_GTU;
781 break;
782 case NE:
783 reverse_compare = 0;
784 reverse_test = 1;
785 scode = SPU_EQ;
786 break;
787
788 case EQ:
789 scode = SPU_EQ;
790 break;
791 case GT:
792 scode = SPU_GT;
793 break;
794 case GTU:
795 scode = SPU_GTU;
796 break;
797 default:
798 scode = SPU_EQ;
799 break;
800 }
801
644459d0 802 switch (op_mode)
803 {
804 case QImode:
805 index = 0;
806 comp_mode = QImode;
807 break;
808 case HImode:
809 index = 1;
810 comp_mode = HImode;
811 break;
812 case SImode:
813 index = 2;
814 break;
815 case DImode:
816 index = 3;
817 break;
818 case TImode:
819 index = 4;
820 break;
821 case SFmode:
822 index = 5;
823 break;
824 case DFmode:
825 index = 6;
826 break;
827 case V16QImode:
5474166e 828 index = 7;
829 comp_mode = op_mode;
830 break;
644459d0 831 case V8HImode:
5474166e 832 index = 8;
833 comp_mode = op_mode;
834 break;
644459d0 835 case V4SImode:
5474166e 836 index = 9;
837 comp_mode = op_mode;
838 break;
644459d0 839 case V4SFmode:
5474166e 840 index = 10;
841 comp_mode = V4SImode;
842 break;
644459d0 843 case V2DFmode:
5474166e 844 index = 11;
845 comp_mode = V2DImode;
644459d0 846 break;
5474166e 847 case V2DImode:
644459d0 848 default:
849 abort ();
850 }
851
74f4459c 852 if (GET_MODE (op1) == DFmode
07027691 853 && (scode != SPU_GT && scode != SPU_EQ))
854 abort ();
644459d0 855
74f4459c 856 if (is_set == 0 && op1 == const0_rtx
857 && (GET_MODE (op0) == SImode
686195ea 858 || GET_MODE (op0) == HImode
859 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
644459d0 860 {
861 /* Don't need to set a register with the result when we are
862 comparing against zero and branching. */
863 reverse_test = !reverse_test;
74f4459c 864 compare_result = op0;
644459d0 865 }
866 else
867 {
868 compare_result = gen_reg_rtx (comp_mode);
869
870 if (reverse_compare)
871 {
74f4459c 872 rtx t = op1;
873 op1 = op0;
874 op0 = t;
644459d0 875 }
876
877 if (spu_comp_icode[index][scode] == 0)
878 abort ();
879
880 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 881 (op0, op_mode))
882 op0 = force_reg (op_mode, op0);
644459d0 883 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 884 (op1, op_mode))
885 op1 = force_reg (op_mode, op1);
644459d0 886 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 887 op0, op1);
644459d0 888 if (comp_rtx == 0)
889 abort ();
890 emit_insn (comp_rtx);
891
5d70b918 892 if (eq_test)
893 {
894 eq_result = gen_reg_rtx (comp_mode);
895 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 896 op0, op1);
5d70b918 897 if (eq_rtx == 0)
898 abort ();
899 emit_insn (eq_rtx);
d6bf3b14 900 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 901 gcc_assert (ior_code != CODE_FOR_nothing);
902 emit_insn (GEN_FCN (ior_code)
903 (compare_result, compare_result, eq_result));
904 }
644459d0 905 }
906
907 if (is_set == 0)
908 {
909 rtx bcomp;
910 rtx loc_ref;
911
912 /* We don't have branch on QI compare insns, so we convert the
913 QI compare result to a HI result. */
914 if (comp_mode == QImode)
915 {
916 rtx old_res = compare_result;
917 compare_result = gen_reg_rtx (HImode);
918 comp_mode = HImode;
919 emit_insn (gen_extendqihi2 (compare_result, old_res));
920 }
921
922 if (reverse_test)
923 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
924 else
925 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
926
74f4459c 927 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 928 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
929 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
930 loc_ref, pc_rtx)));
931 }
932 else if (is_set == 2)
933 {
74f4459c 934 rtx target = operands[0];
644459d0 935 int compare_size = GET_MODE_BITSIZE (comp_mode);
936 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
937 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
938 rtx select_mask;
939 rtx op_t = operands[2];
940 rtx op_f = operands[3];
941
942 /* The result of the comparison can be SI, HI or QI mode. Create a
943 mask based on that result. */
944 if (target_size > compare_size)
945 {
946 select_mask = gen_reg_rtx (mode);
947 emit_insn (gen_extend_compare (select_mask, compare_result));
948 }
949 else if (target_size < compare_size)
950 select_mask =
951 gen_rtx_SUBREG (mode, compare_result,
952 (compare_size - target_size) / BITS_PER_UNIT);
953 else if (comp_mode != mode)
954 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
955 else
956 select_mask = compare_result;
957
958 if (GET_MODE (target) != GET_MODE (op_t)
959 || GET_MODE (target) != GET_MODE (op_f))
960 abort ();
961
962 if (reverse_test)
963 emit_insn (gen_selb (target, op_t, op_f, select_mask));
964 else
965 emit_insn (gen_selb (target, op_f, op_t, select_mask));
966 }
967 else
968 {
74f4459c 969 rtx target = operands[0];
644459d0 970 if (reverse_test)
971 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
972 gen_rtx_NOT (comp_mode, compare_result)));
973 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
974 emit_insn (gen_extendhisi2 (target, compare_result));
975 else if (GET_MODE (target) == SImode
976 && GET_MODE (compare_result) == QImode)
977 emit_insn (gen_extend_compare (target, compare_result));
978 else
979 emit_move_insn (target, compare_result);
980 }
981}
982
983HOST_WIDE_INT
984const_double_to_hwint (rtx x)
985{
986 HOST_WIDE_INT val;
987 REAL_VALUE_TYPE rv;
988 if (GET_MODE (x) == SFmode)
989 {
990 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
991 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
992 }
993 else if (GET_MODE (x) == DFmode)
994 {
995 long l[2];
996 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
997 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
998 val = l[0];
999 val = (val << 32) | (l[1] & 0xffffffff);
1000 }
1001 else
1002 abort ();
1003 return val;
1004}
1005
1006rtx
1007hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1008{
1009 long tv[2];
1010 REAL_VALUE_TYPE rv;
1011 gcc_assert (mode == SFmode || mode == DFmode);
1012
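  /* real_from_target expects the target image as 32-bit words; the
     shifts below sign-extend the 32-bit halves of V into host longs.  */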
1013 if (mode == SFmode)
1014 tv[0] = (v << 32) >> 32;
1015 else if (mode == DFmode)
1016 {
1017 tv[1] = (v << 32) >> 32;
1018 tv[0] = v >> 32;
1019 }
1020 real_from_target (&rv, tv, mode);
1021 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1022}
1023
1024void
1025print_operand_address (FILE * file, register rtx addr)
1026{
1027 rtx reg;
1028 rtx offset;
1029
e04cf423 1030 if (GET_CODE (addr) == AND
1031 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1032 && INTVAL (XEXP (addr, 1)) == -16)
1033 addr = XEXP (addr, 0);
1034
644459d0 1035 switch (GET_CODE (addr))
1036 {
1037 case REG:
1038 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1039 break;
1040
1041 case PLUS:
1042 reg = XEXP (addr, 0);
1043 offset = XEXP (addr, 1);
1044 if (GET_CODE (offset) == REG)
1045 {
1046 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1047 reg_names[REGNO (offset)]);
1048 }
1049 else if (GET_CODE (offset) == CONST_INT)
1050 {
1051 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1052 INTVAL (offset), reg_names[REGNO (reg)]);
1053 }
1054 else
1055 abort ();
1056 break;
1057
1058 case CONST:
1059 case LABEL_REF:
1060 case SYMBOL_REF:
1061 case CONST_INT:
1062 output_addr_const (file, addr);
1063 break;
1064
1065 default:
1066 debug_rtx (addr);
1067 abort ();
1068 }
1069}
1070
1071void
1072print_operand (FILE * file, rtx x, int code)
1073{
1074 enum machine_mode mode = GET_MODE (x);
1075 HOST_WIDE_INT val;
1076 unsigned char arr[16];
1077 int xcode = GET_CODE (x);
dea01258 1078 int i, info;
644459d0 1079 if (GET_MODE (x) == VOIDmode)
1080 switch (code)
1081 {
644459d0 1082 case 'L': /* 128 bits, signed */
1083 case 'm': /* 128 bits, signed */
1084 case 'T': /* 128 bits, signed */
1085 case 't': /* 128 bits, signed */
1086 mode = TImode;
1087 break;
644459d0 1088 case 'K': /* 64 bits, signed */
1089 case 'k': /* 64 bits, signed */
1090 case 'D': /* 64 bits, signed */
1091 case 'd': /* 64 bits, signed */
1092 mode = DImode;
1093 break;
644459d0 1094 case 'J': /* 32 bits, signed */
1095 case 'j': /* 32 bits, signed */
1096 case 's': /* 32 bits, signed */
1097 case 'S': /* 32 bits, signed */
1098 mode = SImode;
1099 break;
1100 }
1101 switch (code)
1102 {
1103
1104 case 'j': /* 32 bits, signed */
1105 case 'k': /* 64 bits, signed */
1106 case 'm': /* 128 bits, signed */
1107 if (xcode == CONST_INT
1108 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1109 {
1110 gcc_assert (logical_immediate_p (x, mode));
1111 constant_to_array (mode, x, arr);
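	  /* The first four bytes of the array form the 32-bit immediate,
	     most significant byte first.  */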
1112 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1113 val = trunc_int_for_mode (val, SImode);
1114 switch (which_logical_immediate (val))
1115 {
1116 case SPU_ORI:
1117 break;
1118 case SPU_ORHI:
1119 fprintf (file, "h");
1120 break;
1121 case SPU_ORBI:
1122 fprintf (file, "b");
1123 break;
1124 default:
1125 gcc_unreachable();
1126 }
1127 }
1128 else
1129 gcc_unreachable();
1130 return;
1131
1132 case 'J': /* 32 bits, signed */
1133 case 'K': /* 64 bits, signed */
1134 case 'L': /* 128 bits, signed */
1135 if (xcode == CONST_INT
1136 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1137 {
1138 gcc_assert (logical_immediate_p (x, mode)
1139 || iohl_immediate_p (x, mode));
1140 constant_to_array (mode, x, arr);
1141 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1142 val = trunc_int_for_mode (val, SImode);
1143 switch (which_logical_immediate (val))
1144 {
1145 case SPU_ORI:
1146 case SPU_IOHL:
1147 break;
1148 case SPU_ORHI:
1149 val = trunc_int_for_mode (val, HImode);
1150 break;
1151 case SPU_ORBI:
1152 val = trunc_int_for_mode (val, QImode);
1153 break;
1154 default:
1155 gcc_unreachable();
1156 }
1157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1158 }
1159 else
1160 gcc_unreachable();
1161 return;
1162
1163 case 't': /* 128 bits, signed */
1164 case 'd': /* 64 bits, signed */
1165 case 's': /* 32 bits, signed */
dea01258 1166 if (CONSTANT_P (x))
644459d0 1167 {
dea01258 1168 enum immediate_class c = classify_immediate (x, mode);
1169 switch (c)
1170 {
1171 case IC_IL1:
1172 constant_to_array (mode, x, arr);
1173 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1174 val = trunc_int_for_mode (val, SImode);
1175 switch (which_immediate_load (val))
1176 {
1177 case SPU_IL:
1178 break;
1179 case SPU_ILA:
1180 fprintf (file, "a");
1181 break;
1182 case SPU_ILH:
1183 fprintf (file, "h");
1184 break;
1185 case SPU_ILHU:
1186 fprintf (file, "hu");
1187 break;
1188 default:
1189 gcc_unreachable ();
1190 }
1191 break;
1192 case IC_CPAT:
1193 constant_to_array (mode, x, arr);
1194 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1195 if (info == 1)
1196 fprintf (file, "b");
1197 else if (info == 2)
1198 fprintf (file, "h");
1199 else if (info == 4)
1200 fprintf (file, "w");
1201 else if (info == 8)
1202 fprintf (file, "d");
1203 break;
1204 case IC_IL1s:
1205 if (xcode == CONST_VECTOR)
1206 {
1207 x = CONST_VECTOR_ELT (x, 0);
1208 xcode = GET_CODE (x);
1209 }
1210 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1211 fprintf (file, "a");
1212 else if (xcode == HIGH)
1213 fprintf (file, "hu");
1214 break;
1215 case IC_FSMBI:
5df189be 1216 case IC_FSMBI2:
dea01258 1217 case IC_IL2:
1218 case IC_IL2s:
1219 case IC_POOL:
1220 abort ();
1221 }
644459d0 1222 }
644459d0 1223 else
1224 gcc_unreachable ();
1225 return;
1226
1227 case 'T': /* 128 bits, signed */
1228 case 'D': /* 64 bits, signed */
1229 case 'S': /* 32 bits, signed */
dea01258 1230 if (CONSTANT_P (x))
644459d0 1231 {
dea01258 1232 enum immediate_class c = classify_immediate (x, mode);
1233 switch (c)
644459d0 1234 {
dea01258 1235 case IC_IL1:
1236 constant_to_array (mode, x, arr);
1237 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1238 val = trunc_int_for_mode (val, SImode);
1239 switch (which_immediate_load (val))
1240 {
1241 case SPU_IL:
1242 case SPU_ILA:
1243 break;
1244 case SPU_ILH:
1245 case SPU_ILHU:
1246 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1247 break;
1248 default:
1249 gcc_unreachable ();
1250 }
1251 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1252 break;
1253 case IC_FSMBI:
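	    /* fsmbi expands each bit of its 16-bit immediate into a full
	       byte, so collect bit 0 of each of the 16 bytes, most
	       significant first, to recover the immediate.  */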
1254 constant_to_array (mode, x, arr);
1255 val = 0;
1256 for (i = 0; i < 16; i++)
1257 {
1258 val <<= 1;
1259 val |= arr[i] & 1;
1260 }
1261 print_operand (file, GEN_INT (val), 0);
1262 break;
1263 case IC_CPAT:
1264 constant_to_array (mode, x, arr);
1265 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1266 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1267 break;
dea01258 1268 case IC_IL1s:
dea01258 1269 if (xcode == HIGH)
5df189be 1270 x = XEXP (x, 0);
1271 if (GET_CODE (x) == CONST_VECTOR)
1272 x = CONST_VECTOR_ELT (x, 0);
1273 output_addr_const (file, x);
1274 if (xcode == HIGH)
1275 fprintf (file, "@h");
644459d0 1276 break;
dea01258 1277 case IC_IL2:
1278 case IC_IL2s:
5df189be 1279 case IC_FSMBI2:
dea01258 1280 case IC_POOL:
1281 abort ();
644459d0 1282 }
c8befdb9 1283 }
644459d0 1284 else
1285 gcc_unreachable ();
1286 return;
1287
644459d0 1288 case 'C':
1289 if (xcode == CONST_INT)
1290 {
 1291	  /* Only the 4 least significant bits are relevant for generating
1292 control word instructions. */
1293 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1294 return;
1295 }
1296 break;
1297
1298 case 'M': /* print code for c*d */
1299 if (GET_CODE (x) == CONST_INT)
1300 switch (INTVAL (x))
1301 {
1302 case 1:
1303 fprintf (file, "b");
1304 break;
1305 case 2:
1306 fprintf (file, "h");
1307 break;
1308 case 4:
1309 fprintf (file, "w");
1310 break;
1311 case 8:
1312 fprintf (file, "d");
1313 break;
1314 default:
1315 gcc_unreachable();
1316 }
1317 else
1318 gcc_unreachable();
1319 return;
1320
1321 case 'N': /* Negate the operand */
1322 if (xcode == CONST_INT)
1323 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1324 else if (xcode == CONST_VECTOR)
1325 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1326 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1327 return;
1328
1329 case 'I': /* enable/disable interrupts */
1330 if (xcode == CONST_INT)
1331 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1332 return;
1333
1334 case 'b': /* branch modifiers */
1335 if (xcode == REG)
1336 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1337 else if (COMPARISON_P (x))
1338 fprintf (file, "%s", xcode == NE ? "n" : "");
1339 return;
1340
1341 case 'i': /* indirect call */
1342 if (xcode == MEM)
1343 {
1344 if (GET_CODE (XEXP (x, 0)) == REG)
1345 /* Used in indirect function calls. */
1346 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1347 else
1348 output_address (XEXP (x, 0));
1349 }
1350 return;
1351
1352 case 'p': /* load/store */
1353 if (xcode == MEM)
1354 {
1355 x = XEXP (x, 0);
1356 xcode = GET_CODE (x);
1357 }
e04cf423 1358 if (xcode == AND)
1359 {
1360 x = XEXP (x, 0);
1361 xcode = GET_CODE (x);
1362 }
644459d0 1363 if (xcode == REG)
1364 fprintf (file, "d");
1365 else if (xcode == CONST_INT)
1366 fprintf (file, "a");
1367 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1368 fprintf (file, "r");
1369 else if (xcode == PLUS || xcode == LO_SUM)
1370 {
1371 if (GET_CODE (XEXP (x, 1)) == REG)
1372 fprintf (file, "x");
1373 else
1374 fprintf (file, "d");
1375 }
1376 return;
1377
5df189be 1378 case 'e':
1379 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1380 val &= 0x7;
1381 output_addr_const (file, GEN_INT (val));
1382 return;
1383
1384 case 'f':
1385 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1386 val &= 0x1f;
1387 output_addr_const (file, GEN_INT (val));
1388 return;
1389
1390 case 'g':
1391 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1392 val &= 0x3f;
1393 output_addr_const (file, GEN_INT (val));
1394 return;
1395
1396 case 'h':
1397 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1398 val = (val >> 3) & 0x1f;
1399 output_addr_const (file, GEN_INT (val));
1400 return;
1401
1402 case 'E':
1403 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1404 val = -val;
1405 val &= 0x7;
1406 output_addr_const (file, GEN_INT (val));
1407 return;
1408
1409 case 'F':
1410 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1411 val = -val;
1412 val &= 0x1f;
1413 output_addr_const (file, GEN_INT (val));
1414 return;
1415
1416 case 'G':
1417 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1418 val = -val;
1419 val &= 0x3f;
1420 output_addr_const (file, GEN_INT (val));
1421 return;
1422
1423 case 'H':
1424 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1425 val = -(val & -8ll);
1426 val = (val >> 3) & 0x1f;
1427 output_addr_const (file, GEN_INT (val));
1428 return;
1429
56c7bfc2 1430 case 'v':
1431 case 'w':
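      /* For a single-precision float constant, the leading two bytes hold
	 the sign and exponent; extract the 8-bit exponent field and
	 subtract the IEEE bias of 127.  */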
1432 constant_to_array (mode, x, arr);
1433 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1434 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1435 return;
1436
644459d0 1437 case 0:
1438 if (xcode == REG)
1439 fprintf (file, "%s", reg_names[REGNO (x)]);
1440 else if (xcode == MEM)
1441 output_address (XEXP (x, 0));
1442 else if (xcode == CONST_VECTOR)
dea01258 1443 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1444 else
1445 output_addr_const (file, x);
1446 return;
1447
f6a0d06f 1448 /* unused letters
56c7bfc2 1449 o qr u yz
5df189be 1450 AB OPQR UVWXYZ */
644459d0 1451 default:
1452 output_operand_lossage ("invalid %%xn code");
1453 }
1454 gcc_unreachable ();
1455}
1456
644459d0 1457/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1458 caller saved register. For leaf functions it is more efficient to
1459 use a volatile register because we won't need to save and restore the
1460 pic register. This routine is only valid after register allocation
1461 is completed, so we can pick an unused register. */
1462static rtx
1463get_pic_reg (void)
1464{
644459d0 1465 if (!reload_completed && !reload_in_progress)
1466 abort ();
5eb28709 1467
1468 /* If we've already made the decision, we need to keep with it. Once we've
1469 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1470 return true since the register is now live; this should not cause us to
1471 "switch back" to using pic_offset_table_rtx. */
1472 if (!cfun->machine->pic_reg)
1473 {
d5bf7b64 1474 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
5eb28709 1475 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1476 else
1477 cfun->machine->pic_reg = pic_offset_table_rtx;
1478 }
1479
1480 return cfun->machine->pic_reg;
644459d0 1481}
1482
5df189be 1483/* Split constant addresses to handle cases that are too large.
1484 Add in the pic register when in PIC mode.
1485 Split immediates that require more than 1 instruction. */
dea01258 1486int
1487spu_split_immediate (rtx * ops)
c8befdb9 1488{
dea01258 1489 enum machine_mode mode = GET_MODE (ops[0]);
1490 enum immediate_class c = classify_immediate (ops[1], mode);
1491
1492 switch (c)
c8befdb9 1493 {
dea01258 1494 case IC_IL2:
1495 {
1496 unsigned char arrhi[16];
1497 unsigned char arrlo[16];
98bbec1e 1498 rtx to, temp, hi, lo;
dea01258 1499 int i;
98bbec1e 1500 enum machine_mode imode = mode;
1501 /* We need to do reals as ints because the constant used in the
1502 IOR might not be a legitimate real constant. */
1503 imode = int_mode_for_mode (mode);
dea01258 1504 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1505 if (imode != mode)
1506 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1507 else
1508 to = ops[0];
1509 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
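	/* Split each 32-bit word into its high halfword (loaded with ilhu)
	   and its low halfword (merged in with iohl by the IOR below).  */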
dea01258 1510 for (i = 0; i < 16; i += 4)
1511 {
1512 arrlo[i + 2] = arrhi[i + 2];
1513 arrlo[i + 3] = arrhi[i + 3];
1514 arrlo[i + 0] = arrlo[i + 1] = 0;
1515 arrhi[i + 2] = arrhi[i + 3] = 0;
1516 }
98bbec1e 1517 hi = array_to_constant (imode, arrhi);
1518 lo = array_to_constant (imode, arrlo);
1519 emit_move_insn (temp, hi);
dea01258 1520 emit_insn (gen_rtx_SET
98bbec1e 1521 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1522 return 1;
1523 }
5df189be 1524 case IC_FSMBI2:
1525 {
1526 unsigned char arr_fsmbi[16];
1527 unsigned char arr_andbi[16];
1528 rtx to, reg_fsmbi, reg_and;
1529 int i;
1530 enum machine_mode imode = mode;
1531 /* We need to do reals as ints because the constant used in the
1532 * AND might not be a legitimate real constant. */
1533 imode = int_mode_for_mode (mode);
1534 constant_to_array (mode, ops[1], arr_fsmbi);
1535 if (imode != mode)
1536 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1537 else
1538 to = ops[0];
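	/* An FSMBI2 constant repeats a single byte value in some of the 16
	   positions.  Turn every nonzero byte into 0xff for the fsmbi and
	   splat the original byte value into the mask for the following
	   AND.  */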
1539 for (i = 0; i < 16; i++)
1540 if (arr_fsmbi[i] != 0)
1541 {
1542 arr_andbi[0] = arr_fsmbi[i];
1543 arr_fsmbi[i] = 0xff;
1544 }
1545 for (i = 1; i < 16; i++)
1546 arr_andbi[i] = arr_andbi[0];
1547 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1548 reg_and = array_to_constant (imode, arr_andbi);
1549 emit_move_insn (to, reg_fsmbi);
1550 emit_insn (gen_rtx_SET
1551 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1552 return 1;
1553 }
dea01258 1554 case IC_POOL:
1555 if (reload_in_progress || reload_completed)
1556 {
1557 rtx mem = force_const_mem (mode, ops[1]);
1558 if (TARGET_LARGE_MEM)
1559 {
1560 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1561 emit_move_insn (addr, XEXP (mem, 0));
1562 mem = replace_equiv_address (mem, addr);
1563 }
1564 emit_move_insn (ops[0], mem);
1565 return 1;
1566 }
1567 break;
1568 case IC_IL1s:
1569 case IC_IL2s:
1570 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1571 {
1572 if (c == IC_IL2s)
1573 {
5df189be 1574 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1575 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1576 }
1577 else if (flag_pic)
1578 emit_insn (gen_pic (ops[0], ops[1]));
1579 if (flag_pic)
1580 {
1581 rtx pic_reg = get_pic_reg ();
1582 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
dea01258 1583 }
1584 return flag_pic || c == IC_IL2s;
1585 }
1586 break;
1587 case IC_IL1:
1588 case IC_FSMBI:
1589 case IC_CPAT:
1590 break;
c8befdb9 1591 }
dea01258 1592 return 0;
c8befdb9 1593}
1594
644459d0 1595/* SAVING is TRUE when we are generating the actual load and store
1596 instructions for REGNO. When determining the size of the stack
 1597   needed for saving registers we must allocate enough space for the
1598 worst case, because we don't always have the information early enough
1599 to not allocate it. But we can at least eliminate the actual loads
1600 and stores during the prologue/epilogue. */
1601static int
1602need_to_save_reg (int regno, int saving)
1603{
3072d30e 1604 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1605 return 1;
1606 if (flag_pic
1607 && regno == PIC_OFFSET_TABLE_REGNUM
5eb28709 1608 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
644459d0 1609 return 1;
1610 return 0;
1611}
1612
1613/* This function is only correct starting with local register
1614 allocation */
1615int
1616spu_saved_regs_size (void)
1617{
1618 int reg_save_size = 0;
1619 int regno;
1620
1621 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1622 if (need_to_save_reg (regno, 0))
1623 reg_save_size += 0x10;
1624 return reg_save_size;
1625}
1626
1627static rtx
1628frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1629{
1630 rtx reg = gen_rtx_REG (V4SImode, regno);
1631 rtx mem =
1632 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1633 return emit_insn (gen_movv4si (mem, reg));
1634}
1635
1636static rtx
1637frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1638{
1639 rtx reg = gen_rtx_REG (V4SImode, regno);
1640 rtx mem =
1641 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1642 return emit_insn (gen_movv4si (reg, mem));
1643}
1644
1645/* This happens after reload, so we need to expand it. */
1646static rtx
1647frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1648{
1649 rtx insn;
1650 if (satisfies_constraint_K (GEN_INT (imm)))
1651 {
1652 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1653 }
1654 else
1655 {
3072d30e 1656 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1657 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1658 if (REGNO (src) == REGNO (scratch))
1659 abort ();
1660 }
644459d0 1661 return insn;
1662}
1663
1664/* Return nonzero if this function is known to have a null epilogue. */
1665
1666int
1667direct_return (void)
1668{
1669 if (reload_completed)
1670 {
1671 if (cfun->static_chain_decl == 0
1672 && (spu_saved_regs_size ()
1673 + get_frame_size ()
abe32cce 1674 + crtl->outgoing_args_size
1675 + crtl->args.pretend_args_size == 0)
d5bf7b64 1676 && crtl->is_leaf)
644459d0 1677 return 1;
1678 }
1679 return 0;
1680}
1681
1682/*
1683 The stack frame looks like this:
1684 +-------------+
1685 | incoming |
a8e019fa 1686 | args |
1687 AP -> +-------------+
644459d0 1688 | $lr save |
1689 +-------------+
1690 prev SP | back chain |
1691 +-------------+
1692 | var args |
abe32cce 1693 | reg save | crtl->args.pretend_args_size bytes
644459d0 1694 +-------------+
1695 | ... |
1696 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1697 FP -> +-------------+
644459d0 1698 | ... |
a8e019fa 1699 | vars | get_frame_size() bytes
1700 HFP -> +-------------+
644459d0 1701 | ... |
1702 | outgoing |
abe32cce 1703 | args | crtl->outgoing_args_size bytes
644459d0 1704 +-------------+
1705 | $lr of next |
1706 | frame |
1707 +-------------+
a8e019fa 1708 | back chain |
1709 SP -> +-------------+
644459d0 1710
1711*/
1712void
1713spu_expand_prologue (void)
1714{
1715 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1716 HOST_WIDE_INT total_size;
1717 HOST_WIDE_INT saved_regs_size;
1718 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1719 rtx scratch_reg_0, scratch_reg_1;
1720 rtx insn, real;
1721
5eb28709 1722 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1723 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 1724
1725 if (spu_naked_function_p (current_function_decl))
1726 return;
1727
1728 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1729 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1730
1731 saved_regs_size = spu_saved_regs_size ();
1732 total_size = size + saved_regs_size
abe32cce 1733 + crtl->outgoing_args_size
1734 + crtl->args.pretend_args_size;
644459d0 1735
d5bf7b64 1736 if (!crtl->is_leaf
18d50ae6 1737 || cfun->calls_alloca || total_size > 0)
644459d0 1738 total_size += STACK_POINTER_OFFSET;
1739
1740 /* Save this first because code after this might use the link
1741 register as a scratch register. */
d5bf7b64 1742 if (!crtl->is_leaf)
644459d0 1743 {
1744 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1745 RTX_FRAME_RELATED_P (insn) = 1;
1746 }
1747
1748 if (total_size > 0)
1749 {
abe32cce 1750 offset = -crtl->args.pretend_args_size;
644459d0 1751 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1752 if (need_to_save_reg (regno, 1))
1753 {
1754 offset -= 16;
1755 insn = frame_emit_store (regno, sp_reg, offset);
1756 RTX_FRAME_RELATED_P (insn) = 1;
1757 }
1758 }
1759
5eb28709 1760 if (flag_pic && cfun->machine->pic_reg)
644459d0 1761 {
5eb28709 1762 rtx pic_reg = cfun->machine->pic_reg;
644459d0 1763 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1764 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1765 }
1766
1767 if (total_size > 0)
1768 {
1769 if (flag_stack_check)
1770 {
d819917f 1771 /* We compare against total_size-1 because
644459d0 1772 ($sp >= total_size) <=> ($sp > total_size-1) */
1773 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1774 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1775 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1776 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1777 {
1778 emit_move_insn (scratch_v4si, size_v4si);
1779 size_v4si = scratch_v4si;
1780 }
1781 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1782 emit_insn (gen_vec_extractv4si
1783 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1784 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1785 }
1786
1787 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1788 the value of the previous $sp because we save it as the back
1789 chain. */
1790 if (total_size <= 2000)
1791 {
1792 /* In this case we save the back chain first. */
1793 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1794 insn =
1795 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1796 }
644459d0 1797 else
1798 {
1799 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1800 insn =
1801 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1802 }
1803 RTX_FRAME_RELATED_P (insn) = 1;
1804 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1805 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1806
1807 if (total_size > 2000)
1808 {
1809 /* Save the back chain ptr */
1810 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1811 }
1812
1813 if (frame_pointer_needed)
1814 {
1815 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1816 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1817 + crtl->outgoing_args_size;
644459d0 1818 /* Set the new frame_pointer */
d8dfeb55 1819 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1820 RTX_FRAME_RELATED_P (insn) = 1;
1821 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1822 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1823 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1824 }
1825 }
1826
8c0dd614 1827 if (flag_stack_usage_info)
a512540d 1828 current_function_static_stack_size = total_size;
644459d0 1829}
1830
1831void
1832spu_expand_epilogue (bool sibcall_p)
1833{
1834 int size = get_frame_size (), offset, regno;
1835 HOST_WIDE_INT saved_regs_size, total_size;
1836 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 1837 rtx scratch_reg_0;
644459d0 1838
644459d0 1839 if (spu_naked_function_p (current_function_decl))
1840 return;
1841
1842 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1843
1844 saved_regs_size = spu_saved_regs_size ();
1845 total_size = size + saved_regs_size
abe32cce 1846 + crtl->outgoing_args_size
1847 + crtl->args.pretend_args_size;
644459d0 1848
d5bf7b64 1849 if (!crtl->is_leaf
18d50ae6 1850 || cfun->calls_alloca || total_size > 0)
644459d0 1851 total_size += STACK_POINTER_OFFSET;
1852
1853 if (total_size > 0)
1854 {
18d50ae6 1855 if (cfun->calls_alloca)
644459d0 1856 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1857 else
1858 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1859
1860
1861 if (saved_regs_size > 0)
1862 {
abe32cce 1863 offset = -crtl->args.pretend_args_size;
644459d0 1864 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1865 if (need_to_save_reg (regno, 1))
1866 {
1867 offset -= 0x10;
1868 frame_emit_load (regno, sp_reg, offset);
1869 }
1870 }
1871 }
1872
d5bf7b64 1873 if (!crtl->is_leaf)
644459d0 1874 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1875
1876 if (!sibcall_p)
1877 {
18b42941 1878 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 1879 emit_jump_insn (gen__return ());
644459d0 1880 }
644459d0 1881}
1882
1883rtx
1884spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1885{
1886 if (count != 0)
1887 return 0;
1888 /* This is inefficient because it ends up copying to a save-register
1889 which then gets saved even though $lr has already been saved. But
1890 it does generate better code for leaf functions and we don't need
1891 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1892 used for __builtin_return_address anyway, so maybe we don't care if
1893 it's inefficient. */
1894 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1895}
1896\f
1897
1898/* Given VAL, generate a constant appropriate for MODE.
1899 If MODE is a vector mode, every element will be VAL.
1900 For TImode, VAL will be zero extended to 128 bits. */
1901rtx
1902spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1903{
1904 rtx inner;
1905 rtvec v;
1906 int units, i;
1907
1908 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1909 || GET_MODE_CLASS (mode) == MODE_FLOAT
1910 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1911 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1912
1913 if (GET_MODE_CLASS (mode) == MODE_INT)
1914 return immed_double_const (val, 0, mode);
1915
1916 /* val is the bit representation of the float */
1917 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1918 return hwint_to_const_double (mode, val);
1919
1920 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1921 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1922 else
1923 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1924
1925 units = GET_MODE_NUNITS (mode);
1926
1927 v = rtvec_alloc (units);
1928
1929 for (i = 0; i < units; ++i)
1930 RTVEC_ELT (v, i) = inner;
1931
1932 return gen_rtx_CONST_VECTOR (mode, v);
1933}
644459d0 1934
5474166e 1935/* Create a MODE vector constant from 4 ints. */
1936rtx
1937spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1938{
1939 unsigned char arr[16];
1940 arr[0] = (a >> 24) & 0xff;
1941 arr[1] = (a >> 16) & 0xff;
1942 arr[2] = (a >> 8) & 0xff;
1943 arr[3] = (a >> 0) & 0xff;
1944 arr[4] = (b >> 24) & 0xff;
1945 arr[5] = (b >> 16) & 0xff;
1946 arr[6] = (b >> 8) & 0xff;
1947 arr[7] = (b >> 0) & 0xff;
1948 arr[8] = (c >> 24) & 0xff;
1949 arr[9] = (c >> 16) & 0xff;
1950 arr[10] = (c >> 8) & 0xff;
1951 arr[11] = (c >> 0) & 0xff;
1952 arr[12] = (d >> 24) & 0xff;
1953 arr[13] = (d >> 16) & 0xff;
1954 arr[14] = (d >> 8) & 0xff;
1955 arr[15] = (d >> 0) & 0xff;
1956 return array_to_constant(mode, arr);
1957}
5a976006 1958\f
1959/* branch hint stuff */
5474166e 1960
644459d0 1961/* An array of these is used to propagate hints to predecessor blocks. */
1962struct spu_bb_info
1963{
5a976006 1964 rtx prop_jump; /* propagated from another block */
1965 int bb_index; /* the original block. */
644459d0 1966};
5a976006 1967static struct spu_bb_info *spu_bb_info;
644459d0 1968
5a976006 1969#define STOP_HINT_P(INSN) \
aa90bb35 1970 (CALL_P(INSN) \
5a976006 1971 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1972 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1973
1974/* 1 when RTX is a hinted branch or its target. We keep track of
1975 what has been hinted so the safe-hint code can test it easily. */
1976#define HINTED_P(RTX) \
1977 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1978
1979/* 1 when RTX is an insn that must be scheduled on an even boundary. */
1980#define SCHED_ON_EVEN_P(RTX) \
1981 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1982
1983/* Emit a nop for INSN such that the two will dual issue. This assumes
1984 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1985 We check for TImode to handle a MULTI1 insn which has dual issued its
b1135d9a 1986 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
5a976006 1987static void
1988emit_nop_for_insn (rtx insn)
644459d0 1989{
5a976006 1990 int p;
1991 rtx new_insn;
b1135d9a 1992
1993 /* We need to handle JUMP_TABLE_DATA separately. */
1994 if (JUMP_TABLE_DATA_P (insn))
1995 {
1996 new_insn = emit_insn_after (gen_lnop(), insn);
1997 recog_memoized (new_insn);
1998 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1999 return;
2000 }
2001
5a976006 2002 p = get_pipe (insn);
2003 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2004 new_insn = emit_insn_after (gen_lnop (), insn);
2005 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2006 {
5a976006 2007 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2008 PUT_MODE (new_insn, TImode);
2009 PUT_MODE (insn, VOIDmode);
2010 }
2011 else
2012 new_insn = emit_insn_after (gen_lnop (), insn);
2013 recog_memoized (new_insn);
d53c050c 2014 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
5a976006 2015}
2016
2017/* Insert nops in basic blocks to meet dual issue alignment
2018 requirements. Also make sure hbrp and hint instructions are at least
2019 one cycle apart, possibly inserting a nop. */
2020static void
2021pad_bb(void)
2022{
2023 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2024 int length;
2025 int addr;
2026
2027 /* This sets up INSN_ADDRESSES. */
2028 shorten_branches (get_insns ());
2029
2030 /* Keep track of length added by nops. */
2031 length = 0;
2032
2033 prev_insn = 0;
2034 insn = get_insns ();
2035 if (!active_insn_p (insn))
2036 insn = next_active_insn (insn);
2037 for (; insn; insn = next_insn)
2038 {
2039 next_insn = next_active_insn (insn);
2040 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2041 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2042 {
5a976006 2043 if (hbr_insn)
2044 {
2045 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2046 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2047 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2048 || (a1 - a0 == 4))
2049 {
2050 prev_insn = emit_insn_before (gen_lnop (), insn);
2051 PUT_MODE (prev_insn, GET_MODE (insn));
2052 PUT_MODE (insn, TImode);
d53c050c 2053 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
5a976006 2054 length += 4;
2055 }
2056 }
2057 hbr_insn = insn;
2058 }
2059 if (INSN_CODE (insn) == CODE_FOR_blockage)
2060 {
2061 if (GET_MODE (insn) == TImode)
2062 PUT_MODE (next_insn, TImode);
2063 insn = next_insn;
2064 next_insn = next_active_insn (insn);
2065 }
2066 addr = INSN_ADDRESSES (INSN_UID (insn));
2067 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2068 {
2069 if (((addr + length) & 7) != 0)
2070 {
2071 emit_nop_for_insn (prev_insn);
2072 length += 4;
2073 }
644459d0 2074 }
5a976006 2075 else if (GET_MODE (insn) == TImode
2076 && ((next_insn && GET_MODE (next_insn) != TImode)
2077 || get_attr_type (insn) == TYPE_MULTI0)
2078 && ((addr + length) & 7) != 0)
2079 {
2080 /* prev_insn will always be set because the first insn is
2081 always 8-byte aligned. */
2082 emit_nop_for_insn (prev_insn);
2083 length += 4;
2084 }
2085 prev_insn = insn;
644459d0 2086 }
644459d0 2087}
2088
5a976006 2089\f
2090/* Routines for branch hints. */
2091
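/* Emit an hbr instruction before BEFORE, hinting that BRANCH, which is
   DISTANCE bytes away, will go to TARGET.  Record the block containing
   BRANCH in BLOCKS so it can be re-split after hints are emitted.
   Blockages and nops are added as needed to keep the hint within range
   of the branch.  */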
644459d0 2092static void
5a976006 2093spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2094 int distance, sbitmap blocks)
644459d0 2095{
5a976006 2096 rtx branch_label = 0;
2097 rtx hint;
2098 rtx insn;
2099 rtx table;
644459d0 2100
2101 if (before == 0 || branch == 0 || target == 0)
2102 return;
2103
5a976006 2104	  /* While scheduling we require hints to be no further than 600 bytes
	 2105	     away, so we need to enforce that here too.  */
644459d0 2106 if (distance > 600)
2107 return;
2108
5a976006 2109	  /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2110 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2111 before = NEXT_INSN (before);
644459d0 2112
2113 branch_label = gen_label_rtx ();
2114 LABEL_NUSES (branch_label)++;
2115 LABEL_PRESERVE_P (branch_label) = 1;
2116 insn = emit_label_before (branch_label, branch);
2117 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
08b7917c 2118 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
5a976006 2119
2120 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2121 recog_memoized (hint);
d53c050c 2122 INSN_LOCATION (hint) = INSN_LOCATION (branch);
5a976006 2123 HINTED_P (branch) = 1;
644459d0 2124
5a976006 2125 if (GET_CODE (target) == LABEL_REF)
2126 HINTED_P (XEXP (target, 0)) = 1;
2127 else if (tablejump_p (branch, 0, &table))
644459d0 2128 {
5a976006 2129 rtvec vec;
2130 int j;
2131 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2132 vec = XVEC (PATTERN (table), 0);
2133 else
2134 vec = XVEC (PATTERN (table), 1);
2135 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2136 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2137 }
5a976006 2138
2139 if (distance >= 588)
644459d0 2140 {
5a976006 2141 /* Make sure the hint isn't scheduled any earlier than this point,
	 2142	         which could make it too far for the branch offset to fit.  */
2fbdf9ef 2143 insn = emit_insn_before (gen_blockage (), hint);
2144 recog_memoized (insn);
d53c050c 2145 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2146 }
2147 else if (distance <= 8 * 4)
2148 {
2149 /* To guarantee at least 8 insns between the hint and branch we
2150 insert nops. */
2151 int d;
2152 for (d = distance; d < 8 * 4; d += 4)
2153 {
2154 insn =
2155 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2156 recog_memoized (insn);
d53c050c 2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2158 }
2159
2160 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2161 insn = emit_insn_after (gen_blockage (), hint);
2162 recog_memoized (insn);
d53c050c 2163 INSN_LOCATION (insn) = INSN_LOCATION (hint);
5a976006 2164
2165 /* Make sure any nops inserted aren't scheduled after the call. */
2166 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2167 {
2168 insn = emit_insn_before (gen_blockage (), branch);
2169 recog_memoized (insn);
d53c050c 2170 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2fbdf9ef 2171 }
644459d0 2172 }
644459d0 2173}
2174
2175/* Returns 0 if we don't want a hint for this branch. Otherwise return
2176 the rtx for the branch target. */
2177static rtx
2178get_branch_target (rtx branch)
2179{
aa90bb35 2180 if (JUMP_P (branch))
644459d0 2181 {
2182 rtx set, src;
2183
2184 /* Return statements */
2185 if (GET_CODE (PATTERN (branch)) == RETURN)
2186 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2187
fcc31b99 2188 /* ASM GOTOs. */
604157f6 2189 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2190 return NULL;
2191
644459d0 2192 set = single_set (branch);
2193 src = SET_SRC (set);
2194 if (GET_CODE (SET_DEST (set)) != PC)
2195 abort ();
2196
2197 if (GET_CODE (src) == IF_THEN_ELSE)
2198 {
2199 rtx lab = 0;
2200 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2201 if (note)
2202 {
2203 /* If the more probable case is not a fall through, then
2204 try a branch hint. */
9eb946de 2205 int prob = XINT (note, 0);
644459d0 2206 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2207 && GET_CODE (XEXP (src, 1)) != PC)
2208 lab = XEXP (src, 1);
2209 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2210 && GET_CODE (XEXP (src, 2)) != PC)
2211 lab = XEXP (src, 2);
2212 }
2213 if (lab)
2214 {
2215 if (GET_CODE (lab) == RETURN)
2216 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2217 return lab;
2218 }
2219 return 0;
2220 }
2221
2222 return src;
2223 }
aa90bb35 2224 else if (CALL_P (branch))
644459d0 2225 {
2226 rtx call;
2227 /* All of our call patterns are in a PARALLEL and the CALL is
2228 the first pattern in the PARALLEL. */
2229 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2230 abort ();
2231 call = XVECEXP (PATTERN (branch), 0, 0);
2232 if (GET_CODE (call) == SET)
2233 call = SET_SRC (call);
2234 if (GET_CODE (call) != CALL)
2235 abort ();
2236 return XEXP (XEXP (call, 0), 0);
2237 }
2238 return 0;
2239}
2240
5a976006 2241/* The special $hbr register is used to prevent the insn scheduler from
2242 moving hbr insns across instructions which invalidate them. It
2243 should only be used in a clobber, and this function searches for
2244 insns which clobber it. */
2245static bool
2246insn_clobbers_hbr (rtx insn)
2247{
2248 if (INSN_P (insn)
2249 && GET_CODE (PATTERN (insn)) == PARALLEL)
2250 {
2251 rtx parallel = PATTERN (insn);
2252 rtx clobber;
2253 int j;
2254 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2255 {
2256 clobber = XVECEXP (parallel, 0, j);
2257 if (GET_CODE (clobber) == CLOBBER
2258 && GET_CODE (XEXP (clobber, 0)) == REG
2259 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2260 return 1;
2261 }
2262 }
2263 return 0;
2264}
2265
2266/* Search up to 32 insns starting at FIRST:
2267 - at any kind of hinted branch, just return
2268 - at any unconditional branch in the first 15 insns, just return
2269 - at a call or indirect branch, after the first 15 insns, force it to
2270 an even address and return
2271 - at any unconditional branch, after the first 15 insns, force it to
2272 an even address.
	 2273	   At the end of the search, insert an hbrp within 4 insns of FIRST,
2274 and an hbrp within 16 instructions of FIRST.
2275 */
644459d0 2276static void
5a976006 2277insert_hbrp_for_ilb_runout (rtx first)
644459d0 2278{
5a976006 2279 rtx insn, before_4 = 0, before_16 = 0;
2280 int addr = 0, length, first_addr = -1;
2281 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2282 int insert_lnop_after = 0;
2283 for (insn = first; insn; insn = NEXT_INSN (insn))
2284 if (INSN_P (insn))
2285 {
2286 if (first_addr == -1)
2287 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2288 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2289 length = get_attr_length (insn);
2290
2291 if (before_4 == 0 && addr + length >= 4 * 4)
2292 before_4 = insn;
2293 /* We test for 14 instructions because the first hbrp will add
2294 up to 2 instructions. */
2295 if (before_16 == 0 && addr + length >= 14 * 4)
2296 before_16 = insn;
2297
2298 if (INSN_CODE (insn) == CODE_FOR_hbr)
2299 {
2300 /* Make sure an hbrp is at least 2 cycles away from a hint.
2301 Insert an lnop after the hbrp when necessary. */
2302 if (before_4 == 0 && addr > 0)
2303 {
2304 before_4 = insn;
2305 insert_lnop_after |= 1;
2306 }
2307 else if (before_4 && addr <= 4 * 4)
2308 insert_lnop_after |= 1;
2309 if (before_16 == 0 && addr > 10 * 4)
2310 {
2311 before_16 = insn;
2312 insert_lnop_after |= 2;
2313 }
2314 else if (before_16 && addr <= 14 * 4)
2315 insert_lnop_after |= 2;
2316 }
644459d0 2317
5a976006 2318 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2319 {
2320 if (addr < hbrp_addr0)
2321 hbrp_addr0 = addr;
2322 else if (addr < hbrp_addr1)
2323 hbrp_addr1 = addr;
2324 }
644459d0 2325
5a976006 2326 if (CALL_P (insn) || JUMP_P (insn))
2327 {
2328 if (HINTED_P (insn))
2329 return;
2330
2331 /* Any branch after the first 15 insns should be on an even
2332 address to avoid a special case branch. There might be
2333 some nops and/or hbrps inserted, so we test after 10
2334 insns. */
2335 if (addr > 10 * 4)
2336 SCHED_ON_EVEN_P (insn) = 1;
2337 }
644459d0 2338
5a976006 2339 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2340 return;
2341
2342
2343 if (addr + length >= 32 * 4)
644459d0 2344 {
5a976006 2345 gcc_assert (before_4 && before_16);
2346 if (hbrp_addr0 > 4 * 4)
644459d0 2347 {
5a976006 2348 insn =
2349 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2350 recog_memoized (insn);
d53c050c 2351 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2352 INSN_ADDRESSES_NEW (insn,
2353 INSN_ADDRESSES (INSN_UID (before_4)));
2354 PUT_MODE (insn, GET_MODE (before_4));
2355 PUT_MODE (before_4, TImode);
2356 if (insert_lnop_after & 1)
644459d0 2357 {
5a976006 2358 insn = emit_insn_before (gen_lnop (), before_4);
2359 recog_memoized (insn);
d53c050c 2360 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
5a976006 2361 INSN_ADDRESSES_NEW (insn,
2362 INSN_ADDRESSES (INSN_UID (before_4)));
2363 PUT_MODE (insn, TImode);
644459d0 2364 }
644459d0 2365 }
5a976006 2366 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2367 && hbrp_addr1 > 16 * 4)
644459d0 2368 {
5a976006 2369 insn =
2370 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2371 recog_memoized (insn);
d53c050c 2372 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID (before_16)));
2375 PUT_MODE (insn, GET_MODE (before_16));
2376 PUT_MODE (before_16, TImode);
2377 if (insert_lnop_after & 2)
644459d0 2378 {
5a976006 2379 insn = emit_insn_before (gen_lnop (), before_16);
2380 recog_memoized (insn);
d53c050c 2381 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
5a976006 2382 INSN_ADDRESSES_NEW (insn,
2383 INSN_ADDRESSES (INSN_UID
2384 (before_16)));
2385 PUT_MODE (insn, TImode);
644459d0 2386 }
2387 }
5a976006 2388 return;
644459d0 2389 }
644459d0 2390 }
5a976006 2391 else if (BARRIER_P (insn))
2392 return;
644459d0 2393
644459d0 2394}
5a976006 2395
2396/* The SPU might hang when it executes 48 inline instructions after a
2397 hinted branch jumps to its hinted target. The beginning of a
851d9296 2398 function and the return from a call might have been hinted, and
2399 must be handled as well. To prevent a hang we insert 2 hbrps. The
2400 first should be within 6 insns of the branch target. The second
2401 should be within 22 insns of the branch target. When determining
2402 if hbrps are necessary, we look for only 32 inline instructions,
	 2403	   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2404 when inserting new hbrps, we insert them within 4 and 16 insns of
2405 the target. */
644459d0 2406static void
5a976006 2407insert_hbrp (void)
644459d0 2408{
5a976006 2409 rtx insn;
2410 if (TARGET_SAFE_HINTS)
644459d0 2411 {
5a976006 2412 shorten_branches (get_insns ());
2413 /* Insert hbrp at beginning of function */
2414 insn = next_active_insn (get_insns ());
2415 if (insn)
2416 insert_hbrp_for_ilb_runout (insn);
2417 /* Insert hbrp after hinted targets. */
2418 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2419 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2420 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2421 }
644459d0 2422}
2423
5a976006 2424static int in_spu_reorg;
2425
8a42230a 2426static void
2427spu_var_tracking (void)
2428{
2429 if (flag_var_tracking)
2430 {
2431 df_analyze ();
2432 timevar_push (TV_VAR_TRACKING);
2433 variable_tracking_main ();
2434 timevar_pop (TV_VAR_TRACKING);
2435 df_finish_pass (false);
2436 }
2437}
2438
5a976006 2439/* Insert branch hints. There are no branch optimizations after this
2440 pass, so it's safe to set our branch hints now. */
644459d0 2441static void
5a976006 2442spu_machine_dependent_reorg (void)
644459d0 2443{
5a976006 2444 sbitmap blocks;
2445 basic_block bb;
2446 rtx branch, insn;
2447 rtx branch_target = 0;
2448 int branch_addr = 0, insn_addr, required_dist = 0;
2449 int i;
2450 unsigned int j;
644459d0 2451
5a976006 2452 if (!TARGET_BRANCH_HINTS || optimize == 0)
2453 {
2454 /* We still do it for unoptimized code because an external
2455 function might have hinted a call or return. */
a54ca889 2456 compute_bb_for_insn ();
5a976006 2457 insert_hbrp ();
2458 pad_bb ();
8a42230a 2459 spu_var_tracking ();
a54ca889 2460 free_bb_for_insn ();
5a976006 2461 return;
2462 }
644459d0 2463
5a976006 2464 blocks = sbitmap_alloc (last_basic_block);
53c5d9d4 2465 bitmap_clear (blocks);
644459d0 2466
5a976006 2467 in_spu_reorg = 1;
2468 compute_bb_for_insn ();
2469
a7a0184d 2470 /* (Re-)discover loops so that bb->loop_father can be used
2471 in the analysis below. */
2472 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2473
5a976006 2474 compact_blocks ();
2475
2476 spu_bb_info =
a28770e1 2477 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
5a976006 2478 sizeof (struct spu_bb_info));
2479
2480 /* We need exact insn addresses and lengths. */
2481 shorten_branches (get_insns ());
2482
a28770e1 2483 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
644459d0 2484 {
5a976006 2485 bb = BASIC_BLOCK (i);
2486 branch = 0;
2487 if (spu_bb_info[i].prop_jump)
644459d0 2488 {
5a976006 2489 branch = spu_bb_info[i].prop_jump;
2490 branch_target = get_branch_target (branch);
2491 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2492 required_dist = spu_hint_dist;
2493 }
2494 /* Search from end of a block to beginning. In this loop, find
	 2495	         jumps which need a branch hint and emit the hint only when:
2496 - it's an indirect branch and we're at the insn which sets
2497 the register
2498 - we're at an insn that will invalidate the hint. e.g., a
2499 call, another hint insn, inline asm that clobbers $hbr, and
2500 some inlined operations (divmodsi4). Don't consider jumps
2501 because they are only at the end of a block and are
2502 considered when we are deciding whether to propagate
2503 - we're getting too far away from the branch. The hbr insns
2504 only have a signed 10 bit offset
2505 We go back as far as possible so the branch will be considered
2506 for propagation when we get to the beginning of the block. */
2507 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2508 {
2509 if (INSN_P (insn))
2510 {
2511 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2512 if (branch
2513 && ((GET_CODE (branch_target) == REG
2514 && set_of (branch_target, insn) != NULL_RTX)
2515 || insn_clobbers_hbr (insn)
2516 || branch_addr - insn_addr > 600))
2517 {
2518 rtx next = NEXT_INSN (insn);
2519 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2520 if (insn != BB_END (bb)
2521 && branch_addr - next_addr >= required_dist)
2522 {
2523 if (dump_file)
2524 fprintf (dump_file,
2525 "hint for %i in block %i before %i\n",
2526 INSN_UID (branch), bb->index,
2527 INSN_UID (next));
2528 spu_emit_branch_hint (next, branch, branch_target,
2529 branch_addr - next_addr, blocks);
2530 }
2531 branch = 0;
2532 }
2533
2534 /* JUMP_P will only be true at the end of a block. When
2535 branch is already set it means we've previously decided
2536 to propagate a hint for that branch into this block. */
2537 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2538 {
2539 branch = 0;
2540 if ((branch_target = get_branch_target (insn)))
2541 {
2542 branch = insn;
2543 branch_addr = insn_addr;
2544 required_dist = spu_hint_dist;
2545 }
2546 }
2547 }
2548 if (insn == BB_HEAD (bb))
2549 break;
2550 }
2551
2552 if (branch)
2553 {
2554 /* If we haven't emitted a hint for this branch yet, it might
2555 be profitable to emit it in one of the predecessor blocks,
2556 especially for loops. */
2557 rtx bbend;
2558 basic_block prev = 0, prop = 0, prev2 = 0;
2559 int loop_exit = 0, simple_loop = 0;
2560 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2561
2562 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2563 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2564 prev = EDGE_PRED (bb, j)->src;
2565 else
2566 prev2 = EDGE_PRED (bb, j)->src;
2567
2568 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2569 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2570 loop_exit = 1;
2571 else if (EDGE_SUCC (bb, j)->dest == bb)
2572 simple_loop = 1;
2573
2574 /* If this branch is a loop exit then propagate to previous
2575 fallthru block. This catches the cases when it is a simple
2576 loop or when there is an initial branch into the loop. */
2577 if (prev && (loop_exit || simple_loop)
a7a0184d 2578 && bb_loop_depth (prev) <= bb_loop_depth (bb))
5a976006 2579 prop = prev;
2580
	 2581	          /* If there is only one adjacent predecessor, don't propagate
a7a0184d 2582 outside this loop. */
5a976006 2583 else if (prev && single_pred_p (bb)
a7a0184d 2584 && prev->loop_father == bb->loop_father)
5a976006 2585 prop = prev;
2586
2587 /* If this is the JOIN block of a simple IF-THEN then
9d75589a 2588 propagate the hint to the HEADER block. */
5a976006 2589 else if (prev && prev2
2590 && EDGE_COUNT (bb->preds) == 2
2591 && EDGE_COUNT (prev->preds) == 1
2592 && EDGE_PRED (prev, 0)->src == prev2
a7a0184d 2593 && prev2->loop_father == bb->loop_father
5a976006 2594 && GET_CODE (branch_target) != REG)
2595 prop = prev;
2596
2597 /* Don't propagate when:
2598 - this is a simple loop and the hint would be too far
2599 - this is not a simple loop and there are 16 insns in
2600 this block already
2601 - the predecessor block ends in a branch that will be
2602 hinted
2603 - the predecessor block ends in an insn that invalidates
2604 the hint */
2605 if (prop
2606 && prop->index >= 0
2607 && (bbend = BB_END (prop))
2608 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2609 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2610 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2611 {
2612 if (dump_file)
2613 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2614 "for %i (loop_exit %i simple_loop %i dist %i)\n",
a7a0184d 2615 bb->index, prop->index, bb_loop_depth (bb),
5a976006 2616 INSN_UID (branch), loop_exit, simple_loop,
2617 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2618
2619 spu_bb_info[prop->index].prop_jump = branch;
2620 spu_bb_info[prop->index].bb_index = i;
2621 }
2622 else if (branch_addr - next_addr >= required_dist)
2623 {
2624 if (dump_file)
2625 fprintf (dump_file, "hint for %i in block %i before %i\n",
2626 INSN_UID (branch), bb->index,
2627 INSN_UID (NEXT_INSN (insn)));
2628 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2629 branch_addr - next_addr, blocks);
2630 }
2631 branch = 0;
644459d0 2632 }
644459d0 2633 }
5a976006 2634 free (spu_bb_info);
644459d0 2635
53c5d9d4 2636 if (!bitmap_empty_p (blocks))
5a976006 2637 find_many_sub_basic_blocks (blocks);
2638
2639 /* We have to schedule to make sure alignment is ok. */
2640 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2641
2642 /* The hints need to be scheduled, so call it again. */
2643 schedule_insns ();
2fbdf9ef 2644 df_finish_pass (true);
5a976006 2645
2646 insert_hbrp ();
2647
2648 pad_bb ();
2649
8f1d58ad 2650 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2651 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2652 {
2653 /* Adjust the LABEL_REF in a hint when we have inserted a nop
	 2654	         between its branch label and the branch.  We don't move the
2655 label because GCC expects it at the beginning of the block. */
2656 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2657 rtx label_ref = XVECEXP (unspec, 0, 0);
2658 rtx label = XEXP (label_ref, 0);
2659 rtx branch;
2660 int offset = 0;
2661 for (branch = NEXT_INSN (label);
2662 !JUMP_P (branch) && !CALL_P (branch);
2663 branch = NEXT_INSN (branch))
2664 if (NONJUMP_INSN_P (branch))
2665 offset += get_attr_length (branch);
2666 if (offset > 0)
29c05e22 2667 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
8f1d58ad 2668 }
5a976006 2669
8a42230a 2670 spu_var_tracking ();
5a976006 2671
a7a0184d 2672 loop_optimizer_finalize ();
2673
5a976006 2674 free_bb_for_insn ();
2675
2676 in_spu_reorg = 0;
644459d0 2677}
2678\f
2679
2680/* Insn scheduling routines, primarily for dual issue. */
2681static int
2682spu_sched_issue_rate (void)
2683{
2684 return 2;
2685}
2686
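/* Return 1 if INSN uses the load/store unit, i.e. it is a single SET
   whose source or destination is a MEM.  */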
2687static int
5a976006 2688uses_ls_unit(rtx insn)
644459d0 2689{
5a976006 2690 rtx set = single_set (insn);
2691 if (set != 0
2692 && (GET_CODE (SET_DEST (set)) == MEM
2693 || GET_CODE (SET_SRC (set)) == MEM))
2694 return 1;
2695 return 0;
644459d0 2696}
2697
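/* Return which pipeline INSN issues on: 0 for the even (arithmetic)
   pipeline, 1 for the odd (load/store, shuffle, branch) pipeline,
   -1 for inline asm and MULTI0 instructions, and -2 for CONVERT.  */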
2698static int
2699get_pipe (rtx insn)
2700{
2701 enum attr_type t;
2702 /* Handle inline asm */
2703 if (INSN_CODE (insn) == -1)
2704 return -1;
2705 t = get_attr_type (insn);
2706 switch (t)
2707 {
2708 case TYPE_CONVERT:
2709 return -2;
2710 case TYPE_MULTI0:
2711 return -1;
2712
2713 case TYPE_FX2:
2714 case TYPE_FX3:
2715 case TYPE_SPR:
2716 case TYPE_NOP:
2717 case TYPE_FXB:
2718 case TYPE_FPD:
2719 case TYPE_FP6:
2720 case TYPE_FP7:
644459d0 2721 return 0;
2722
2723 case TYPE_LNOP:
2724 case TYPE_SHUF:
2725 case TYPE_LOAD:
2726 case TYPE_STORE:
2727 case TYPE_BR:
2728 case TYPE_MULTI1:
2729 case TYPE_HBR:
5a976006 2730 case TYPE_IPREFETCH:
644459d0 2731 return 1;
2732 default:
2733 abort ();
2734 }
2735}
2736
5a976006 2737
2738/* haifa-sched.c has a static variable that keeps track of the current
2739 cycle. It is passed to spu_sched_reorder, and we record it here for
2740 use by spu_sched_variable_issue. It won't be accurate if the
	 2741	   scheduler updates its clock_var between the two calls.  */
2742static int clock_var;
2743
2744/* This is used to keep track of insn alignment. Set to 0 at the
2745 beginning of each block and increased by the "length" attr of each
2746 insn scheduled. */
2747static int spu_sched_length;
2748
2749/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2750 ready list appropriately in spu_sched_reorder(). */
2751static int pipe0_clock;
2752static int pipe1_clock;
2753
2754static int prev_clock_var;
2755
2756static int prev_priority;
2757
2758/* The SPU needs to load the next ilb sometime during the execution of
2759 the previous ilb. There is a potential conflict if every cycle has a
2760 load or store. To avoid the conflict we make sure the load/store
2761 unit is free for at least one cycle during the execution of insns in
2762 the previous ilb. */
2763static int spu_ls_first;
2764static int prev_ls_clock;
2765
2766static void
2767spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2768 int max_ready ATTRIBUTE_UNUSED)
2769{
2770 spu_sched_length = 0;
2771}
2772
2773static void
2774spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2775 int max_ready ATTRIBUTE_UNUSED)
2776{
2777 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2778 {
2779 /* When any block might be at least 8-byte aligned, assume they
2780 will all be at least 8-byte aligned to make sure dual issue
2781 works out correctly. */
2782 spu_sched_length = 0;
2783 }
2784 spu_ls_first = INT_MAX;
2785 clock_var = -1;
2786 prev_ls_clock = -1;
2787 pipe0_clock = -1;
2788 pipe1_clock = -1;
2789 prev_clock_var = -1;
2790 prev_priority = -1;
2791}
2792
644459d0 2793static int
5a976006 2794spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2795 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2796{
5a976006 2797 int len;
2798 int p;
644459d0 2799 if (GET_CODE (PATTERN (insn)) == USE
2800 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2801 || (len = get_attr_length (insn)) == 0)
2802 return more;
2803
2804 spu_sched_length += len;
2805
2806 /* Reset on inline asm */
2807 if (INSN_CODE (insn) == -1)
2808 {
2809 spu_ls_first = INT_MAX;
2810 pipe0_clock = -1;
2811 pipe1_clock = -1;
2812 return 0;
2813 }
2814 p = get_pipe (insn);
2815 if (p == 0)
2816 pipe0_clock = clock_var;
2817 else
2818 pipe1_clock = clock_var;
2819
2820 if (in_spu_reorg)
2821 {
2822 if (clock_var - prev_ls_clock > 1
2823 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2824 spu_ls_first = INT_MAX;
2825 if (uses_ls_unit (insn))
2826 {
2827 if (spu_ls_first == INT_MAX)
2828 spu_ls_first = spu_sched_length;
2829 prev_ls_clock = clock_var;
2830 }
2831
2832 /* The scheduler hasn't inserted the nop, but we will later on.
2833 Include those nops in spu_sched_length. */
2834 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2835 spu_sched_length += 4;
2836 prev_clock_var = clock_var;
2837
2838 /* more is -1 when called from spu_sched_reorder for new insns
2839 that don't have INSN_PRIORITY */
2840 if (more >= 0)
2841 prev_priority = INSN_PRIORITY (insn);
2842 }
2843
9d75589a 2844 /* Always try issuing more insns. spu_sched_reorder will decide
5a976006 2845 when the cycle should be advanced. */
2846 return 1;
2847}
2848
2849/* This function is called for both TARGET_SCHED_REORDER and
2850 TARGET_SCHED_REORDER2. */
2851static int
2852spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2853 rtx *ready, int *nreadyp, int clock)
2854{
2855 int i, nready = *nreadyp;
2856 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2857 rtx insn;
2858
2859 clock_var = clock;
2860
2861 if (nready <= 0 || pipe1_clock >= clock)
2862 return 0;
2863
2864 /* Find any rtl insns that don't generate assembly insns and schedule
2865 them first. */
2866 for (i = nready - 1; i >= 0; i--)
2867 {
2868 insn = ready[i];
2869 if (INSN_CODE (insn) == -1
2870 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 2871 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 2872 {
2873 ready[i] = ready[nready - 1];
2874 ready[nready - 1] = insn;
2875 return 1;
2876 }
2877 }
2878
2879 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2880 for (i = 0; i < nready; i++)
2881 if (INSN_CODE (ready[i]) != -1)
2882 {
2883 insn = ready[i];
2884 switch (get_attr_type (insn))
2885 {
2886 default:
2887 case TYPE_MULTI0:
2888 case TYPE_CONVERT:
2889 case TYPE_FX2:
2890 case TYPE_FX3:
2891 case TYPE_SPR:
2892 case TYPE_NOP:
2893 case TYPE_FXB:
2894 case TYPE_FPD:
2895 case TYPE_FP6:
2896 case TYPE_FP7:
2897 pipe_0 = i;
2898 break;
2899 case TYPE_LOAD:
2900 case TYPE_STORE:
2901 pipe_ls = i;
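	    /* Fall through: loads and stores also issue on pipe 1.  */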
2902 case TYPE_LNOP:
2903 case TYPE_SHUF:
2904 case TYPE_BR:
2905 case TYPE_MULTI1:
2906 case TYPE_HBR:
2907 pipe_1 = i;
2908 break;
2909 case TYPE_IPREFETCH:
2910 pipe_hbrp = i;
2911 break;
2912 }
2913 }
2914
2915 /* In the first scheduling phase, schedule loads and stores together
2916 to increase the chance they will get merged during postreload CSE. */
2917 if (!reload_completed && pipe_ls >= 0)
2918 {
2919 insn = ready[pipe_ls];
2920 ready[pipe_ls] = ready[nready - 1];
2921 ready[nready - 1] = insn;
2922 return 1;
2923 }
2924
2925 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2926 if (pipe_hbrp >= 0)
2927 pipe_1 = pipe_hbrp;
2928
2929 /* When we have loads/stores in every cycle of the last 15 insns and
2930 we are about to schedule another load/store, emit an hbrp insn
2931 instead. */
2932 if (in_spu_reorg
2933 && spu_sched_length - spu_ls_first >= 4 * 15
2934 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2935 {
2936 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2937 recog_memoized (insn);
2938 if (pipe0_clock < clock)
2939 PUT_MODE (insn, TImode);
2940 spu_sched_variable_issue (file, verbose, insn, -1);
2941 return 0;
2942 }
2943
2944 /* In general, we want to emit nops to increase dual issue, but dual
2945 issue isn't faster when one of the insns could be scheduled later
	 2946	     without affecting the critical path.  We look at INSN_PRIORITY to
	 2947	     make a good guess, but it isn't perfect, so -mdual-nops=n can be
	 2948	     used to adjust it.  */
2949 if (in_spu_reorg && spu_dual_nops < 10)
2950 {
9d75589a 2951 /* When we are at an even address and we are not issuing nops to
5a976006 2952	         improve scheduling, we need to advance the cycle.  */
2953 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2954 && (spu_dual_nops == 0
2955 || (pipe_1 != -1
2956 && prev_priority >
2957 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2958 return 0;
2959
2960 /* When at an odd address, schedule the highest priority insn
2961 without considering pipeline. */
2962 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2963 && (spu_dual_nops == 0
2964 || (prev_priority >
2965 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2966 return 1;
2967 }
2968
2969
	 2970	  /* If we haven't issued a pipe0 insn yet this cycle and there is a
2971 pipe0 insn in the ready list, schedule it. */
2972 if (pipe0_clock < clock && pipe_0 >= 0)
2973 schedule_i = pipe_0;
2974
2975 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2976 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2977 else
2978 schedule_i = pipe_1;
2979
2980 if (schedule_i > -1)
2981 {
2982 insn = ready[schedule_i];
2983 ready[schedule_i] = ready[nready - 1];
2984 ready[nready - 1] = insn;
2985 return 1;
2986 }
2987 return 0;
644459d0 2988}
2989
2990/* INSN is dependent on DEP_INSN. */
2991static int
5a976006 2992spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 2993{
5a976006 2994 rtx set;
2995
2996 /* The blockage pattern is used to prevent instructions from being
2997 moved across it and has no cost. */
2998 if (INSN_CODE (insn) == CODE_FOR_blockage
2999 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3000 return 0;
3001
9d98604b 3002 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3003 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3004 return 0;
3005
3006 /* Make sure hbrps are spread out. */
3007 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3008 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3009 return 8;
3010
3011 /* Make sure hints and hbrps are 2 cycles apart. */
3012 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3013 || INSN_CODE (insn) == CODE_FOR_hbr)
3014 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3015 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3016 return 2;
3017
3018 /* An hbrp has no real dependency on other insns. */
3019 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3020 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3021 return 0;
3022
3023 /* Assuming that it is unlikely an argument register will be used in
3024 the first cycle of the called function, we reduce the cost for
3025 slightly better scheduling of dep_insn. When not hinted, the
3026 mispredicted branch would hide the cost as well. */
3027 if (CALL_P (insn))
3028 {
3029 rtx target = get_branch_target (insn);
3030 if (GET_CODE (target) != REG || !set_of (target, insn))
3031 return cost - 2;
3032 return cost;
3033 }
3034
3035 /* And when returning from a function, let's assume the return values
3036 are completed sooner too. */
3037 if (CALL_P (dep_insn))
644459d0 3038 return cost - 2;
5a976006 3039
	 3040	  /* Make sure an instruction that loads from the back chain is scheduled
3041 away from the return instruction so a hint is more likely to get
3042 issued. */
3043 if (INSN_CODE (insn) == CODE_FOR__return
3044 && (set = single_set (dep_insn))
3045 && GET_CODE (SET_DEST (set)) == REG
3046 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3047 return 20;
3048
644459d0 3049 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3050 scheduler makes every insn in a block anti-dependent on the final
3051 jump_insn. We adjust here so higher cost insns will get scheduled
3052 earlier. */
5a976006 3053 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3054 return insn_cost (dep_insn) - 3;
5a976006 3055
644459d0 3056 return cost;
3057}
3058\f
3059/* Create a CONST_DOUBLE from a string. */
842ae815 3060rtx
644459d0 3061spu_float_const (const char *string, enum machine_mode mode)
3062{
3063 REAL_VALUE_TYPE value;
3064 value = REAL_VALUE_ATOF (string, mode);
3065 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3066}
3067
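/* Return 1 if X is a constant that can be used directly as an address:
   a label, symbol, integer constant, CONST expression or HIGH.  */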
644459d0 3068int
3069spu_constant_address_p (rtx x)
3070{
3071 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3072 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3073 || GET_CODE (x) == HIGH);
3074}
3075
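/* Classify the SImode value VAL by which immediate-load instruction can
   materialize it: e.g. 0x1234 -> SPU_IL, 0x20000 -> SPU_ILA,
   0x12341234 -> SPU_ILH, 0x12340000 -> SPU_ILHU.  SPU_NONE means no
   single il-family instruction can load it.  */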
3076static enum spu_immediate
3077which_immediate_load (HOST_WIDE_INT val)
3078{
3079 gcc_assert (val == trunc_int_for_mode (val, SImode));
3080
3081 if (val >= -0x8000 && val <= 0x7fff)
3082 return SPU_IL;
3083 if (val >= 0 && val <= 0x3ffff)
3084 return SPU_ILA;
3085 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3086 return SPU_ILH;
3087 if ((val & 0xffff) == 0)
3088 return SPU_ILHU;
3089
3090 return SPU_NONE;
3091}
3092
dea01258 3093/* Return true when OP can be loaded by one of the il instructions, or
3094 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3095int
3096immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3097{
3098 if (CONSTANT_P (op))
3099 {
3100 enum immediate_class c = classify_immediate (op, mode);
5df189be 3101 return c == IC_IL1 || c == IC_IL1s
3072d30e 3102 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3103 }
3104 return 0;
3105}
3106
	 3107/* Return true if the first SIZE bytes of ARR are a constant that can be
3108 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3109 represent the size and offset of the instruction to use. */
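/* That is, ARR must match the identity pattern 16, 17, 18, ... except for
   one naturally aligned run of 1, 2, 4 or 8 bytes holding 3; 2,3; 0..3 or
   0..7 respectively.  */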
3110static int
3111cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3112{
3113 int cpat, run, i, start;
3114 cpat = 1;
3115 run = 0;
3116 start = -1;
3117 for (i = 0; i < size && cpat; i++)
3118 if (arr[i] != i+16)
3119 {
3120 if (!run)
3121 {
3122 start = i;
3123 if (arr[i] == 3)
3124 run = 1;
3125 else if (arr[i] == 2 && arr[i+1] == 3)
3126 run = 2;
3127 else if (arr[i] == 0)
3128 {
3129 while (arr[i+run] == run && i+run < 16)
3130 run++;
3131 if (run != 4 && run != 8)
3132 cpat = 0;
3133 }
3134 else
3135 cpat = 0;
3136 if ((i & (run-1)) != 0)
3137 cpat = 0;
3138 i += run;
3139 }
3140 else
3141 cpat = 0;
3142 }
b01a6dc3 3143 if (cpat && (run || size < 16))
dea01258 3144 {
3145 if (run == 0)
3146 run = 1;
3147 if (prun)
3148 *prun = run;
3149 if (pstart)
3150 *pstart = start == -1 ? 16-run : start;
3151 return 1;
3152 }
3153 return 0;
3154}
3155
3156/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3157 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3158static enum immediate_class
3159classify_immediate (rtx op, enum machine_mode mode)
644459d0 3160{
3161 HOST_WIDE_INT val;
3162 unsigned char arr[16];
5df189be 3163 int i, j, repeated, fsmbi, repeat;
dea01258 3164
3165 gcc_assert (CONSTANT_P (op));
3166
644459d0 3167 if (GET_MODE (op) != VOIDmode)
3168 mode = GET_MODE (op);
3169
dea01258 3170 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3171 if (!flag_pic
3172 && mode == V4SImode
dea01258 3173 && GET_CODE (op) == CONST_VECTOR
3174 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3175 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3176 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3177 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3178 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3179 op = CONST_VECTOR_ELT (op, 0);
644459d0 3180
dea01258 3181 switch (GET_CODE (op))
3182 {
3183 case SYMBOL_REF:
3184 case LABEL_REF:
3185 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3186
dea01258 3187 case CONST:
0cfc65d4 3188 /* We can never know if the resulting address fits in 18 bits and can be
3189 loaded with ila. For now, assume the address will not overflow if
3190 the displacement is "small" (fits 'K' constraint). */
3191 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3192 {
3193 rtx sym = XEXP (XEXP (op, 0), 0);
3194 rtx cst = XEXP (XEXP (op, 0), 1);
3195
3196 if (GET_CODE (sym) == SYMBOL_REF
3197 && GET_CODE (cst) == CONST_INT
3198 && satisfies_constraint_K (cst))
3199 return IC_IL1s;
3200 }
3201 return IC_IL2s;
644459d0 3202
dea01258 3203 case HIGH:
3204 return IC_IL1s;
3205
3206 case CONST_VECTOR:
3207 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3208 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3209 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3210 return IC_POOL;
3211 /* Fall through. */
3212
3213 case CONST_INT:
3214 case CONST_DOUBLE:
3215 constant_to_array (mode, op, arr);
644459d0 3216
dea01258 3217 /* Check that each 4-byte slot is identical. */
3218 repeated = 1;
3219 for (i = 4; i < 16; i += 4)
3220 for (j = 0; j < 4; j++)
3221 if (arr[j] != arr[i + j])
3222 repeated = 0;
3223
3224 if (repeated)
3225 {
3226 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3227 val = trunc_int_for_mode (val, SImode);
3228
3229 if (which_immediate_load (val) != SPU_NONE)
3230 return IC_IL1;
3231 }
3232
3233 /* Any mode of 2 bytes or smaller can be loaded with an il
3234 instruction. */
3235 gcc_assert (GET_MODE_SIZE (mode) > 2);
3236
3237 fsmbi = 1;
5df189be 3238 repeat = 0;
dea01258 3239 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3240 if (arr[i] != 0 && repeat == 0)
3241 repeat = arr[i];
3242 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3243 fsmbi = 0;
3244 if (fsmbi)
5df189be 3245 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3246
3247 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3248 return IC_CPAT;
3249
3250 if (repeated)
3251 return IC_IL2;
3252
3253 return IC_POOL;
3254 default:
3255 break;
3256 }
3257 gcc_unreachable ();
644459d0 3258}
3259
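/* Classify VAL by which or-immediate instruction can produce it: ori for
   a small signed word immediate, orhi/orbi for a small repeated halfword
   or byte, iohl when VAL fits in 16 unsigned bits, SPU_NONE otherwise.  */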
3260static enum spu_immediate
3261which_logical_immediate (HOST_WIDE_INT val)
3262{
3263 gcc_assert (val == trunc_int_for_mode (val, SImode));
3264
3265 if (val >= -0x200 && val <= 0x1ff)
3266 return SPU_ORI;
3267 if (val >= 0 && val <= 0xffff)
3268 return SPU_IOHL;
3269 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3270 {
3271 val = trunc_int_for_mode (val, HImode);
3272 if (val >= -0x200 && val <= 0x1ff)
3273 return SPU_ORHI;
3274 if ((val & 0xff) == ((val >> 8) & 0xff))
3275 {
3276 val = trunc_int_for_mode (val, QImode);
3277 if (val >= -0x200 && val <= 0x1ff)
3278 return SPU_ORBI;
3279 }
3280 }
3281 return SPU_NONE;
3282}
3283
5df189be 3284/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3285 CONST_DOUBLEs. */
3286static int
3287const_vector_immediate_p (rtx x)
3288{
3289 int i;
3290 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3291 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3292 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3293 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3294 return 0;
3295 return 1;
3296}
3297
644459d0 3298int
3299logical_immediate_p (rtx op, enum machine_mode mode)
3300{
3301 HOST_WIDE_INT val;
3302 unsigned char arr[16];
3303 int i, j;
3304
3305 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3306 || GET_CODE (op) == CONST_VECTOR);
3307
5df189be 3308 if (GET_CODE (op) == CONST_VECTOR
3309 && !const_vector_immediate_p (op))
3310 return 0;
3311
644459d0 3312 if (GET_MODE (op) != VOIDmode)
3313 mode = GET_MODE (op);
3314
3315 constant_to_array (mode, op, arr);
3316
3317 /* Check that bytes are repeated. */
3318 for (i = 4; i < 16; i += 4)
3319 for (j = 0; j < 4; j++)
3320 if (arr[j] != arr[i + j])
3321 return 0;
3322
3323 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3324 val = trunc_int_for_mode (val, SImode);
3325
3326 i = which_logical_immediate (val);
3327 return i != SPU_NONE && i != SPU_IOHL;
3328}
3329
3330int
3331iohl_immediate_p (rtx op, enum machine_mode mode)
3332{
3333 HOST_WIDE_INT val;
3334 unsigned char arr[16];
3335 int i, j;
3336
3337 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3338 || GET_CODE (op) == CONST_VECTOR);
3339
5df189be 3340 if (GET_CODE (op) == CONST_VECTOR
3341 && !const_vector_immediate_p (op))
3342 return 0;
3343
644459d0 3344 if (GET_MODE (op) != VOIDmode)
3345 mode = GET_MODE (op);
3346
3347 constant_to_array (mode, op, arr);
3348
3349 /* Check that bytes are repeated. */
3350 for (i = 4; i < 16; i += 4)
3351 for (j = 0; j < 4; j++)
3352 if (arr[j] != arr[i + j])
3353 return 0;
3354
3355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3356 val = trunc_int_for_mode (val, SImode);
3357
3358 return val >= 0 && val <= 0xffff;
3359}
3360
3361int
3362arith_immediate_p (rtx op, enum machine_mode mode,
3363 HOST_WIDE_INT low, HOST_WIDE_INT high)
3364{
3365 HOST_WIDE_INT val;
3366 unsigned char arr[16];
3367 int bytes, i, j;
3368
3369 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3370 || GET_CODE (op) == CONST_VECTOR);
3371
5df189be 3372 if (GET_CODE (op) == CONST_VECTOR
3373 && !const_vector_immediate_p (op))
3374 return 0;
3375
644459d0 3376 if (GET_MODE (op) != VOIDmode)
3377 mode = GET_MODE (op);
3378
3379 constant_to_array (mode, op, arr);
3380
3381 if (VECTOR_MODE_P (mode))
3382 mode = GET_MODE_INNER (mode);
3383
3384 bytes = GET_MODE_SIZE (mode);
3385 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3386
3387 /* Check that bytes are repeated. */
3388 for (i = bytes; i < 16; i += bytes)
3389 for (j = 0; j < bytes; j++)
3390 if (arr[j] != arr[i + j])
3391 return 0;
3392
3393 val = arr[0];
3394 for (j = 1; j < bytes; j++)
3395 val = (val << 8) | arr[j];
3396
3397 val = trunc_int_for_mode (val, mode);
3398
3399 return val >= low && val <= high;
3400}
3401
56c7bfc2 3402/* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
	 3403   OP == 2^SCALE with LOW <= SCALE <= HIGH.  When OP is a vector, all
	 3404   entries must be the same.  */
3405bool
3406exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3407{
3408 enum machine_mode int_mode;
3409 HOST_WIDE_INT val;
3410 unsigned char arr[16];
3411 int bytes, i, j;
3412
3413 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3414 || GET_CODE (op) == CONST_VECTOR);
3415
3416 if (GET_CODE (op) == CONST_VECTOR
3417 && !const_vector_immediate_p (op))
3418 return 0;
3419
3420 if (GET_MODE (op) != VOIDmode)
3421 mode = GET_MODE (op);
3422
3423 constant_to_array (mode, op, arr);
3424
3425 if (VECTOR_MODE_P (mode))
3426 mode = GET_MODE_INNER (mode);
3427
3428 bytes = GET_MODE_SIZE (mode);
3429 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3430
3431 /* Check that bytes are repeated. */
3432 for (i = bytes; i < 16; i += bytes)
3433 for (j = 0; j < bytes; j++)
3434 if (arr[j] != arr[i + j])
3435 return 0;
3436
3437 val = arr[0];
3438 for (j = 1; j < bytes; j++)
3439 val = (val << 8) | arr[j];
3440
3441 val = trunc_int_for_mode (val, int_mode);
3442
3443 /* Currently, we only handle SFmode */
3444 gcc_assert (mode == SFmode);
3445 if (mode == SFmode)
3446 {
3447 int exp = (val >> 23) - 127;
3448 return val > 0 && (val & 0x007fffff) == 0
3449 && exp >= low && exp <= high;
3450 }
3451 return FALSE;
3452}
3453
6cf5579e 3454/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3455
3456static int
3457ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3458{
3459 rtx x = *px;
3460 tree decl;
3461
3462 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3463 {
3464 rtx plus = XEXP (x, 0);
3465 rtx op0 = XEXP (plus, 0);
3466 rtx op1 = XEXP (plus, 1);
3467 if (GET_CODE (op1) == CONST_INT)
3468 x = op0;
3469 }
3470
3471 return (GET_CODE (x) == SYMBOL_REF
3472 && (decl = SYMBOL_REF_DECL (x)) != 0
3473 && TREE_CODE (decl) == VAR_DECL
3474 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3475}
3476
644459d0 3477/* We accept:
5b865faf 3478 - any 32-bit constant (SImode, SFmode)
644459d0 3479 - any constant that can be generated with fsmbi (any mode)
5b865faf 3480 - a 64-bit constant where the high and low bits are identical
644459d0 3481 (DImode, DFmode)
5b865faf 3482 - a 128-bit constant where the four 32-bit words match. */
ca316360 3483bool
3484spu_legitimate_constant_p (enum machine_mode mode, rtx x)
644459d0 3485{
5df189be 3486 if (GET_CODE (x) == HIGH)
3487 x = XEXP (x, 0);
6cf5579e 3488
3489 /* Reject any __ea qualified reference. These can't appear in
3490 instructions but must be forced to the constant pool. */
3491 if (for_each_rtx (&x, ea_symbol_ref, 0))
3492 return 0;
3493
644459d0 3494 /* V4SI with all identical symbols is valid. */
5df189be 3495 if (!flag_pic
ca316360 3496 && mode == V4SImode
644459d0 3497 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3498 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3499 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3500 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3501 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3502 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3503
5df189be 3504 if (GET_CODE (x) == CONST_VECTOR
3505 && !const_vector_immediate_p (x))
3506 return 0;
644459d0 3507 return 1;
3508}
3509
	 3510/* Valid addresses are:
3511 - symbol_ref, label_ref, const
3512 - reg
9d98604b 3513 - reg + const_int, where const_int is 16 byte aligned
644459d0 3514 - reg + reg, alignment doesn't matter
3515 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3516 ignore the 4 least significant bits of the const. We only care about
3517 16 byte modes because the expand phase will change all smaller MEM
3518 references to TImode. */
3519static bool
3520spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3521 rtx x, bool reg_ok_strict)
644459d0 3522{
9d98604b 3523 int aligned = GET_MODE_SIZE (mode) >= 16;
3524 if (aligned
3525 && GET_CODE (x) == AND
644459d0 3526 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3527 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3528 x = XEXP (x, 0);
3529 switch (GET_CODE (x))
3530 {
644459d0 3531 case LABEL_REF:
6cf5579e 3532 return !TARGET_LARGE_MEM;
3533
9d98604b 3534 case SYMBOL_REF:
644459d0 3535 case CONST:
6cf5579e 3536 /* Keep __ea references until reload so that spu_expand_mov can see them
3537 in MEMs. */
3538 if (ea_symbol_ref (&x, 0))
3539 return !reload_in_progress && !reload_completed;
9d98604b 3540 return !TARGET_LARGE_MEM;
644459d0 3541
3542 case CONST_INT:
3543 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3544
3545 case SUBREG:
3546 x = XEXP (x, 0);
9d98604b 3547 if (REG_P (x))
3548 return 0;
644459d0 3549
3550 case REG:
3551 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3552
3553 case PLUS:
3554 case LO_SUM:
3555 {
3556 rtx op0 = XEXP (x, 0);
3557 rtx op1 = XEXP (x, 1);
3558 if (GET_CODE (op0) == SUBREG)
3559 op0 = XEXP (op0, 0);
3560 if (GET_CODE (op1) == SUBREG)
3561 op1 = XEXP (op1, 0);
644459d0 3562 if (GET_CODE (op0) == REG
3563 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3564 && GET_CODE (op1) == CONST_INT
fa695424 3565 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3566 /* If virtual registers are involved, the displacement will
3567 change later on anyway, so checking would be premature.
3568 Reload will make sure the final displacement after
3569 register elimination is OK. */
3570 || op0 == arg_pointer_rtx
3571 || op0 == frame_pointer_rtx
3572 || op0 == virtual_stack_vars_rtx)
9d98604b 3573 && (!aligned || (INTVAL (op1) & 15) == 0))
3574 return TRUE;
644459d0 3575 if (GET_CODE (op0) == REG
3576 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3577 && GET_CODE (op1) == REG
3578 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3579 return TRUE;
644459d0 3580 }
3581 break;
3582
3583 default:
3584 break;
3585 }
9d98604b 3586 return FALSE;
644459d0 3587}
3588
6cf5579e 3589/* Like spu_legitimate_address_p, except with named addresses. */
3590static bool
3591spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3592 bool reg_ok_strict, addr_space_t as)
3593{
3594 if (as == ADDR_SPACE_EA)
3595 return (REG_P (x) && (GET_MODE (x) == EAmode));
3596
3597 else if (as != ADDR_SPACE_GENERIC)
3598 gcc_unreachable ();
3599
3600 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3601}
3602
644459d0 3603/* Make sure both operands of a PLUS address are registers; in particular,
fa7637bd 3604   when the address is reg + const_int, force the const_int into a register.  */
3defb88e 3605static rtx
644459d0 3606spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3607 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3608{
3609 rtx op0, op1;
3610 /* Make sure both operands are registers. */
3611 if (GET_CODE (x) == PLUS)
3612 {
3613 op0 = XEXP (x, 0);
3614 op1 = XEXP (x, 1);
3615 if (ALIGNED_SYMBOL_REF_P (op0))
3616 {
3617 op0 = force_reg (Pmode, op0);
3618 mark_reg_pointer (op0, 128);
3619 }
3620 else if (GET_CODE (op0) != REG)
3621 op0 = force_reg (Pmode, op0);
3622 if (ALIGNED_SYMBOL_REF_P (op1))
3623 {
3624 op1 = force_reg (Pmode, op1);
3625 mark_reg_pointer (op1, 128);
3626 }
3627 else if (GET_CODE (op1) != REG)
3628 op1 = force_reg (Pmode, op1);
3629 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3630 }
41e3a0c7 3631 return x;
644459d0 3632}
3633
6cf5579e 3634/* Like spu_legitimize_address, except with named address support.  */
3635static rtx
3636spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3637 addr_space_t as)
3638{
3639 if (as != ADDR_SPACE_GENERIC)
3640 return x;
3641
3642 return spu_legitimize_address (x, oldx, mode);
3643}
3644
fa695424 3645/* Reload reg + const_int for out-of-range displacements. */
3646rtx
3647spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3648 int opnum, int type)
3649{
3650 bool removed_and = false;
3651
3652 if (GET_CODE (ad) == AND
3653 && CONST_INT_P (XEXP (ad, 1))
3654 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3655 {
3656 ad = XEXP (ad, 0);
3657 removed_and = true;
3658 }
3659
3660 if (GET_CODE (ad) == PLUS
3661 && REG_P (XEXP (ad, 0))
3662 && CONST_INT_P (XEXP (ad, 1))
3663 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3664 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3665 {
3666 /* Unshare the sum. */
3667 ad = copy_rtx (ad);
3668
3669 /* Reload the displacement. */
3670 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3671 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3672 opnum, (enum reload_type) type);
3673
3674 /* Add back AND for alignment if we stripped it. */
3675 if (removed_and)
3676 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3677
3678 return ad;
3679 }
3680
3681 return NULL_RTX;
3682}
3683
644459d0 3684/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3685 struct attribute_spec.handler. */
3686static tree
3687spu_handle_fndecl_attribute (tree * node,
3688 tree name,
3689 tree args ATTRIBUTE_UNUSED,
3690 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3691{
3692 if (TREE_CODE (*node) != FUNCTION_DECL)
3693 {
67a779df 3694 warning (0, "%qE attribute only applies to functions",
3695 name);
644459d0 3696 *no_add_attrs = true;
3697 }
3698
3699 return NULL_TREE;
3700}
3701
3702/* Handle the "vector" attribute. */
3703static tree
3704spu_handle_vector_attribute (tree * node, tree name,
3705 tree args ATTRIBUTE_UNUSED,
3706 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3707{
3708 tree type = *node, result = NULL_TREE;
3709 enum machine_mode mode;
3710 int unsigned_p;
3711
3712 while (POINTER_TYPE_P (type)
3713 || TREE_CODE (type) == FUNCTION_TYPE
3714 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3715 type = TREE_TYPE (type);
3716
3717 mode = TYPE_MODE (type);
3718
3719 unsigned_p = TYPE_UNSIGNED (type);
3720 switch (mode)
3721 {
3722 case DImode:
3723 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3724 break;
3725 case SImode:
3726 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3727 break;
3728 case HImode:
3729 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3730 break;
3731 case QImode:
3732 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3733 break;
3734 case SFmode:
3735 result = V4SF_type_node;
3736 break;
3737 case DFmode:
3738 result = V2DF_type_node;
3739 break;
3740 default:
3741 break;
3742 }
3743
3744 /* Propagate qualifiers attached to the element type
3745 onto the vector type. */
3746 if (result && result != type && TYPE_QUALS (type))
3747 result = build_qualified_type (result, TYPE_QUALS (type));
3748
3749 *no_add_attrs = true; /* No need to hang on to the attribute. */
3750
3751 if (!result)
67a779df 3752 warning (0, "%qE attribute ignored", name);
644459d0 3753 else
d991e6e8 3754 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3755
3756 return NULL_TREE;
3757}
3758
f2b32076 3759/* Return nonzero if FUNC is a naked function. */
644459d0 3760static int
3761spu_naked_function_p (tree func)
3762{
3763 tree a;
3764
3765 if (TREE_CODE (func) != FUNCTION_DECL)
3766 abort ();
3767
3768 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3769 return a != NULL_TREE;
3770}
3771
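/* Return the byte offset to add when replacing register FROM with
   register TO during register elimination, computed from the sizes of
   the local frame, outgoing args and saved registers.  */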
3772int
3773spu_initial_elimination_offset (int from, int to)
3774{
3775 int saved_regs_size = spu_saved_regs_size ();
3776 int sp_offset = 0;
d5bf7b64 3777 if (!crtl->is_leaf || crtl->outgoing_args_size
644459d0 3778 || get_frame_size () || saved_regs_size)
3779 sp_offset = STACK_POINTER_OFFSET;
3780 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3781 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3782 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3783 return get_frame_size ();
644459d0 3784 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3785 return sp_offset + crtl->outgoing_args_size
644459d0 3786 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3787 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3788 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3789 else
3790 gcc_unreachable ();
644459d0 3791}
3792
3793rtx
fb80456a 3794spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3795{
3796 enum machine_mode mode = TYPE_MODE (type);
3797 int byte_size = ((mode == BLKmode)
3798 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3799
3800 /* Make sure small structs are left justified in a register. */
3801 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3802 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3803 {
3804 enum machine_mode smode;
3805 rtvec v;
3806 int i;
3807 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3808 int n = byte_size / UNITS_PER_WORD;
3809 v = rtvec_alloc (nregs);
3810 for (i = 0; i < n; i++)
3811 {
3812 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3813 gen_rtx_REG (TImode,
3814 FIRST_RETURN_REGNUM
3815 + i),
3816 GEN_INT (UNITS_PER_WORD * i));
3817 byte_size -= UNITS_PER_WORD;
3818 }
3819
3820 if (n < nregs)
3821 {
3822 if (byte_size < 4)
3823 byte_size = 4;
3824 smode =
3825 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3826 RTVEC_ELT (v, n) =
3827 gen_rtx_EXPR_LIST (VOIDmode,
3828 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3829 GEN_INT (UNITS_PER_WORD * n));
3830 }
3831 return gen_rtx_PARALLEL (mode, v);
3832 }
3833 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3834}
3835
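/* Return the register (or PARALLEL of registers) in which the next
   argument is passed, or 0 when it must be passed on the stack.  The
   ABI never splits an argument between registers and the stack.  */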
ee9034d4 3836static rtx
39cba157 3837spu_function_arg (cumulative_args_t cum_v,
644459d0 3838 enum machine_mode mode,
ee9034d4 3839 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3840{
39cba157 3841 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 3842 int byte_size;
3843
a08c5dd0 3844 if (*cum >= MAX_REGISTER_ARGS)
644459d0 3845 return 0;
3846
3847 byte_size = ((mode == BLKmode)
3848 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3849
3850	  /* The ABI does not allow parameters to be passed partially in a
3851	     register and partially on the stack. */
a08c5dd0 3852 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 3853 return 0;
3854
3855 /* Make sure small structs are left justified in a register. */
3856 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3857 && byte_size < UNITS_PER_WORD && byte_size > 0)
3858 {
3859 enum machine_mode smode;
3860 rtx gr_reg;
3861 if (byte_size < 4)
3862 byte_size = 4;
3863 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3864 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 3865 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 3866 const0_rtx);
3867 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3868 }
3869 else
a08c5dd0 3870 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 3871}
3872
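/* Advance the argument cursor past the current argument: one register
   for variable-sized or VOIDmode arguments, one register per 16 bytes
   for BLKmode aggregates, and HARD_REGNO_NREGS registers otherwise.  */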
ee9034d4 3873static void
39cba157 3874spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
ee9034d4 3875 const_tree type, bool named ATTRIBUTE_UNUSED)
3876{
39cba157 3877 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3878
ee9034d4 3879 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3880 ? 1
3881 : mode == BLKmode
3882 ? ((int_size_in_bytes (type) + 15) / 16)
3883 : mode == VOIDmode
3884 ? 1
3885 : HARD_REGNO_NREGS (cum, mode));
3886}
3887
644459d0 3888/* Variable sized types are passed by reference. */
3889static bool
39cba157 3890spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
644459d0 3891 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3892 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3893{
3894 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3895}
3896\f
3897
3898/* Var args. */
3899
3900/* Create and return the va_list datatype.
3901
3902 On SPU, va_list is an array type equivalent to
3903
3904 typedef struct __va_list_tag
3905 {
3906 void *__args __attribute__((__aligned(16)));
3907 void *__skip __attribute__((__aligned(16)));
3908
3909 } va_list[1];
3910
fa7637bd 3911 where __args points to the arg that will be returned by the next
644459d0 3912 va_arg(), and __skip points to the previous stack frame such that
3913 when __args == __skip we should advance __args by 32 bytes. */
3914static tree
3915spu_build_builtin_va_list (void)
3916{
3917 tree f_args, f_skip, record, type_decl;
3918 bool owp;
3919
3920 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3921
3922 type_decl =
54e46243 3923 build_decl (BUILTINS_LOCATION,
3924 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3925
54e46243 3926 f_args = build_decl (BUILTINS_LOCATION,
3927 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3928 f_skip = build_decl (BUILTINS_LOCATION,
3929 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3930
3931 DECL_FIELD_CONTEXT (f_args) = record;
3932 DECL_ALIGN (f_args) = 128;
3933 DECL_USER_ALIGN (f_args) = 1;
3934
3935 DECL_FIELD_CONTEXT (f_skip) = record;
3936 DECL_ALIGN (f_skip) = 128;
3937 DECL_USER_ALIGN (f_skip) = 1;
3938
bc907808 3939 TYPE_STUB_DECL (record) = type_decl;
644459d0 3940 TYPE_NAME (record) = type_decl;
3941 TYPE_FIELDS (record) = f_args;
1767a056 3942 DECL_CHAIN (f_args) = f_skip;
644459d0 3943
3944	  /* We know this is being padded and we want the padding. It is an
3945	     internal type, so hide the warnings from the user. */
3946 owp = warn_padded;
3947 warn_padded = false;
3948
3949 layout_type (record);
3950
3951 warn_padded = owp;
3952
3953 /* The correct type is an array type of one element. */
3954 return build_array_type (record, build_index_type (size_zero_node));
3955}
3956
3957/* Implement va_start by filling the va_list structure VALIST.
3958 NEXTARG points to the first anonymous stack argument.
3959
3960 The following global variables are used to initialize
3961 the va_list structure:
3962
abe32cce 3963 crtl->args.info;
644459d0 3964 the CUMULATIVE_ARGS for this function
3965
abe32cce 3966 crtl->args.arg_offset_rtx:
644459d0 3967 holds the offset of the first anonymous stack argument
3968 (relative to the virtual arg pointer). */
3969
8a58ed0a 3970static void
644459d0 3971spu_va_start (tree valist, rtx nextarg)
3972{
3973 tree f_args, f_skip;
3974 tree args, skip, t;
3975
3976 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 3977 f_skip = DECL_CHAIN (f_args);
644459d0 3978
170efcd4 3979 valist = build_simple_mem_ref (valist);
644459d0 3980 args =
3981 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3982 skip =
3983 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3984
3985 /* Find the __args area. */
3986 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3987 if (crtl->args.pretend_args_size > 0)
2cc66f2a 3988 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 3989 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3990 TREE_SIDE_EFFECTS (t) = 1;
3991 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3992
3993 /* Find the __skip area. */
3994 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 3995 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3996 - STACK_POINTER_OFFSET));
75a70cf9 3997 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3998 TREE_SIDE_EFFECTS (t) = 1;
3999 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4000}
4001
4002/* Gimplify va_arg by updating the va_list structure
4003 VALIST as required to retrieve an argument of type
4004 TYPE, and returning that argument.
4005
4006 ret = va_arg(VALIST, TYPE);
4007
4008 generates code equivalent to:
4009
4010 paddedsize = (sizeof(TYPE) + 15) & -16;
4011 if (VALIST.__args + paddedsize > VALIST.__skip
4012 && VALIST.__args <= VALIST.__skip)
4013 addr = VALIST.__skip + 32;
4014 else
4015 addr = VALIST.__args;
4016 VALIST.__args = addr + paddedsize;
4017 ret = *(TYPE *)addr;
4018 */
4019static tree
75a70cf9 4020spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4021 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4022{
4023 tree f_args, f_skip;
4024 tree args, skip;
4025 HOST_WIDE_INT size, rsize;
2cc66f2a 4026 tree addr, tmp;
644459d0 4027 bool pass_by_reference_p;
4028
4029 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4030 f_skip = DECL_CHAIN (f_args);
644459d0 4031
182cf5a9 4032 valist = build_simple_mem_ref (valist);
644459d0 4033 args =
4034 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4035 skip =
4036 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4037
4038 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4039
4040	  /* If an object is dynamically sized, a pointer to it is passed
4041	     instead of the object itself. */
27a82950 4042 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4043 false);
644459d0 4044 if (pass_by_reference_p)
4045 type = build_pointer_type (type);
4046 size = int_size_in_bytes (type);
4047 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4048
4049 /* build conditional expression to calculate addr. The expression
4050 will be gimplified later. */
2cc66f2a 4051 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4052 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4053 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4054 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4055 unshare_expr (skip)));
644459d0 4056
4057 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4058 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4059 unshare_expr (args));
644459d0 4060
75a70cf9 4061 gimplify_assign (addr, tmp, pre_p);
644459d0 4062
4063 /* update VALIST.__args */
2cc66f2a 4064 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4065 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4066
8115f0af 4067 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4068 addr);
644459d0 4069
4070 if (pass_by_reference_p)
4071 addr = build_va_arg_indirect_ref (addr);
4072
4073 return build_va_arg_indirect_ref (addr);
4074}
4075
4076/* Save parameter registers starting with the register that corresponds
4077 to the first unnamed parameters. If the first unnamed parameter is
4078 in the stack then save no registers. Set pretend_args_size to the
4079 amount of space needed to save the registers. */
39cba157 4080static void
4081spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
644459d0 4082 tree type, int *pretend_size, int no_rtl)
4083{
4084 if (!no_rtl)
4085 {
4086 rtx tmp;
4087 int regno;
4088 int offset;
39cba157 4089 int ncum = *get_cumulative_args (cum);
644459d0 4090
4091	      /* cum currently points to the last named argument; we want to
4092	         start at the next argument. */
39cba157 4093 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4094
4095 offset = -STACK_POINTER_OFFSET;
4096 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4097 {
4098 tmp = gen_frame_mem (V4SImode,
29c05e22 4099 plus_constant (Pmode, virtual_incoming_args_rtx,
644459d0 4100 offset));
4101 emit_move_insn (tmp,
4102 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4103 offset += 16;
4104 }
4105 *pretend_size = offset + STACK_POINTER_OFFSET;
4106 }
4107}
4108\f
b2d7ede1 4109static void
644459d0 4110spu_conditional_register_usage (void)
4111{
4112 if (flag_pic)
4113 {
4114 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4115 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4116 }
644459d0 4117}
4118
9d98604b 4119/* This is called any time we inspect the alignment of a register for
4120 addresses. */
644459d0 4121static int
9d98604b 4122reg_aligned_for_addr (rtx x)
644459d0 4123{
9d98604b 4124 int regno =
4125 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4126 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4127}
4128
69ced2d6 4129/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4130 into its SYMBOL_REF_FLAGS. */
4131static void
4132spu_encode_section_info (tree decl, rtx rtl, int first)
4133{
4134 default_encode_section_info (decl, rtl, first);
4135
4136 /* If a variable has a forced alignment to < 16 bytes, mark it with
4137 SYMBOL_FLAG_ALIGN1. */
4138 if (TREE_CODE (decl) == VAR_DECL
4139 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4140 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4141}
4142
644459d0 4143/* Return TRUE if we are certain the mem refers to a complete object
4144 which is both 16-byte aligned and padded to a 16-byte boundary. This
4145 would make it safe to store with a single instruction.
4146 We guarantee the alignment and padding for static objects by aligning
4147   all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4148 FIXME: We currently cannot guarantee this for objects on the stack
4149 because assign_parm_setup_stack calls assign_stack_local with the
4150 alignment of the parameter mode and in that case the alignment never
4151 gets adjusted by LOCAL_ALIGNMENT. */
4152static int
4153store_with_one_insn_p (rtx mem)
4154{
9d98604b 4155 enum machine_mode mode = GET_MODE (mem);
644459d0 4156 rtx addr = XEXP (mem, 0);
9d98604b 4157 if (mode == BLKmode)
644459d0 4158 return 0;
9d98604b 4159 if (GET_MODE_SIZE (mode) >= 16)
4160 return 1;
644459d0 4161 /* Only static objects. */
4162 if (GET_CODE (addr) == SYMBOL_REF)
4163 {
4164 /* We use the associated declaration to make sure the access is
fa7637bd 4165 referring to the whole object.
851d9296 4166 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4167 if it is necessary. Will there be cases where one exists, and
4168 the other does not? Will there be cases where both exist, but
4169 have different types? */
4170 tree decl = MEM_EXPR (mem);
4171 if (decl
4172 && TREE_CODE (decl) == VAR_DECL
4173 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4174 return 1;
4175 decl = SYMBOL_REF_DECL (addr);
4176 if (decl
4177 && TREE_CODE (decl) == VAR_DECL
4178 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4179 return 1;
4180 }
4181 return 0;
4182}
4183
9d98604b 4184/* Return 1 when the address is not valid for a simple load and store as
4185 required by the '_mov*' patterns. We could make this less strict
4186   for loads, but we prefer MEMs to look the same so they are more
4187 likely to be merged. */
4188static int
4189address_needs_split (rtx mem)
4190{
4191 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4192 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4193 || !(store_with_one_insn_p (mem)
4194 || mem_is_padded_component_ref (mem))))
4195 return 1;
4196
4197 return 0;
4198}
4199
6cf5579e 4200static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4201static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4202static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4203
4204/* MEM is known to be an __ea qualified memory access. Emit a call to
4205 fetch the ppu memory to local store, and return its address in local
4206 store. */
4207
4208static void
4209ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4210{
4211 if (is_store)
4212 {
4213 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4214 if (!cache_fetch_dirty)
4215 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4216 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4217 2, ea_addr, EAmode, ndirty, SImode);
4218 }
4219 else
4220 {
4221 if (!cache_fetch)
4222 cache_fetch = init_one_libfunc ("__cache_fetch");
4223 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4224 1, ea_addr, EAmode);
4225 }
4226}
4227
4228/* Like ea_load_store, but do the cache tag comparison and, for stores,
4229 dirty bit marking, inline.
4230
4231 The cache control data structure is an array of
4232
4233 struct __cache_tag_array
4234 {
4235 unsigned int tag_lo[4];
4236 unsigned int tag_hi[4];
4237 void *data_pointer[4];
4238 int reserved[4];
4239 vector unsigned short dirty_bits[4];
4240 } */
4241
4242static void
4243ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4244{
4245 rtx ea_addr_si;
4246 HOST_WIDE_INT v;
4247 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4248 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4249 rtx index_mask = gen_reg_rtx (SImode);
4250 rtx tag_arr = gen_reg_rtx (Pmode);
4251 rtx splat_mask = gen_reg_rtx (TImode);
4252 rtx splat = gen_reg_rtx (V4SImode);
4253 rtx splat_hi = NULL_RTX;
4254 rtx tag_index = gen_reg_rtx (Pmode);
4255 rtx block_off = gen_reg_rtx (SImode);
4256 rtx tag_addr = gen_reg_rtx (Pmode);
4257 rtx tag = gen_reg_rtx (V4SImode);
4258 rtx cache_tag = gen_reg_rtx (V4SImode);
4259 rtx cache_tag_hi = NULL_RTX;
4260 rtx cache_ptrs = gen_reg_rtx (TImode);
4261 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4262 rtx tag_equal = gen_reg_rtx (V4SImode);
4263 rtx tag_equal_hi = NULL_RTX;
4264 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4265 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4266 rtx eq_index = gen_reg_rtx (SImode);
4267 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4268
4269 if (spu_ea_model != 32)
4270 {
4271 splat_hi = gen_reg_rtx (V4SImode);
4272 cache_tag_hi = gen_reg_rtx (V4SImode);
4273 tag_equal_hi = gen_reg_rtx (V4SImode);
4274 }
4275
29c05e22 4276 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
6cf5579e 4277 emit_move_insn (tag_arr, tag_arr_sym);
4278 v = 0x0001020300010203LL;
4279 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4280 ea_addr_si = ea_addr;
4281 if (spu_ea_model != 32)
4282 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4283
4284 /* tag_index = ea_addr & (tag_array_size - 128) */
4285 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4286
4287 /* splat ea_addr to all 4 slots. */
4288 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4289 /* Similarly for high 32 bits of ea_addr. */
4290 if (spu_ea_model != 32)
4291 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4292
4293 /* block_off = ea_addr & 127 */
4294 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4295
4296 /* tag_addr = tag_arr + tag_index */
4297 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4298
4299 /* Read cache tags. */
4300 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4301 if (spu_ea_model != 32)
4302 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
29c05e22 4303 plus_constant (Pmode,
4304 tag_addr, 16)));
6cf5579e 4305
4306 /* tag = ea_addr & -128 */
4307 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4308
4309 /* Read all four cache data pointers. */
4310 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
29c05e22 4311 plus_constant (Pmode,
4312 tag_addr, 32)));
6cf5579e 4313
4314 /* Compare tags. */
4315 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4316 if (spu_ea_model != 32)
4317 {
4318 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4319 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4320 }
4321
4322  /* At most one of the tags compares equal, so tag_equal has one
4323     32-bit slot set to all 1's, with the other slots all zero.
4324     gbb picks off the low bit of each byte in the 128-bit register,
4325     so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4326     we have a hit. */
4327 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4328 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4329
4330 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4331 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4332
4333  /* This allows us to rotate the corresponding cache data pointer into
4334     slot 0 (rotating by eq_index mod 16 bytes). */
4335 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4336 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4337
4338 /* Add block offset to form final data address. */
4339 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4340
4341 /* Check that we did hit. */
4342 hit_label = gen_label_rtx ();
4343 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4344 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4345 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4346 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4347 hit_ref, pc_rtx)));
4348 /* Say that this branch is very likely to happen. */
4349 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
9eb946de 4350 add_int_reg_note (insn, REG_BR_PROB, v);
6cf5579e 4351
4352 ea_load_store (mem, is_store, ea_addr, data_addr);
4353 cont_label = gen_label_rtx ();
4354 emit_jump_insn (gen_jump (cont_label));
4355 emit_barrier ();
4356
4357 emit_label (hit_label);
4358
4359 if (is_store)
4360 {
4361 HOST_WIDE_INT v_hi;
4362 rtx dirty_bits = gen_reg_rtx (TImode);
4363 rtx dirty_off = gen_reg_rtx (SImode);
4364 rtx dirty_128 = gen_reg_rtx (TImode);
4365 rtx neg_block_off = gen_reg_rtx (SImode);
4366
4367      /* Set up a mask with one dirty bit per byte of the mem we are
4368         writing, starting from the top bit. */
4369 v_hi = v = -1;
4370 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4371 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4372 {
4373 v_hi = v;
4374 v = 0;
4375 }
4376 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4377
4378 /* Form index into cache dirty_bits. eq_index is one of
4379 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4380 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4381 offset to each of the four dirty_bits elements. */
4382 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4383
4384 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4385
4386 /* Rotate bit mask to proper bit. */
4387 emit_insn (gen_negsi2 (neg_block_off, block_off));
4388 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4389 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4390
4391 /* Or in the new dirty bits. */
4392 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4393
4394 /* Store. */
4395 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4396 }
4397
4398 emit_label (cont_label);
4399}
4400
4401static rtx
4402expand_ea_mem (rtx mem, bool is_store)
4403{
4404 rtx ea_addr;
4405 rtx data_addr = gen_reg_rtx (Pmode);
4406 rtx new_mem;
4407
4408 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4409 if (optimize_size || optimize == 0)
4410 ea_load_store (mem, is_store, ea_addr, data_addr);
4411 else
4412 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4413
4414 if (ea_alias_set == -1)
4415 ea_alias_set = new_alias_set ();
4416
4417 /* We generate a new MEM RTX to refer to the copy of the data
4418 in the cache. We do not copy memory attributes (except the
4419 alignment) from the original MEM, as they may no longer apply
4420 to the cache copy. */
4421 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4422 set_mem_alias_set (new_mem, ea_alias_set);
4423 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4424
4425 return new_mem;
4426}
4427
644459d0 4428int
4429spu_expand_mov (rtx * ops, enum machine_mode mode)
4430{
4431 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4432 {
4433 /* Perform the move in the destination SUBREG's inner mode. */
4434 ops[0] = SUBREG_REG (ops[0]);
4435 mode = GET_MODE (ops[0]);
4436 ops[1] = gen_lowpart_common (mode, ops[1]);
4437 gcc_assert (ops[1]);
4438 }
644459d0 4439
4440 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4441 {
4442 rtx from = SUBREG_REG (ops[1]);
8d72495d 4443 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4444
4445 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4446 && GET_MODE_CLASS (imode) == MODE_INT
4447 && subreg_lowpart_p (ops[1]));
4448
4449 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4450 imode = SImode;
4451 if (imode != GET_MODE (from))
4452 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4453
4454 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4455 {
d6bf3b14 4456 enum insn_code icode = convert_optab_handler (trunc_optab,
4457 mode, imode);
644459d0 4458 emit_insn (GEN_FCN (icode) (ops[0], from));
4459 }
4460 else
4461 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4462 return 1;
4463 }
4464
4465 /* At least one of the operands needs to be a register. */
4466 if ((reload_in_progress | reload_completed) == 0
4467 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4468 {
4469 rtx temp = force_reg (mode, ops[1]);
4470 emit_move_insn (ops[0], temp);
4471 return 1;
4472 }
4473 if (reload_in_progress || reload_completed)
4474 {
dea01258 4475 if (CONSTANT_P (ops[1]))
4476 return spu_split_immediate (ops);
644459d0 4477 return 0;
4478 }
9d98604b 4479
4480 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4481 extend them. */
4482 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4483 {
9d98604b 4484 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4485 if (val != INTVAL (ops[1]))
644459d0 4486 {
9d98604b 4487 emit_move_insn (ops[0], GEN_INT (val));
4488 return 1;
644459d0 4489 }
4490 }
9d98604b 4491 if (MEM_P (ops[0]))
6cf5579e 4492 {
4493 if (MEM_ADDR_SPACE (ops[0]))
4494 ops[0] = expand_ea_mem (ops[0], true);
4495 return spu_split_store (ops);
4496 }
9d98604b 4497 if (MEM_P (ops[1]))
6cf5579e 4498 {
4499 if (MEM_ADDR_SPACE (ops[1]))
4500 ops[1] = expand_ea_mem (ops[1], false);
4501 return spu_split_load (ops);
4502 }
9d98604b 4503
644459d0 4504 return 0;
4505}
4506
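/* Move the scalar in the preferred slot of TImode register SRC into
   DST: shift the first word (or doubleword, for 8-byte modes) down to
   the low end of SRC and truncate to DST's mode.  */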
9d98604b 4507static void
4508spu_convert_move (rtx dst, rtx src)
644459d0 4509{
9d98604b 4510 enum machine_mode mode = GET_MODE (dst);
4511 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4512 rtx reg;
4513 gcc_assert (GET_MODE (src) == TImode);
4514 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4515 emit_insn (gen_rtx_SET (VOIDmode, reg,
4516 gen_rtx_TRUNCATE (int_mode,
4517 gen_rtx_LSHIFTRT (TImode, src,
4518 GEN_INT (int_mode == DImode ? 64 : 96)))));
4519 if (int_mode != mode)
4520 {
4521 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4522 emit_move_insn (dst, reg);
4523 }
4524}
644459d0 4525
9d98604b 4526/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4527 the address from SRC and SRC+16. Return a REG or CONST_INT that
4528 specifies how many bytes to rotate the loaded registers, plus any
4529 extra from EXTRA_ROTQBY. The address and rotate amounts are
4530 normalized to improve merging of loads and rotate computations. */
4531static rtx
4532spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4533{
4534 rtx addr = XEXP (src, 0);
4535 rtx p0, p1, rot, addr0, addr1;
4536 int rot_amt;
644459d0 4537
4538 rot = 0;
4539 rot_amt = 0;
9d98604b 4540
4541 if (MEM_ALIGN (src) >= 128)
4542 /* Address is already aligned; simply perform a TImode load. */ ;
4543 else if (GET_CODE (addr) == PLUS)
644459d0 4544 {
4545 /* 8 cases:
4546 aligned reg + aligned reg => lqx
4547 aligned reg + unaligned reg => lqx, rotqby
4548 aligned reg + aligned const => lqd
4549 aligned reg + unaligned const => lqd, rotqbyi
4550 unaligned reg + aligned reg => lqx, rotqby
4551 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4552 unaligned reg + aligned const => lqd, rotqby
4553         unaligned reg + unaligned const => not allowed by legitimate address
4554 */
4555 p0 = XEXP (addr, 0);
4556 p1 = XEXP (addr, 1);
9d98604b 4557 if (!reg_aligned_for_addr (p0))
644459d0 4558 {
9d98604b 4559 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4560 {
9d98604b 4561 rot = gen_reg_rtx (SImode);
4562 emit_insn (gen_addsi3 (rot, p0, p1));
4563 }
4564 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4565 {
4566 if (INTVAL (p1) > 0
4567 && REG_POINTER (p0)
4568 && INTVAL (p1) * BITS_PER_UNIT
4569 < REGNO_POINTER_ALIGN (REGNO (p0)))
4570 {
4571 rot = gen_reg_rtx (SImode);
4572 emit_insn (gen_addsi3 (rot, p0, p1));
4573 addr = p0;
4574 }
4575 else
4576 {
4577 rtx x = gen_reg_rtx (SImode);
4578 emit_move_insn (x, p1);
4579 if (!spu_arith_operand (p1, SImode))
4580 p1 = x;
4581 rot = gen_reg_rtx (SImode);
4582 emit_insn (gen_addsi3 (rot, p0, p1));
4583 addr = gen_rtx_PLUS (Pmode, p0, x);
4584 }
644459d0 4585 }
4586 else
4587 rot = p0;
4588 }
4589 else
4590 {
4591 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4592 {
4593 rot_amt = INTVAL (p1) & 15;
9d98604b 4594 if (INTVAL (p1) & -16)
4595 {
4596 p1 = GEN_INT (INTVAL (p1) & -16);
4597 addr = gen_rtx_PLUS (SImode, p0, p1);
4598 }
4599 else
4600 addr = p0;
644459d0 4601 }
9d98604b 4602 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4603 rot = p1;
4604 }
4605 }
9d98604b 4606 else if (REG_P (addr))
644459d0 4607 {
9d98604b 4608 if (!reg_aligned_for_addr (addr))
644459d0 4609 rot = addr;
4610 }
4611 else if (GET_CODE (addr) == CONST)
4612 {
4613 if (GET_CODE (XEXP (addr, 0)) == PLUS
4614 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4615 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4616 {
4617 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4618 if (rot_amt & -16)
4619 addr = gen_rtx_CONST (Pmode,
4620 gen_rtx_PLUS (Pmode,
4621 XEXP (XEXP (addr, 0), 0),
4622 GEN_INT (rot_amt & -16)));
4623 else
4624 addr = XEXP (XEXP (addr, 0), 0);
4625 }
4626 else
9d98604b 4627 {
4628 rot = gen_reg_rtx (Pmode);
4629 emit_move_insn (rot, addr);
4630 }
644459d0 4631 }
4632 else if (GET_CODE (addr) == CONST_INT)
4633 {
4634 rot_amt = INTVAL (addr);
4635 addr = GEN_INT (rot_amt & -16);
4636 }
4637 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4638 {
4639 rot = gen_reg_rtx (Pmode);
4640 emit_move_insn (rot, addr);
4641 }
644459d0 4642
9d98604b 4643 rot_amt += extra_rotby;
644459d0 4644
4645 rot_amt &= 15;
4646
4647 if (rot && rot_amt)
4648 {
9d98604b 4649 rtx x = gen_reg_rtx (SImode);
4650 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4651 rot = x;
644459d0 4652 rot_amt = 0;
4653 }
9d98604b 4654 if (!rot && rot_amt)
4655 rot = GEN_INT (rot_amt);
4656
4657 addr0 = copy_rtx (addr);
4658 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4659 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4660
4661 if (dst1)
4662 {
29c05e22 4663 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
9d98604b 4664 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4665 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4666 }
644459d0 4667
9d98604b 4668 return rot;
4669}
4670
4671int
4672spu_split_load (rtx * ops)
4673{
4674 enum machine_mode mode = GET_MODE (ops[0]);
4675 rtx addr, load, rot;
4676 int rot_amt;
644459d0 4677
9d98604b 4678 if (GET_MODE_SIZE (mode) >= 16)
4679 return 0;
644459d0 4680
9d98604b 4681 addr = XEXP (ops[1], 0);
4682 gcc_assert (GET_CODE (addr) != AND);
4683
4684 if (!address_needs_split (ops[1]))
4685 {
4686 ops[1] = change_address (ops[1], TImode, addr);
4687 load = gen_reg_rtx (TImode);
4688 emit_insn (gen__movti (load, ops[1]));
4689 spu_convert_move (ops[0], load);
4690 return 1;
4691 }
4692
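  /* Sub-word scalars live right-justified in the preferred (first) word,
     so bias the rotate by size - 4 bytes for loads narrower than 4
     bytes.  */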
4693 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4694
4695 load = gen_reg_rtx (TImode);
4696 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4697
4698 if (rot)
4699 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4700
9d98604b 4701 spu_convert_move (ops[0], load);
4702 return 1;
644459d0 4703}
4704
9d98604b 4705int
644459d0 4706spu_split_store (rtx * ops)
4707{
4708 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4709 rtx reg;
644459d0 4710 rtx addr, p0, p1, p1_lo, smem;
4711 int aform;
4712 int scalar;
4713
9d98604b 4714 if (GET_MODE_SIZE (mode) >= 16)
4715 return 0;
4716
644459d0 4717 addr = XEXP (ops[0], 0);
9d98604b 4718 gcc_assert (GET_CODE (addr) != AND);
4719
4720 if (!address_needs_split (ops[0]))
4721 {
4722 reg = gen_reg_rtx (TImode);
4723 emit_insn (gen_spu_convert (reg, ops[1]));
4724 ops[0] = change_address (ops[0], TImode, addr);
4725 emit_move_insn (ops[0], reg);
4726 return 1;
4727 }
644459d0 4728
4729 if (GET_CODE (addr) == PLUS)
4730 {
4731 /* 8 cases:
4732 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4733 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4734 aligned reg + aligned const => lqd, c?d, shuf, stqx
4735 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4736 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4737 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4738 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4739         unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4740 */
4741 aform = 0;
4742 p0 = XEXP (addr, 0);
4743 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4744 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4745 {
4746 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4747 if (reg_aligned_for_addr (p0))
4748 {
4749 p1 = GEN_INT (INTVAL (p1) & -16);
4750 if (p1 == const0_rtx)
4751 addr = p0;
4752 else
4753 addr = gen_rtx_PLUS (SImode, p0, p1);
4754 }
4755 else
4756 {
4757 rtx x = gen_reg_rtx (SImode);
4758 emit_move_insn (x, p1);
4759 addr = gen_rtx_PLUS (SImode, p0, x);
4760 }
644459d0 4761 }
4762 }
9d98604b 4763 else if (REG_P (addr))
644459d0 4764 {
4765 aform = 0;
4766 p0 = addr;
4767 p1 = p1_lo = const0_rtx;
4768 }
4769 else
4770 {
4771 aform = 1;
4772 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4773 p1 = 0; /* aform doesn't use p1 */
4774 p1_lo = addr;
4775 if (ALIGNED_SYMBOL_REF_P (addr))
4776 p1_lo = const0_rtx;
9d98604b 4777 else if (GET_CODE (addr) == CONST
4778 && GET_CODE (XEXP (addr, 0)) == PLUS
4779 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4780 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4781 {
9d98604b 4782 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4783 if ((v & -16) != 0)
4784 addr = gen_rtx_CONST (Pmode,
4785 gen_rtx_PLUS (Pmode,
4786 XEXP (XEXP (addr, 0), 0),
4787 GEN_INT (v & -16)));
4788 else
4789 addr = XEXP (XEXP (addr, 0), 0);
4790 p1_lo = GEN_INT (v & 15);
644459d0 4791 }
4792 else if (GET_CODE (addr) == CONST_INT)
4793 {
4794 p1_lo = GEN_INT (INTVAL (addr) & 15);
4795 addr = GEN_INT (INTVAL (addr) & -16);
4796 }
9d98604b 4797 else
4798 {
4799 p1_lo = gen_reg_rtx (SImode);
4800 emit_move_insn (p1_lo, addr);
4801 }
644459d0 4802 }
4803
4cbad5bb 4804 gcc_assert (aform == 0 || aform == 1);
9d98604b 4805 reg = gen_reg_rtx (TImode);
e04cf423 4806
644459d0 4807 scalar = store_with_one_insn_p (ops[0]);
4808 if (!scalar)
4809 {
4810      /* We could copy the flags from the ops[0] MEM to lmem here;
4811         we don't because we want this load to be optimized away if
4812         possible, and copying the flags will prevent that in certain
4813         cases, e.g. consider the volatile flag. */
4814
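      /* This is a read-modify-write sequence: load the quadword that
         contains the destination, build a cpat insertion control for
         the scalar's byte position, shuffle the new value into place,
         and store the whole quadword back below.  */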
9d98604b 4815 rtx pat = gen_reg_rtx (TImode);
e04cf423 4816 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4817 set_mem_alias_set (lmem, 0);
4818 emit_insn (gen_movti (reg, lmem));
644459d0 4819
9d98604b 4820 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4821 p0 = stack_pointer_rtx;
4822 if (!p1_lo)
4823 p1_lo = const0_rtx;
4824
4825 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4826 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4827 }
644459d0 4828 else
4829 {
4830 if (GET_CODE (ops[1]) == REG)
4831 emit_insn (gen_spu_convert (reg, ops[1]));
4832 else if (GET_CODE (ops[1]) == SUBREG)
4833 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4834 else
4835 abort ();
4836 }
4837
4838 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4839 emit_insn (gen_ashlti3
4840 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4841
9d98604b 4842 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4843 /* We can't use the previous alias set because the memory has changed
4844 size and can potentially overlap objects of other types. */
4845 set_mem_alias_set (smem, 0);
4846
e04cf423 4847 emit_insn (gen_movti (smem, reg));
9d98604b 4848 return 1;
644459d0 4849}
4850
4851/* Return TRUE if X is MEM which is a struct member reference
4852 and the member can safely be loaded and stored with a single
4853 instruction because it is padded. */
4854static int
4855mem_is_padded_component_ref (rtx x)
4856{
4857 tree t = MEM_EXPR (x);
4858 tree r;
4859 if (!t || TREE_CODE (t) != COMPONENT_REF)
4860 return 0;
4861 t = TREE_OPERAND (t, 1);
4862 if (!t || TREE_CODE (t) != FIELD_DECL
4863 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4864 return 0;
4865 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4866 r = DECL_FIELD_CONTEXT (t);
4867 if (!r || TREE_CODE (r) != RECORD_TYPE)
4868 return 0;
4869 /* Make sure they are the same mode */
4870 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4871 return 0;
4872  /* If there are no following fields, then the field's alignment assures
fa7637bd 4873     the structure is padded to that alignment, which means this field is
4874     padded too. */
644459d0 4875 if (TREE_CHAIN (t) == 0)
4876 return 1;
4877 /* If the following field is also aligned then this field will be
4878 padded. */
4879 t = TREE_CHAIN (t);
4880 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4881 return 1;
4882 return 0;
4883}
4884
c7b91b14 4885/* Parse the -mfixed-range= option string. */
4886static void
4887fix_range (const char *const_str)
4888{
4889 int i, first, last;
4890 char *str, *dash, *comma;
4891
4892  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4893 REG2 are either register names or register numbers. The effect
4894 of this option is to mark the registers in the range from REG1 to
4895 REG2 as ``fixed'' so they won't be used by the compiler. */
4896
4897 i = strlen (const_str);
4898 str = (char *) alloca (i + 1);
4899 memcpy (str, const_str, i + 1);
4900
4901 while (1)
4902 {
4903 dash = strchr (str, '-');
4904 if (!dash)
4905 {
4906 warning (0, "value of -mfixed-range must have form REG1-REG2");
4907 return;
4908 }
4909 *dash = '\0';
4910 comma = strchr (dash + 1, ',');
4911 if (comma)
4912 *comma = '\0';
4913
4914 first = decode_reg_name (str);
4915 if (first < 0)
4916 {
4917 warning (0, "unknown register name: %s", str);
4918 return;
4919 }
4920
4921 last = decode_reg_name (dash + 1);
4922 if (last < 0)
4923 {
4924 warning (0, "unknown register name: %s", dash + 1);
4925 return;
4926 }
4927
4928 *dash = '-';
4929
4930 if (first > last)
4931 {
4932 warning (0, "%s-%s is an empty range", str, dash + 1);
4933 return;
4934 }
4935
4936 for (i = first; i <= last; ++i)
4937 fixed_regs[i] = call_used_regs[i] = 1;
4938
4939 if (!comma)
4940 break;
4941
4942 *comma = ',';
4943 str = comma + 1;
4944 }
4945}
4946
644459d0 4947/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4948 can be generated using the fsmbi instruction. */
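/* (fsmbi expands a 16-bit immediate into a 16-byte mask, one byte of
   0x00 or 0xff per immediate bit, which is why only constants of that
   shape qualify.)  */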
4949int
4950fsmbi_const_p (rtx x)
4951{
dea01258 4952 if (CONSTANT_P (x))
4953 {
5df189be 4954 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4955 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4956 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4957 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4958 }
4959 return 0;
4960}
4961
4962/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4963 can be generated using the cbd, chd, cwd or cdd instruction. */
4964int
4965cpat_const_p (rtx x, enum machine_mode mode)
4966{
4967 if (CONSTANT_P (x))
4968 {
4969 enum immediate_class c = classify_immediate (x, mode);
4970 return c == IC_CPAT;
4971 }
4972 return 0;
4973}
644459d0 4974
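/* Build the 16-byte shuffle pattern that a cbd/chd/cwd/cdd instruction
   would produce: every byte defaults to 0x10+i, selecting the
   corresponding byte of the second shufb operand, and the ISIZE bytes
   at the scalar's offset are replaced by insertion indices.  For
   example, a 4-byte insert at offset 0 gives
   { 00 01 02 03 10 11 12 ... 1f }.  */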
dea01258 4975rtx
4976gen_cpat_const (rtx * ops)
4977{
4978 unsigned char dst[16];
4979 int i, offset, shift, isize;
4980 if (GET_CODE (ops[3]) != CONST_INT
4981 || GET_CODE (ops[2]) != CONST_INT
4982 || (GET_CODE (ops[1]) != CONST_INT
4983 && GET_CODE (ops[1]) != REG))
4984 return 0;
4985 if (GET_CODE (ops[1]) == REG
4986 && (!REG_POINTER (ops[1])
4987 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4988 return 0;
644459d0 4989
4990 for (i = 0; i < 16; i++)
dea01258 4991 dst[i] = i + 16;
4992 isize = INTVAL (ops[3]);
4993 if (isize == 1)
4994 shift = 3;
4995 else if (isize == 2)
4996 shift = 2;
4997 else
4998 shift = 0;
4999 offset = (INTVAL (ops[2]) +
5000 (GET_CODE (ops[1]) ==
5001 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5002 for (i = 0; i < isize; i++)
5003 dst[offset + i] = i + shift;
5004 return array_to_constant (TImode, dst);
644459d0 5005}
5006
5007/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5008 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5009 than 16 bytes, the value is repeated across the rest of the array. */
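/* For example, the SImode constant 0x01020304 fills the array with
   { 01 02 03 04 } repeated four times.  */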
5010void
5011constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5012{
5013 HOST_WIDE_INT val;
5014 int i, j, first;
5015
5016 memset (arr, 0, 16);
5017 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5018 if (GET_CODE (x) == CONST_INT
5019 || (GET_CODE (x) == CONST_DOUBLE
5020 && (mode == SFmode || mode == DFmode)))
5021 {
5022 gcc_assert (mode != VOIDmode && mode != BLKmode);
5023
5024 if (GET_CODE (x) == CONST_DOUBLE)
5025 val = const_double_to_hwint (x);
5026 else
5027 val = INTVAL (x);
5028 first = GET_MODE_SIZE (mode) - 1;
5029 for (i = first; i >= 0; i--)
5030 {
5031 arr[i] = val & 0xff;
5032 val >>= 8;
5033 }
5034 /* Splat the constant across the whole array. */
5035 for (j = 0, i = first + 1; i < 16; i++)
5036 {
5037 arr[i] = arr[j];
5038 j = (j == first) ? 0 : j + 1;
5039 }
5040 }
5041 else if (GET_CODE (x) == CONST_DOUBLE)
5042 {
5043 val = CONST_DOUBLE_LOW (x);
5044 for (i = 15; i >= 8; i--)
5045 {
5046 arr[i] = val & 0xff;
5047 val >>= 8;
5048 }
5049 val = CONST_DOUBLE_HIGH (x);
5050 for (i = 7; i >= 0; i--)
5051 {
5052 arr[i] = val & 0xff;
5053 val >>= 8;
5054 }
5055 }
5056 else if (GET_CODE (x) == CONST_VECTOR)
5057 {
5058 int units;
5059 rtx elt;
5060 mode = GET_MODE_INNER (mode);
5061 units = CONST_VECTOR_NUNITS (x);
5062 for (i = 0; i < units; i++)
5063 {
5064 elt = CONST_VECTOR_ELT (x, i);
5065 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5066 {
5067 if (GET_CODE (elt) == CONST_DOUBLE)
5068 val = const_double_to_hwint (elt);
5069 else
5070 val = INTVAL (elt);
5071 first = GET_MODE_SIZE (mode) - 1;
5072 if (first + i * GET_MODE_SIZE (mode) > 16)
5073 abort ();
5074 for (j = first; j >= 0; j--)
5075 {
5076 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5077 val >>= 8;
5078 }
5079 }
5080 }
5081 }
5082 else
5083 gcc_unreachable();
5084}
5085
5086/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5087 smaller than 16 bytes, use the bytes that would represent that value
5088 in a register, e.g., for QImode return the value of arr[3]. */
5089rtx
e96f2783 5090array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5091{
5092 enum machine_mode inner_mode;
5093 rtvec v;
5094 int units, size, i, j, k;
5095 HOST_WIDE_INT val;
5096
5097 if (GET_MODE_CLASS (mode) == MODE_INT
5098 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5099 {
5100 j = GET_MODE_SIZE (mode);
5101 i = j < 4 ? 4 - j : 0;
5102 for (val = 0; i < j; i++)
5103 val = (val << 8) | arr[i];
5104 val = trunc_int_for_mode (val, mode);
5105 return GEN_INT (val);
5106 }
5107
5108 if (mode == TImode)
5109 {
5110 HOST_WIDE_INT high;
5111 for (i = high = 0; i < 8; i++)
5112 high = (high << 8) | arr[i];
5113 for (i = 8, val = 0; i < 16; i++)
5114 val = (val << 8) | arr[i];
5115 return immed_double_const (val, high, TImode);
5116 }
5117 if (mode == SFmode)
5118 {
5119 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5120 val = trunc_int_for_mode (val, SImode);
171b6d22 5121 return hwint_to_const_double (SFmode, val);
644459d0 5122 }
5123 if (mode == DFmode)
5124 {
1f915911 5125 for (i = 0, val = 0; i < 8; i++)
5126 val = (val << 8) | arr[i];
171b6d22 5127 return hwint_to_const_double (DFmode, val);
644459d0 5128 }
5129
5130 if (!VECTOR_MODE_P (mode))
5131 abort ();
5132
5133 units = GET_MODE_NUNITS (mode);
5134 size = GET_MODE_UNIT_SIZE (mode);
5135 inner_mode = GET_MODE_INNER (mode);
5136 v = rtvec_alloc (units);
5137
5138 for (k = i = 0; i < units; ++i)
5139 {
5140 val = 0;
5141 for (j = 0; j < size; j++, k++)
5142 val = (val << 8) | arr[k];
5143
5144 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5145 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5146 else
5147 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5148 }
5149 if (k > 16)
5150 abort ();
5151
5152 return gen_rtx_CONST_VECTOR (mode, v);
5153}
5154
5155static void
5156reloc_diagnostic (rtx x)
5157{
712d2297 5158 tree decl = 0;
644459d0 5159 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5160 return;
5161
5162 if (GET_CODE (x) == SYMBOL_REF)
5163 decl = SYMBOL_REF_DECL (x);
5164 else if (GET_CODE (x) == CONST
5165 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5166 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5167
5168 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5169 if (decl && !DECL_P (decl))
5170 decl = 0;
5171
644459d0 5172 /* The decl could be a string constant. */
5173 if (decl && DECL_P (decl))
712d2297 5174 {
5175 location_t loc;
5176 /* We use last_assemble_variable_decl to get line information. It's
5177 not always going to be right and might not even be close, but will
5178 be right for the more common cases. */
5179 if (!last_assemble_variable_decl || in_section == ctors_section)
5180 loc = DECL_SOURCE_LOCATION (decl);
5181 else
5182 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5183
712d2297 5184 if (TARGET_WARN_RELOC)
5185 warning_at (loc, 0,
5186 "creating run-time relocation for %qD", decl);
5187 else
5188 error_at (loc,
5189 "creating run-time relocation for %qD", decl);
5190 }
5191 else
5192 {
5193 if (TARGET_WARN_RELOC)
5194 warning_at (input_location, 0, "creating run-time relocation");
5195 else
5196 error_at (input_location, "creating run-time relocation");
5197 }
644459d0 5198}
5199
5200/* Hook into assemble_integer so we can generate an error for run-time
5201 relocations. The SPU ABI disallows them. */
5202static bool
5203spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5204{
5205 /* By default run-time relocations aren't supported, but we allow them
5206     in case users support them in their own run-time loader, and we
5207     provide a warning for those users that don't. */
5208 if ((GET_CODE (x) == SYMBOL_REF)
5209 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5210 reloc_diagnostic (x);
5211
5212 return default_assemble_integer (x, size, aligned_p);
5213}
5214
5215static void
5216spu_asm_globalize_label (FILE * file, const char *name)
5217{
5218 fputs ("\t.global\t", file);
5219 assemble_name (file, name);
5220 fputs ("\n", file);
5221}
5222
5223static bool
20d892d1 5224spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5225 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5226 bool speed ATTRIBUTE_UNUSED)
644459d0 5227{
5228 enum machine_mode mode = GET_MODE (x);
5229 int cost = COSTS_N_INSNS (2);
5230
5231 /* Folding to a CONST_VECTOR will use extra space but there might
5232 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5233 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5234 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5235 because this cost will only be compared against a single insn.
5236 if (code == CONST_VECTOR)
ca316360 5237 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5238 */
5239
5240 /* Use defaults for float operations. Not accurate but good enough. */
5241 if (mode == DFmode)
5242 {
5243 *total = COSTS_N_INSNS (13);
5244 return true;
5245 }
5246 if (mode == SFmode)
5247 {
5248 *total = COSTS_N_INSNS (6);
5249 return true;
5250 }
5251 switch (code)
5252 {
5253 case CONST_INT:
5254 if (satisfies_constraint_K (x))
5255 *total = 0;
5256 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5257 *total = COSTS_N_INSNS (1);
5258 else
5259 *total = COSTS_N_INSNS (3);
5260 return true;
5261
5262 case CONST:
5263 *total = COSTS_N_INSNS (3);
5264 return true;
5265
5266 case LABEL_REF:
5267 case SYMBOL_REF:
5268 *total = COSTS_N_INSNS (0);
5269 return true;
5270
5271 case CONST_DOUBLE:
5272 *total = COSTS_N_INSNS (5);
5273 return true;
5274
5275 case FLOAT_EXTEND:
5276 case FLOAT_TRUNCATE:
5277 case FLOAT:
5278 case UNSIGNED_FLOAT:
5279 case FIX:
5280 case UNSIGNED_FIX:
5281 *total = COSTS_N_INSNS (7);
5282 return true;
5283
5284 case PLUS:
5285 if (mode == TImode)
5286 {
5287 *total = COSTS_N_INSNS (9);
5288 return true;
5289 }
5290 break;
5291
5292 case MULT:
5293 cost =
5294 GET_CODE (XEXP (x, 0)) ==
5295 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5296 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5297 {
5298 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5299 {
5300 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5301 cost = COSTS_N_INSNS (14);
5302 if ((val & 0xffff) == 0)
5303 cost = COSTS_N_INSNS (9);
5304 else if (val > 0 && val < 0x10000)
5305 cost = COSTS_N_INSNS (11);
5306 }
5307 }
5308 *total = cost;
5309 return true;
5310 case DIV:
5311 case UDIV:
5312 case MOD:
5313 case UMOD:
5314 *total = COSTS_N_INSNS (20);
5315 return true;
5316 case ROTATE:
5317 case ROTATERT:
5318 case ASHIFT:
5319 case ASHIFTRT:
5320 case LSHIFTRT:
5321 *total = COSTS_N_INSNS (4);
5322 return true;
5323 case UNSPEC:
5324 if (XINT (x, 1) == UNSPEC_CONVERT)
5325 *total = COSTS_N_INSNS (0);
5326 else
5327 *total = COSTS_N_INSNS (4);
5328 return true;
5329 }
5330 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5331 if (GET_MODE_CLASS (mode) == MODE_INT
5332 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5333 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5334 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5335 *total = cost;
5336 return true;
5337}
5338
1bd43494 5339static enum machine_mode
5340spu_unwind_word_mode (void)
644459d0 5341{
1bd43494 5342 return SImode;
644459d0 5343}
5344
5345/* Decide whether we can make a sibling call to a function. DECL is the
5346 declaration of the function being targeted by the call and EXP is the
5347 CALL_EXPR representing the call. */
5348static bool
5349spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5350{
5351 return decl && !TARGET_LARGE_MEM;
5352}
5353
5354/* We need to correctly update the back chain pointer and the Available
5355   Stack Size (which is in the second slot of the sp register). */
5356void
5357spu_allocate_stack (rtx op0, rtx op1)
5358{
5359 HOST_WIDE_INT v;
5360 rtx chain = gen_reg_rtx (V4SImode);
5361 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5362 rtx sp = gen_reg_rtx (V4SImode);
5363 rtx splatted = gen_reg_rtx (V4SImode);
5364 rtx pat = gen_reg_rtx (TImode);
5365
5366 /* copy the back chain so we can save it back again. */
5367 emit_move_insn (chain, stack_bot);
5368
5369 op1 = force_reg (SImode, op1);
5370
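  /* Build a { 00 01 02 03 } shuffle pattern for every word so shufb
     splats the SImode value in op1 across all four slots.  */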
5371 v = 0x1020300010203ll;
5372 emit_move_insn (pat, immed_double_const (v, v, TImode));
5373 emit_insn (gen_shufb (splatted, op1, op1, pat));
5374
5375 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5376 emit_insn (gen_subv4si3 (sp, sp, splatted));
5377
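  /* With -fstack-check, halt if the new Available Stack Size (second
     slot of sp) has gone negative.  */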
5378 if (flag_stack_check)
5379 {
5380 rtx avail = gen_reg_rtx(SImode);
5381 rtx result = gen_reg_rtx(SImode);
5382 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5383 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5384 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5385 }
5386
5387 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5388
5389 emit_move_insn (stack_bot, chain);
5390
5391 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5392}
5393
5394void
5395spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5396{
5397 static unsigned char arr[16] =
5398 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5399 rtx temp = gen_reg_rtx (SImode);
5400 rtx temp2 = gen_reg_rtx (SImode);
5401 rtx temp3 = gen_reg_rtx (V4SImode);
5402 rtx temp4 = gen_reg_rtx (V4SImode);
5403 rtx pat = gen_reg_rtx (TImode);
5404 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5405
5406 /* Restore the backchain from the first word, sp from the second. */
5407 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5408 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5409
5410 emit_move_insn (pat, array_to_constant (TImode, arr));
5411
5412 /* Compute Available Stack Size for sp */
5413 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5414 emit_insn (gen_shufb (temp3, temp, temp, pat));
5415
5416 /* Compute Available Stack Size for back chain */
5417 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5418 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5419 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5420
5421 emit_insn (gen_addv4si3 (sp, sp, temp3));
5422 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5423}
5424
5425static void
5426spu_init_libfuncs (void)
5427{
5428 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5429 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5430 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5431 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5432 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5433 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5434 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5435 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5436 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5437 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5438 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5439 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5440
5441 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5442 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5443
5825ec3f 5444 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5445 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5446 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5447 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5448 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5449 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5450 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5451 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5452 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5453 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5454 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5455 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5456
19a53068 5457 set_optab_libfunc (smul_optab, TImode, "__multi3");
5458 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5459 set_optab_libfunc (smod_optab, TImode, "__modti3");
5460 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5461 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5462 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5463}
5464
5465/* Make a subreg, stripping any existing subreg. We could possibly just
5466 call simplify_subreg, but in this case we know what we want. */
5467rtx
5468spu_gen_subreg (enum machine_mode mode, rtx x)
5469{
5470 if (GET_CODE (x) == SUBREG)
5471 x = SUBREG_REG (x);
5472 if (GET_MODE (x) == mode)
5473 return x;
5474 return gen_rtx_SUBREG (mode, x, 0);
5475}
5476
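/* Return TRUE when a value of TYPE must be returned in memory:
   BLKmode values whose size is not a compile-time constant or is
   larger than the register return area.  */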
5477static bool
fb80456a 5478spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5479{
5480 return (TYPE_MODE (type) == BLKmode
5481 && ((type) == 0
5482 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5483 || int_size_in_bytes (type) >
5484 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5485}
5486\f
5487/* Create the built-in types and functions */
5488
c2233b46 5489enum spu_function_code
5490{
5491#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5492#include "spu-builtins.def"
5493#undef DEF_BUILTIN
5494 NUM_SPU_BUILTINS
5495};
5496
5497extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5498
644459d0 5499struct spu_builtin_description spu_builtins[] = {
5500#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5501 {fcode, icode, name, type, params},
644459d0 5502#include "spu-builtins.def"
5503#undef DEF_BUILTIN
5504};
5505
0c5c4d59 5506static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5507
5508/* Returns the spu builtin decl for CODE. */
e6925042 5509
5510static tree
5511spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5512{
5513 if (code >= NUM_SPU_BUILTINS)
5514 return error_mark_node;
5515
0c5c4d59 5516 return spu_builtin_decls[code];
e6925042 5517}
5518
5519
644459d0 5520static void
5521spu_init_builtins (void)
5522{
5523 struct spu_builtin_description *d;
5524 unsigned int i;
5525
5526 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5527 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5528 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5529 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5530 V4SF_type_node = build_vector_type (float_type_node, 4);
5531 V2DF_type_node = build_vector_type (double_type_node, 2);
5532
5533 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5534 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5535 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5536 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5537
c4ecce0c 5538 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5539
5540 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5541 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5542 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5543 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5544 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5545 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5548 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5552
5553 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5554 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5555 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5557 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5558 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5559 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5561
5562 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5563 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5564
5565 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5566
5567 spu_builtin_types[SPU_BTI_PTR] =
5568 build_pointer_type (build_qualified_type
5569 (void_type_node,
5570 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5571
5572 /* For each builtin we build a new prototype. The tree code will make
5573 sure nodes are shared. */
5574 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5575 {
5576 tree p;
5577      char name[64];	/* add_builtin_function will make a copy.  */
5578 int parm;
5579
5580 if (d->name == 0)
5581 continue;
5582
5dfbd18f 5583 /* Find last parm. */
644459d0 5584 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5585 ;
644459d0 5586
5587 p = void_list_node;
5588 while (parm > 1)
5589 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5590
5591 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5592
5593 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5594 spu_builtin_decls[i] =
3726fe5e 5595 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5596 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5597 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5598
5599 /* These builtins don't throw. */
0c5c4d59 5600 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5601 }
5602}
5603
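/* Restore the stack pointer from OP1.  The adjustment OP1 - sp is splatted
   into all four words of the stack pointer register, and the back-chain word
   is copied to the new stack location.  OP0 is unused.  */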
cf31d486 5604void
5605spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5606{
5607 static unsigned char arr[16] =
5608 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5609
5610 rtx temp = gen_reg_rtx (Pmode);
5611 rtx temp2 = gen_reg_rtx (V4SImode);
5612 rtx temp3 = gen_reg_rtx (V4SImode);
5613 rtx pat = gen_reg_rtx (TImode);
5614 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5615
5616 emit_move_insn (pat, array_to_constant (TImode, arr));
5617
5618 /* Restore the sp. */
5619 emit_move_insn (temp, op1);
5620 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5621
5622 /* Compute available stack size for sp. */
5623 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5624 emit_insn (gen_shufb (temp3, temp, temp, pat));
5625
5626 emit_insn (gen_addv4si3 (sp, sp, temp3));
5627 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5628}
5629
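/* Return nonzero if TARGET_SAFE_DMA is enabled and CHANNEL is one of the
   MFC channels 21 through 27.  */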
644459d0 5630int
5631spu_safe_dma (HOST_WIDE_INT channel)
5632{
006e4b96 5633 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5634}
5635
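/* Expand a splat: copy scalar OPS[1] into every element of vector OPS[0].  */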
5636void
5637spu_builtin_splats (rtx ops[])
5638{
5639 enum machine_mode mode = GET_MODE (ops[0]);
5640 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5641 {
5642 unsigned char arr[16];
5643 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5644 emit_move_insn (ops[0], array_to_constant (mode, arr));
5645 }
644459d0 5646 else
5647 {
5648 rtx reg = gen_reg_rtx (TImode);
5649 rtx shuf;
5650 if (GET_CODE (ops[1]) != REG
5651 && GET_CODE (ops[1]) != SUBREG)
5652 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5653 switch (mode)
5654 {
5655 case V2DImode:
5656 case V2DFmode:
5657 shuf =
5658 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5659 TImode);
5660 break;
5661 case V4SImode:
5662 case V4SFmode:
5663 shuf =
5664 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5665 TImode);
5666 break;
5667 case V8HImode:
5668 shuf =
5669 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5670 TImode);
5671 break;
5672 case V16QImode:
5673 shuf =
5674 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5675 TImode);
5676 break;
5677 default:
5678 abort ();
5679 }
5680 emit_move_insn (reg, shuf);
5681 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5682 }
5683}
5684
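/* Expand extraction of element OPS[2] of vector OPS[1] into scalar OPS[0].  */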
5685void
5686spu_builtin_extract (rtx ops[])
5687{
5688 enum machine_mode mode;
5689 rtx rot, from, tmp;
5690
5691 mode = GET_MODE (ops[1]);
5692
5693 if (GET_CODE (ops[2]) == CONST_INT)
5694 {
5695 switch (mode)
5696 {
5697 case V16QImode:
5698 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5699 break;
5700 case V8HImode:
5701 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5702 break;
5703 case V4SFmode:
5704 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5705 break;
5706 case V4SImode:
5707 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5708 break;
5709 case V2DImode:
5710 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5711 break;
5712 case V2DFmode:
5713 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5714 break;
5715 default:
5716 abort ();
5717 }
5718 return;
5719 }
5720
5721 from = spu_gen_subreg (TImode, ops[1]);
5722 rot = gen_reg_rtx (TImode);
5723 tmp = gen_reg_rtx (SImode);
5724
5725 switch (mode)
5726 {
5727 case V16QImode:
5728 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5729 break;
5730 case V8HImode:
5731 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5732 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5733 break;
5734 case V4SFmode:
5735 case V4SImode:
5736 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5737 break;
5738 case V2DImode:
5739 case V2DFmode:
5740 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5741 break;
5742 default:
5743 abort ();
5744 }
5745 emit_insn (gen_rotqby_ti (rot, from, tmp));
5746
5747 emit_insn (gen_spu_convert (ops[0], rot));
5748}
5749
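/* Expand insertion of scalar OPS[1] into element OPS[3] of vector OPS[2],
   placing the result in OPS[0].  */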
5750void
5751spu_builtin_insert (rtx ops[])
5752{
5753 enum machine_mode mode = GET_MODE (ops[0]);
5754 enum machine_mode imode = GET_MODE_INNER (mode);
5755 rtx mask = gen_reg_rtx (TImode);
5756 rtx offset;
5757
5758 if (GET_CODE (ops[3]) == CONST_INT)
5759 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5760 else
5761 {
5762 offset = gen_reg_rtx (SImode);
5763 emit_insn (gen_mulsi3
5764 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5765 }
5766 emit_insn (gen_cpat
5767 (mask, stack_pointer_rtx, offset,
5768 GEN_INT (GET_MODE_SIZE (imode))));
5769 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5770}
5771
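/* Expand promotion of scalar OPS[1] into element OPS[2] of vector OPS[0]
   by rotating the value into place.  */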
5772void
5773spu_builtin_promote (rtx ops[])
5774{
5775 enum machine_mode mode, imode;
5776 rtx rot, from, offset;
5777 HOST_WIDE_INT pos;
5778
5779 mode = GET_MODE (ops[0]);
5780 imode = GET_MODE_INNER (mode);
5781
5782 from = gen_reg_rtx (TImode);
5783 rot = spu_gen_subreg (TImode, ops[0]);
5784
5785 emit_insn (gen_spu_convert (from, ops[1]));
5786
5787 if (GET_CODE (ops[2]) == CONST_INT)
5788 {
5789 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5790 if (GET_MODE_SIZE (imode) < 4)
5791 pos += 4 - GET_MODE_SIZE (imode);
5792 offset = GEN_INT (pos & 15);
5793 }
5794 else
5795 {
5796 offset = gen_reg_rtx (SImode);
5797 switch (mode)
5798 {
5799 case V16QImode:
5800 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5801 break;
5802 case V8HImode:
5803 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5804 emit_insn (gen_addsi3 (offset, offset, offset));
5805 break;
5806 case V4SFmode:
5807 case V4SImode:
5808 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5809 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5810 break;
5811 case V2DImode:
5812 case V2DFmode:
5813 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5814 break;
5815 default:
5816 abort ();
5817 }
5818 }
5819 emit_insn (gen_rotqby_ti (rot, from, offset));
5820}
5821
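/* Implement TARGET_TRAMPOLINE_INIT.  Emit the trampoline code into M_TRAMP,
   loading FNDECL's address and the static chain CXT.  */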
e96f2783 5822static void
5823spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5824{
e96f2783 5825 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5826 rtx shuf = gen_reg_rtx (V4SImode);
5827 rtx insn = gen_reg_rtx (V4SImode);
5828 rtx shufc;
5829 rtx insnc;
5830 rtx mem;
5831
5832 fnaddr = force_reg (SImode, fnaddr);
5833 cxt = force_reg (SImode, cxt);
5834
5835 if (TARGET_LARGE_MEM)
5836 {
5837 rtx rotl = gen_reg_rtx (V4SImode);
5838 rtx mask = gen_reg_rtx (V4SImode);
5839 rtx bi = gen_reg_rtx (SImode);
e96f2783 5840 static unsigned char const shufa[16] = {
644459d0 5841 2, 3, 0, 1, 18, 19, 16, 17,
5842 0, 1, 2, 3, 16, 17, 18, 19
5843 };
e96f2783 5844 static unsigned char const insna[16] = {
644459d0 5845 0x41, 0, 0, 79,
5846 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5847 0x60, 0x80, 0, 79,
5848 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5849 };
5850
5851 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5852 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5853
5854 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5855 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5856 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5857 emit_insn (gen_selb (insn, insnc, rotl, mask));
5858
e96f2783 5859 mem = adjust_address (m_tramp, V4SImode, 0);
5860 emit_move_insn (mem, insn);
644459d0 5861
5862 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5863 mem = adjust_address (m_tramp, Pmode, 16);
5864 emit_move_insn (mem, bi);
644459d0 5865 }
5866 else
5867 {
5868 rtx scxt = gen_reg_rtx (SImode);
5869 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5870 static unsigned char const insna[16] = {
644459d0 5871 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5872 0x30, 0, 0, 0,
5873 0, 0, 0, 0,
5874 0, 0, 0, 0
5875 };
5876
5877 shufc = gen_reg_rtx (TImode);
5878 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5879
5880 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5881 fits 18 bits and the last 4 are zeros. This will be true if
5882 the stack pointer is initialized to 0x3fff0 at program start,
5883 otherwise the ila instruction will be garbage. */
5884
5885 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5886 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5887 emit_insn (gen_cpat
5888 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5889 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5890 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5891
e96f2783 5892 mem = adjust_address (m_tramp, V4SImode, 0);
5893 emit_move_insn (mem, insn);
644459d0 5894 }
5895 emit_insn (gen_sync ());
5896}
5897
08c6cbd2 5898static bool
5899spu_warn_func_return (tree decl)
5900{
5901 /* Naked functions are implemented entirely in assembly, including the
5902 return sequence, so suppress warnings about this. */
5903 return !spu_naked_function_p (decl);
5904}
5905
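/* Expand sign extension of OPS[1] into the wider mode of OPS[0], using a
   shufb pattern that fills the upper bytes from the computed sign word.  */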
644459d0 5906void
5907spu_expand_sign_extend (rtx ops[])
5908{
5909 unsigned char arr[16];
5910 rtx pat = gen_reg_rtx (TImode);
5911 rtx sign, c;
5912 int i, last;
5913 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5914 if (GET_MODE (ops[1]) == QImode)
5915 {
5916 sign = gen_reg_rtx (HImode);
5917 emit_insn (gen_extendqihi2 (sign, ops[1]));
5918 for (i = 0; i < 16; i++)
5919 arr[i] = 0x12;
5920 arr[last] = 0x13;
5921 }
5922 else
5923 {
5924 for (i = 0; i < 16; i++)
5925 arr[i] = 0x10;
5926 switch (GET_MODE (ops[1]))
5927 {
5928 case HImode:
5929 sign = gen_reg_rtx (SImode);
5930 emit_insn (gen_extendhisi2 (sign, ops[1]));
5931 arr[last] = 0x03;
5932 arr[last - 1] = 0x02;
5933 break;
5934 case SImode:
5935 sign = gen_reg_rtx (SImode);
5936 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5937 for (i = 0; i < 4; i++)
5938 arr[last - i] = 3 - i;
5939 break;
5940 case DImode:
5941 sign = gen_reg_rtx (SImode);
5942 c = gen_reg_rtx (SImode);
5943 emit_insn (gen_spu_convert (c, ops[1]));
5944 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5945 for (i = 0; i < 8; i++)
5946 arr[last - i] = 7 - i;
5947 break;
5948 default:
5949 abort ();
5950 }
5951 }
5952 emit_move_insn (pat, array_to_constant (TImode, arr));
5953 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5954}
5955
 5956/* Expand vector initialization.  If there are any constant parts,
 5957   load the constant parts first.  Then load any non-constant parts.  */
5958void
5959spu_expand_vector_init (rtx target, rtx vals)
5960{
5961 enum machine_mode mode = GET_MODE (target);
5962 int n_elts = GET_MODE_NUNITS (mode);
5963 int n_var = 0;
5964 bool all_same = true;
790c536c 5965 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5966 int i;
5967
5968 first = XVECEXP (vals, 0, 0);
5969 for (i = 0; i < n_elts; ++i)
5970 {
5971 x = XVECEXP (vals, 0, i);
e442af0b 5972 if (!(CONST_INT_P (x)
5973 || GET_CODE (x) == CONST_DOUBLE
5974 || GET_CODE (x) == CONST_FIXED))
644459d0 5975 ++n_var;
5976 else
5977 {
5978 if (first_constant == NULL_RTX)
5979 first_constant = x;
5980 }
5981 if (i > 0 && !rtx_equal_p (x, first))
5982 all_same = false;
5983 }
5984
 5985  /* If all elements are the same, use splats to repeat elements.  */
5986 if (all_same)
5987 {
5988 if (!CONSTANT_P (first)
5989 && !register_operand (first, GET_MODE (x)))
5990 first = force_reg (GET_MODE (first), first);
5991 emit_insn (gen_spu_splats (target, first));
5992 return;
5993 }
5994
 5995  /* Load constant parts.  */
5996 if (n_var != n_elts)
5997 {
5998 if (n_var == 0)
5999 {
6000 emit_move_insn (target,
6001 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6002 }
6003 else
6004 {
6005 rtx constant_parts_rtx = copy_rtx (vals);
6006
6007 gcc_assert (first_constant != NULL_RTX);
 6008	  /* Fill empty slots with the first constant; this increases
 6009	     our chance of using splats in the recursive call below.  */
6010 for (i = 0; i < n_elts; ++i)
e442af0b 6011 {
6012 x = XVECEXP (constant_parts_rtx, 0, i);
6013 if (!(CONST_INT_P (x)
6014 || GET_CODE (x) == CONST_DOUBLE
6015 || GET_CODE (x) == CONST_FIXED))
6016 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6017 }
644459d0 6018
6019 spu_expand_vector_init (target, constant_parts_rtx);
6020 }
6021 }
6022
 6023  /* Load variable parts.  */
6024 if (n_var != 0)
6025 {
6026 rtx insert_operands[4];
6027
6028 insert_operands[0] = target;
6029 insert_operands[2] = target;
6030 for (i = 0; i < n_elts; ++i)
6031 {
6032 x = XVECEXP (vals, 0, i);
e442af0b 6033 if (!(CONST_INT_P (x)
6034 || GET_CODE (x) == CONST_DOUBLE
6035 || GET_CODE (x) == CONST_FIXED))
644459d0 6036 {
6037 if (!register_operand (x, GET_MODE (x)))
6038 x = force_reg (GET_MODE (x), x);
6039 insert_operands[1] = x;
6040 insert_operands[3] = GEN_INT (i);
6041 spu_builtin_insert (insert_operands);
6042 }
6043 }
6044 }
6045}
6352eedf 6046
5474166e 6047/* Return the insn code of the vector compare instruction for the given
 6048   CODE and modes DEST_MODE, OP_MODE.  Return -1 if no valid insn is available.  */
6049
6050static int
6051get_vec_cmp_insn (enum rtx_code code,
6052 enum machine_mode dest_mode,
6053 enum machine_mode op_mode)
6054
6055{
6056 switch (code)
6057 {
6058 case EQ:
6059 if (dest_mode == V16QImode && op_mode == V16QImode)
6060 return CODE_FOR_ceq_v16qi;
6061 if (dest_mode == V8HImode && op_mode == V8HImode)
6062 return CODE_FOR_ceq_v8hi;
6063 if (dest_mode == V4SImode && op_mode == V4SImode)
6064 return CODE_FOR_ceq_v4si;
6065 if (dest_mode == V4SImode && op_mode == V4SFmode)
6066 return CODE_FOR_ceq_v4sf;
6067 if (dest_mode == V2DImode && op_mode == V2DFmode)
6068 return CODE_FOR_ceq_v2df;
6069 break;
6070 case GT:
6071 if (dest_mode == V16QImode && op_mode == V16QImode)
6072 return CODE_FOR_cgt_v16qi;
6073 if (dest_mode == V8HImode && op_mode == V8HImode)
6074 return CODE_FOR_cgt_v8hi;
6075 if (dest_mode == V4SImode && op_mode == V4SImode)
6076 return CODE_FOR_cgt_v4si;
6077 if (dest_mode == V4SImode && op_mode == V4SFmode)
6078 return CODE_FOR_cgt_v4sf;
6079 if (dest_mode == V2DImode && op_mode == V2DFmode)
6080 return CODE_FOR_cgt_v2df;
6081 break;
6082 case GTU:
6083 if (dest_mode == V16QImode && op_mode == V16QImode)
6084 return CODE_FOR_clgt_v16qi;
6085 if (dest_mode == V8HImode && op_mode == V8HImode)
6086 return CODE_FOR_clgt_v8hi;
6087 if (dest_mode == V4SImode && op_mode == V4SImode)
6088 return CODE_FOR_clgt_v4si;
6089 break;
6090 default:
6091 break;
6092 }
6093 return -1;
6094}
6095
 6096/* Emit a vector compare of operands OP0 and OP1 using code RCODE.
 6097   DMODE is the expected destination mode.  This is a recursive function.  */
6098
6099static rtx
6100spu_emit_vector_compare (enum rtx_code rcode,
6101 rtx op0, rtx op1,
6102 enum machine_mode dmode)
6103{
6104 int vec_cmp_insn;
6105 rtx mask;
6106 enum machine_mode dest_mode;
6107 enum machine_mode op_mode = GET_MODE (op1);
6108
6109 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6110
 6111  /* Single-precision vector compare instructions use a V4SImode destination.
 6112     Double-precision vector compare instructions use a V2DImode destination.
 6113     The result is moved to the requested mode later.  */
6114 if (dmode == V4SFmode)
6115 dest_mode = V4SImode;
6116 else if (dmode == V2DFmode)
6117 dest_mode = V2DImode;
6118 else
6119 dest_mode = dmode;
6120
6121 mask = gen_reg_rtx (dest_mode);
6122 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6123
6124 if (vec_cmp_insn == -1)
6125 {
6126 bool swap_operands = false;
6127 bool try_again = false;
6128 switch (rcode)
6129 {
6130 case LT:
6131 rcode = GT;
6132 swap_operands = true;
6133 try_again = true;
6134 break;
6135 case LTU:
6136 rcode = GTU;
6137 swap_operands = true;
6138 try_again = true;
6139 break;
6140 case NE:
e20943d4 6141 case UNEQ:
6142 case UNLE:
6143 case UNLT:
6144 case UNGE:
6145 case UNGT:
6146 case UNORDERED:
5474166e 6147	  /* Treat A != B (and the unordered cases) as the complement of the reversed comparison, e.g. ~(A==B).  */
6148 {
e20943d4 6149 enum rtx_code rev_code;
5474166e 6150 enum insn_code nor_code;
e20943d4 6151 rtx rev_mask;
6152
6153 rev_code = reverse_condition_maybe_unordered (rcode);
6154 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6155
d6bf3b14 6156 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6157 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6158 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6159 if (dmode != dest_mode)
6160 {
6161 rtx temp = gen_reg_rtx (dest_mode);
6162 convert_move (temp, mask, 0);
6163 return temp;
6164 }
6165 return mask;
6166 }
6167 break;
6168 case GE:
6169 case GEU:
6170 case LE:
6171 case LEU:
6172 /* Try GT/GTU/LT/LTU OR EQ */
6173 {
6174 rtx c_rtx, eq_rtx;
6175 enum insn_code ior_code;
6176 enum rtx_code new_code;
6177
6178 switch (rcode)
6179 {
6180 case GE: new_code = GT; break;
6181 case GEU: new_code = GTU; break;
6182 case LE: new_code = LT; break;
6183 case LEU: new_code = LTU; break;
6184 default:
6185 gcc_unreachable ();
6186 }
6187
6188 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6189 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6190
d6bf3b14 6191 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6192 gcc_assert (ior_code != CODE_FOR_nothing);
6193 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6194 if (dmode != dest_mode)
6195 {
6196 rtx temp = gen_reg_rtx (dest_mode);
6197 convert_move (temp, mask, 0);
6198 return temp;
6199 }
6200 return mask;
6201 }
6202 break;
e20943d4 6203 case LTGT:
6204 /* Try LT OR GT */
6205 {
6206 rtx lt_rtx, gt_rtx;
6207 enum insn_code ior_code;
6208
6209 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6210 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6211
6212 ior_code = optab_handler (ior_optab, dest_mode);
6213 gcc_assert (ior_code != CODE_FOR_nothing);
6214 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6215 if (dmode != dest_mode)
6216 {
6217 rtx temp = gen_reg_rtx (dest_mode);
6218 convert_move (temp, mask, 0);
6219 return temp;
6220 }
6221 return mask;
6222 }
6223 break;
6224 case ORDERED:
6225 /* Implement as (A==A) & (B==B) */
6226 {
6227 rtx a_rtx, b_rtx;
6228 enum insn_code and_code;
6229
6230 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6231 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6232
6233 and_code = optab_handler (and_optab, dest_mode);
6234 gcc_assert (and_code != CODE_FOR_nothing);
6235 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6236 if (dmode != dest_mode)
6237 {
6238 rtx temp = gen_reg_rtx (dest_mode);
6239 convert_move (temp, mask, 0);
6240 return temp;
6241 }
6242 return mask;
6243 }
6244 break;
5474166e 6245 default:
6246 gcc_unreachable ();
6247 }
6248
6249 /* You only get two chances. */
6250 if (try_again)
6251 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6252
6253 gcc_assert (vec_cmp_insn != -1);
6254
6255 if (swap_operands)
6256 {
6257 rtx tmp;
6258 tmp = op0;
6259 op0 = op1;
6260 op1 = tmp;
6261 }
6262 }
6263
6264 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6265 if (dmode != dest_mode)
6266 {
6267 rtx temp = gen_reg_rtx (dest_mode);
6268 convert_move (temp, mask, 0);
6269 return temp;
6270 }
6271 return mask;
6272}
6273
6274
6275/* Emit vector conditional expression.
6276 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6277 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6278
6279int
6280spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6281 rtx cond, rtx cc_op0, rtx cc_op1)
6282{
6283 enum machine_mode dest_mode = GET_MODE (dest);
6284 enum rtx_code rcode = GET_CODE (cond);
6285 rtx mask;
6286
6287 /* Get the vector mask for the given relational operations. */
6288 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6289
 6290  emit_insn (gen_selb (dest, op2, op1, mask));
6291
6292 return 1;
6293}
6294
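/* Force OP into a register of mode MODE, using a subreg when the modes have
   the same size and an spu_convert otherwise.  */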
6352eedf 6295static rtx
6296spu_force_reg (enum machine_mode mode, rtx op)
6297{
6298 rtx x, r;
6299 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6300 {
6301 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6302 || GET_MODE (op) == BLKmode)
6303 return force_reg (mode, convert_to_mode (mode, op, 0));
6304 abort ();
6305 }
6306
6307 r = force_reg (GET_MODE (op), op);
6308 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6309 {
6310 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6311 if (x)
6312 return x;
6313 }
6314
6315 x = gen_reg_rtx (mode);
6316 emit_insn (gen_spu_convert (x, r));
6317 return x;
6318}
6319
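/* Check operand OP against the range of builtin D's parameter P, diagnosing
   out-of-range immediates and low-order bits that will be ignored.  */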
6320static void
6321spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6322{
6323 HOST_WIDE_INT v = 0;
6324 int lsbits;
6325 /* Check the range of immediate operands. */
6326 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6327 {
6328 int range = p - SPU_BTI_7;
5df189be 6329
6330 if (!CONSTANT_P (op))
bf776685 6331 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6332 d->name,
6333 spu_builtin_range[range].low, spu_builtin_range[range].high);
6334
6335 if (GET_CODE (op) == CONST
6336 && (GET_CODE (XEXP (op, 0)) == PLUS
6337 || GET_CODE (XEXP (op, 0)) == MINUS))
6338 {
6339 v = INTVAL (XEXP (XEXP (op, 0), 1));
6340 op = XEXP (XEXP (op, 0), 0);
6341 }
6342 else if (GET_CODE (op) == CONST_INT)
6343 v = INTVAL (op);
5df189be 6344 else if (GET_CODE (op) == CONST_VECTOR
6345 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6346 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6347
 6348      /* The default for v is 0, which is valid in every range.  */
6349 if (v < spu_builtin_range[range].low
6350 || v > spu_builtin_range[range].high)
bf776685 6351 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6352 d->name,
6353 spu_builtin_range[range].low, spu_builtin_range[range].high,
6354 v);
6352eedf 6355
6356 switch (p)
6357 {
6358 case SPU_BTI_S10_4:
6359 lsbits = 4;
6360 break;
6361 case SPU_BTI_U16_2:
 6362	  /* This is only used in lqa and stqa.  Even though the insns
6363 encode 16 bits of the address (all but the 2 least
6364 significant), only 14 bits are used because it is masked to
6365 be 16 byte aligned. */
6366 lsbits = 4;
6367 break;
6368 case SPU_BTI_S16_2:
6369 /* This is used for lqr and stqr. */
6370 lsbits = 2;
6371 break;
6372 default:
6373 lsbits = 0;
6374 }
6375
6376 if (GET_CODE (op) == LABEL_REF
6377 || (GET_CODE (op) == SYMBOL_REF
6378 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6379 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6380 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6381 d->name);
6382 }
6383}
6384
6385
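/* Expand the arguments of the call EXP to builtin D into OPS[], using TARGET
   as operand 0 when the builtin returns a value.  Return the number of
   operands filled in.  */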
70ca06f8 6386static int
5df189be 6387expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6388 rtx target, rtx ops[])
6389{
bc620c5c 6390 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6391 int i = 0, a;
6352eedf 6392
6393 /* Expand the arguments into rtl. */
6394
6395 if (d->parm[0] != SPU_BTI_VOID)
6396 ops[i++] = target;
6397
70ca06f8 6398 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6399 {
5df189be 6400 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6401 if (arg == 0)
6402 abort ();
b9c74b4d 6403 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6404 }
70ca06f8 6405
32f79657 6406 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6407 return i;
6352eedf 6408}
6409
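/* Expand a call EXP to the builtin described by D.  TARGET is a suggested
   place for the result.  */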
6410static rtx
6411spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6412 tree exp, rtx target)
6352eedf 6413{
6414 rtx pat;
6415 rtx ops[8];
bc620c5c 6416 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6417 enum machine_mode mode, tmode;
6418 int i, p;
70ca06f8 6419 int n_operands;
6352eedf 6420 tree return_type;
6421
6422 /* Set up ops[] with values from arglist. */
70ca06f8 6423 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6424
6425 /* Handle the target operand which must be operand 0. */
6426 i = 0;
6427 if (d->parm[0] != SPU_BTI_VOID)
6428 {
6429
 6430      /* We prefer the mode specified for the match_operand; otherwise
 6431         use the mode from the builtin function prototype.  */
6432 tmode = insn_data[d->icode].operand[0].mode;
6433 if (tmode == VOIDmode)
6434 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6435
 6436      /* Try to use TARGET, because not using it can lead to extra copies,
 6437         and when all of the registers are in use those extra copies lead
 6438         to extra spills.  */
6439 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6440 ops[0] = target;
6441 else
6442 target = ops[0] = gen_reg_rtx (tmode);
6443
6444 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6445 abort ();
6446
6447 i++;
6448 }
6449
a76866d3 6450 if (d->fcode == SPU_MASK_FOR_LOAD)
6451 {
6452 enum machine_mode mode = insn_data[icode].operand[1].mode;
6453 tree arg;
6454 rtx addr, op, pat;
6455
 6456      /* Get the address.  */
5df189be 6457 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6458 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6459 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6460 addr = memory_address (mode, op);
6461
 6462      /* Negate the address.  */
6463 op = gen_reg_rtx (GET_MODE (addr));
6464 emit_insn (gen_rtx_SET (VOIDmode, op,
6465 gen_rtx_NEG (GET_MODE (addr), addr)));
6466 op = gen_rtx_MEM (mode, op);
6467
6468 pat = GEN_FCN (icode) (target, op);
6469 if (!pat)
6470 return 0;
6471 emit_insn (pat);
6472 return target;
6473 }
6474
6352eedf 6475  /* Ignore align_hint, but still expand its args in case they have
 6476     side effects.  */
6477 if (icode == CODE_FOR_spu_align_hint)
6478 return 0;
6479
6480 /* Handle the rest of the operands. */
70ca06f8 6481 for (p = 1; i < n_operands; i++, p++)
6352eedf 6482 {
6483 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6484 mode = insn_data[d->icode].operand[i].mode;
6485 else
6486 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6487
 6488      /* MODE can be VOIDmode here for labels.  */
6489
6490 /* For specific intrinsics with an immediate operand, e.g.,
6491 si_ai(), we sometimes need to convert the scalar argument to a
6492 vector argument by splatting the scalar. */
6493 if (VECTOR_MODE_P (mode)
6494 && (GET_CODE (ops[i]) == CONST_INT
6495 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6496 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6497 {
6498 if (GET_CODE (ops[i]) == CONST_INT)
6499 ops[i] = spu_const (mode, INTVAL (ops[i]));
6500 else
6501 {
6502 rtx reg = gen_reg_rtx (mode);
6503 enum machine_mode imode = GET_MODE_INNER (mode);
6504 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6505 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6506 if (imode != GET_MODE (ops[i]))
6507 ops[i] = convert_to_mode (imode, ops[i],
6508 TYPE_UNSIGNED (spu_builtin_types
6509 [d->parm[i]]));
6510 emit_insn (gen_spu_splats (reg, ops[i]));
6511 ops[i] = reg;
6512 }
6513 }
6514
5df189be 6515 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6516
6352eedf 6517 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6518 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6519 }
6520
70ca06f8 6521 switch (n_operands)
6352eedf 6522 {
6523 case 0:
6524 pat = GEN_FCN (icode) (0);
6525 break;
6526 case 1:
6527 pat = GEN_FCN (icode) (ops[0]);
6528 break;
6529 case 2:
6530 pat = GEN_FCN (icode) (ops[0], ops[1]);
6531 break;
6532 case 3:
6533 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6534 break;
6535 case 4:
6536 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6537 break;
6538 case 5:
6539 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6540 break;
6541 case 6:
6542 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6543 break;
6544 default:
6545 abort ();
6546 }
6547
6548 if (!pat)
6549 abort ();
6550
6551 if (d->type == B_CALL || d->type == B_BISLED)
6552 emit_call_insn (pat);
6553 else if (d->type == B_JUMP)
6554 {
6555 emit_jump_insn (pat);
6556 emit_barrier ();
6557 }
6558 else
6559 emit_insn (pat);
6560
6561 return_type = spu_builtin_types[d->parm[0]];
6562 if (d->parm[0] != SPU_BTI_VOID
6563 && GET_MODE (target) != TYPE_MODE (return_type))
6564 {
 6565      /* TARGET is the return value.  It should always have the mode of
 6566         the builtin function prototype.  */
6567 target = spu_force_reg (TYPE_MODE (return_type), target);
6568 }
6569
6570 return target;
6571}
6572
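/* Implement TARGET_EXPAND_BUILTIN.  */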
6573rtx
6574spu_expand_builtin (tree exp,
6575 rtx target,
6576 rtx subtarget ATTRIBUTE_UNUSED,
6577 enum machine_mode mode ATTRIBUTE_UNUSED,
6578 int ignore ATTRIBUTE_UNUSED)
6579{
5df189be 6580 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6581 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6582 struct spu_builtin_description *d;
6583
6584 if (fcode < NUM_SPU_BUILTINS)
6585 {
6586 d = &spu_builtins[fcode];
6587
5df189be 6588 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6589 }
6590 abort ();
6591}
6592
a76866d3 6593/* Implement targetm.vectorize.builtin_mask_for_load. */
6594static tree
6595spu_builtin_mask_for_load (void)
6596{
0c5c4d59 6597 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6598}
5df189be 6599
a28df51d 6600/* Implement targetm.vectorize.builtin_vectorization_cost. */
6601static int
0822b158 6602spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
d13adc77 6603 tree vectype,
0822b158 6604 int misalign ATTRIBUTE_UNUSED)
559093aa 6605{
d13adc77 6606 unsigned elements;
6607
559093aa 6608 switch (type_of_cost)
6609 {
6610 case scalar_stmt:
6611 case vector_stmt:
6612 case vector_load:
6613 case vector_store:
6614 case vec_to_scalar:
6615 case scalar_to_vec:
6616 case cond_branch_not_taken:
6617 case vec_perm:
5df2530b 6618 case vec_promote_demote:
559093aa 6619 return 1;
6620
6621 case scalar_store:
6622 return 10;
6623
6624 case scalar_load:
6625 /* Load + rotate. */
6626 return 2;
6627
6628 case unaligned_load:
6629 return 2;
6630
6631 case cond_branch_taken:
6632 return 6;
6633
d13adc77 6634 case vec_construct:
6635 elements = TYPE_VECTOR_SUBPARTS (vectype);
6636 return elements / 2 + 1;
6637
559093aa 6638 default:
6639 gcc_unreachable ();
6640 }
a28df51d 6641}
6642
4db2b577 6643/* Implement targetm.vectorize.init_cost. */
6644
61b33788 6645static void *
4db2b577 6646spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6647{
f97dec81 6648 unsigned *cost = XNEWVEC (unsigned, 3);
6649 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
4db2b577 6650 return cost;
6651}
6652
6653/* Implement targetm.vectorize.add_stmt_cost. */
6654
61b33788 6655static unsigned
4db2b577 6656spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
f97dec81 6657 struct _stmt_vec_info *stmt_info, int misalign,
6658 enum vect_cost_model_location where)
4db2b577 6659{
6660 unsigned *cost = (unsigned *) data;
6661 unsigned retval = 0;
6662
6663 if (flag_vect_cost_model)
6664 {
f97dec81 6665 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4db2b577 6666 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6667
6668 /* Statements in an inner loop relative to the loop being
6669 vectorized are weighted more heavily. The value here is
6670 arbitrary and could potentially be improved with analysis. */
f97dec81 6671 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4db2b577 6672 count *= 50; /* FIXME. */
6673
6674 retval = (unsigned) (count * stmt_cost);
f97dec81 6675 cost[where] += retval;
4db2b577 6676 }
6677
6678 return retval;
6679}
6680
6681/* Implement targetm.vectorize.finish_cost. */
6682
f97dec81 6683static void
6684spu_finish_cost (void *data, unsigned *prologue_cost,
6685 unsigned *body_cost, unsigned *epilogue_cost)
4db2b577 6686{
f97dec81 6687 unsigned *cost = (unsigned *) data;
6688 *prologue_cost = cost[vect_prologue];
6689 *body_cost = cost[vect_body];
6690 *epilogue_cost = cost[vect_epilogue];
4db2b577 6691}
6692
6693/* Implement targetm.vectorize.destroy_cost_data. */
6694
61b33788 6695static void
4db2b577 6696spu_destroy_cost_data (void *data)
6697{
6698 free (data);
6699}
6700
0e87db76 6701/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6702   after applying some number of iterations.  This routine does not determine
 6703   how many iterations are required to reach the desired alignment.  */
6704
6705static bool
a9f1838b 6706spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6707{
6708 if (is_packed)
6709 return false;
6710
6711 /* All other types are naturally aligned. */
6712 return true;
6713}
6714
6cf5579e 6715/* Return the appropriate mode for a named address pointer. */
6716static enum machine_mode
6717spu_addr_space_pointer_mode (addr_space_t addrspace)
6718{
6719 switch (addrspace)
6720 {
6721 case ADDR_SPACE_GENERIC:
6722 return ptr_mode;
6723 case ADDR_SPACE_EA:
6724 return EAmode;
6725 default:
6726 gcc_unreachable ();
6727 }
6728}
6729
 6730/* Return the appropriate mode for an address in a named address space.  */
6731static enum machine_mode
6732spu_addr_space_address_mode (addr_space_t addrspace)
6733{
6734 switch (addrspace)
6735 {
6736 case ADDR_SPACE_GENERIC:
6737 return Pmode;
6738 case ADDR_SPACE_EA:
6739 return EAmode;
6740 default:
6741 gcc_unreachable ();
6742 }
6743}
6744
6745/* Determine if one named address space is a subset of another. */
6746
6747static bool
6748spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6749{
6750 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6751 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6752
6753 if (subset == superset)
6754 return true;
6755
6756 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6757 being subsets but instead as disjoint address spaces. */
6758 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6759 return false;
6760
6761 else
6762 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6763}
6764
6765/* Convert from one address space to another. */
6766static rtx
6767spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6768{
6769 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6770 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6771
6772 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6773 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6774
6775 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6776 {
6777 rtx result, ls;
6778
6779 ls = gen_const_mem (DImode,
6780 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6781 set_mem_align (ls, 128);
6782
6783 result = gen_reg_rtx (Pmode);
6784 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6785 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6786 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6787 ls, const0_rtx, Pmode, 1);
6788
6789 emit_insn (gen_subsi3 (result, op, ls));
6790
6791 return result;
6792 }
6793
6794 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6795 {
6796 rtx result, ls;
6797
6798 ls = gen_const_mem (DImode,
6799 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6800 set_mem_align (ls, 128);
6801
6802 result = gen_reg_rtx (EAmode);
6803 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6804 op = force_reg (Pmode, op);
6805 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6806 ls, const0_rtx, EAmode, 1);
6807 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6808
6809 if (EAmode == SImode)
6810 emit_insn (gen_addsi3 (result, op, ls));
6811 else
6812 emit_insn (gen_adddi3 (result, op, ls));
6813
6814 return result;
6815 }
6816
6817 else
6818 gcc_unreachable ();
6819}
6820
6821
d52fd16a 6822/* Count the total number of instructions in each pipe and return the
 6823   maximum, which is used as the Minimum Iteration Interval (MII)
 6824   in the modulo scheduler.  get_pipe () will return -2, -1, 0, or 1;
 6825   -2 means the instruction can go in either pipe0 or pipe1.  */
6826static int
6827spu_sms_res_mii (struct ddg *g)
6828{
6829 int i;
6830 unsigned t[4] = {0, 0, 0, 0};
6831
6832 for (i = 0; i < g->num_nodes; i++)
6833 {
6834 rtx insn = g->nodes[i].insn;
6835 int p = get_pipe (insn) + 2;
6836
1e944a0b 6837 gcc_assert (p >= 0);
6838 gcc_assert (p < 4);
d52fd16a 6839
6840 t[p]++;
6841 if (dump_file && INSN_P (insn))
6842 fprintf (dump_file, "i%d %s %d %d\n",
6843 INSN_UID (insn),
6844 insn_data[INSN_CODE(insn)].name,
6845 p, t[p]);
6846 }
6847 if (dump_file)
6848 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6849
6850 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6851}
6852
6853
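/* Per-function initialization of the RTL expanders.  */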
5df189be 6854void
6855spu_init_expanders (void)
9d98604b 6856{
5df189be 6857 if (cfun)
9d98604b 6858 {
6859 rtx r0, r1;
 6860      /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128-bit
 6861         aligned when frame_pointer_needed is true.  We don't know that
 6862         until we're expanding the prologue.  */
6863 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6864
6865 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6866 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6867 to be treated as aligned, so generate them here. */
6868 r0 = gen_reg_rtx (SImode);
6869 r1 = gen_reg_rtx (SImode);
6870 mark_reg_pointer (r0, 128);
6871 mark_reg_pointer (r1, 128);
6872 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6873 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6874 }
ea32e033 6875}
6876
6877static enum machine_mode
6878spu_libgcc_cmp_return_mode (void)
6879{
6880
 6881/* For SPU, word_mode is TImode, so it is better to use SImode
 6882   for compare returns.  */
6883 return SImode;
6884}
6885
6886static enum machine_mode
6887spu_libgcc_shift_count_mode (void)
6888{
 6889/* For SPU, word_mode is TImode, so it is better to use SImode
 6890   for shift counts.  */
6891 return SImode;
6892}
5a976006 6893
a08dfd55 6894/* Implement targetm.section_type_flags. */
6895static unsigned int
6896spu_section_type_flags (tree decl, const char *name, int reloc)
6897{
6898 /* .toe needs to have type @nobits. */
6899 if (strcmp (name, ".toe") == 0)
6900 return SECTION_BSS;
6cf5579e 6901 /* Don't load _ea into the current address space. */
6902 if (strcmp (name, "._ea") == 0)
6903 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6904 return default_section_type_flags (decl, name, reloc);
6905}
c2233b46 6906
6cf5579e 6907/* Implement targetm.select_section. */
6908static section *
6909spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6910{
6911 /* Variables and constants defined in the __ea address space
6912 go into a special section named "._ea". */
6913 if (TREE_TYPE (decl) != error_mark_node
6914 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6915 {
6916 /* We might get called with string constants, but get_named_section
6917 doesn't like them as they are not DECLs. Also, we need to set
6918 flags in that case. */
6919 if (!DECL_P (decl))
6920 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6921
6922 return get_named_section (decl, "._ea", reloc);
6923 }
6924
6925 return default_elf_select_section (decl, reloc, align);
6926}
6927
6928/* Implement targetm.unique_section. */
6929static void
6930spu_unique_section (tree decl, int reloc)
6931{
6932 /* We don't support unique section names in the __ea address
6933 space for now. */
6934 if (TREE_TYPE (decl) != error_mark_node
6935 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6936 return;
6937
6938 default_unique_section (decl, reloc);
6939}
6940
56c7bfc2 6941/* Generate a constant or register which contains 2^SCALE. We assume
6942 the result is valid for MODE. Currently, MODE must be V4SFmode and
6943 SCALE must be SImode. */
6944rtx
6945spu_gen_exp2 (enum machine_mode mode, rtx scale)
6946{
6947 gcc_assert (mode == V4SFmode);
6948 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6949 if (GET_CODE (scale) != CONST_INT)
6950 {
6951 /* unsigned int exp = (127 + scale) << 23;
6952 __vector float m = (__vector float) spu_splats (exp); */
6953 rtx reg = force_reg (SImode, scale);
6954 rtx exp = gen_reg_rtx (SImode);
6955 rtx mul = gen_reg_rtx (mode);
6956 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6957 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6958 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6959 return mul;
6960 }
6961 else
6962 {
6963 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6964 unsigned char arr[16];
6965 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6966 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6967 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6968 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6969 return array_to_constant (mode, arr);
6970 }
6971}
6972
9d98604b 6973/* After reload, just change the convert into a move instruction
6974 or a dead instruction. */
6975void
6976spu_split_convert (rtx ops[])
6977{
6978 if (REGNO (ops[0]) == REGNO (ops[1]))
6979 emit_note (NOTE_INSN_DELETED);
6980 else
6981 {
6982 /* Use TImode always as this might help hard reg copyprop. */
6983 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6984 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6985 emit_insn (gen_move_insn (op0, op1));
6986 }
6987}
6988
b3878a6c 6989void
4cbad5bb 6990spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 6991{
6992 fprintf (file, "# profile\n");
6993 fprintf (file, "brsl $75, _mcount\n");
6994}
6995
329c1e4e 6996/* Implement targetm.ref_may_alias_errno. */
6997static bool
6998spu_ref_may_alias_errno (ao_ref *ref)
6999{
7000 tree base = ao_ref_base (ref);
7001
 7002  /* With SPU newlib, errno is defined as something like
 7003         _impure_data._errno.
 7004     The default implementation of this target macro does not
 7005     recognize such expressions, so we special-case it here.  */
7006
7007 if (TREE_CODE (base) == VAR_DECL
7008 && !TREE_STATIC (base)
7009 && DECL_EXTERNAL (base)
7010 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7011 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7012 "_impure_data") == 0
7013 /* _errno is the first member of _impure_data. */
7014 && ref->offset == 0)
7015 return true;
7016
7017 return default_ref_may_alias_errno (ref);
7018}
7019
f17d2d13 7020/* Output thunk to FILE that implements a C++ virtual function call (with
7021 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7022 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7023 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7024 relative to the resulting this pointer. */
7025
7026static void
7027spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7028 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7029 tree function)
7030{
7031 rtx op[8];
7032
7033 /* Make sure unwind info is emitted for the thunk if needed. */
7034 final_start_function (emit_barrier (), file, 1);
7035
7036 /* Operand 0 is the target function. */
7037 op[0] = XEXP (DECL_RTL (function), 0);
7038
7039 /* Operand 1 is the 'this' pointer. */
7040 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7041 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7042 else
7043 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7044
7045 /* Operands 2/3 are the low/high halfwords of delta. */
7046 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7047 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7048
7049 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7050 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7051 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7052
7053 /* Operands 6/7 are temporary registers. */
7054 op[6] = gen_rtx_REG (Pmode, 79);
7055 op[7] = gen_rtx_REG (Pmode, 78);
7056
7057 /* Add DELTA to this pointer. */
7058 if (delta)
7059 {
7060 if (delta >= -0x200 && delta < 0x200)
7061 output_asm_insn ("ai\t%1,%1,%2", op);
7062 else if (delta >= -0x8000 && delta < 0x8000)
7063 {
7064 output_asm_insn ("il\t%6,%2", op);
7065 output_asm_insn ("a\t%1,%1,%6", op);
7066 }
7067 else
7068 {
7069 output_asm_insn ("ilhu\t%6,%3", op);
7070 output_asm_insn ("iohl\t%6,%2", op);
7071 output_asm_insn ("a\t%1,%1,%6", op);
7072 }
7073 }
7074
7075 /* Perform vcall adjustment. */
7076 if (vcall_offset)
7077 {
7078 output_asm_insn ("lqd\t%7,0(%1)", op);
7079 output_asm_insn ("rotqby\t%7,%7,%1", op);
7080
7081 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7082 output_asm_insn ("ai\t%7,%7,%4", op);
7083 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7084 {
7085 output_asm_insn ("il\t%6,%4", op);
7086 output_asm_insn ("a\t%7,%7,%6", op);
7087 }
7088 else
7089 {
7090 output_asm_insn ("ilhu\t%6,%5", op);
7091 output_asm_insn ("iohl\t%6,%4", op);
7092 output_asm_insn ("a\t%7,%7,%6", op);
7093 }
7094
7095 output_asm_insn ("lqd\t%6,0(%7)", op);
7096 output_asm_insn ("rotqby\t%6,%6,%7", op);
7097 output_asm_insn ("a\t%1,%1,%6", op);
7098 }
7099
7100 /* Jump to target. */
7101 output_asm_insn ("br\t%0", op);
7102
7103 final_end_function ();
7104}
7105
d5065e6e 7106/* Canonicalize a comparison from one we don't have to one we do have. */
7107static void
7108spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7109 bool op0_preserve_value)
7110{
7111 if (!op0_preserve_value
7112 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7113 {
7114 rtx tem = *op0;
7115 *op0 = *op1;
7116 *op1 = tem;
7117 *code = (int)swap_condition ((enum rtx_code)*code);
7118 }
7119}
3defb88e 7120\f
7121/* Table of machine attributes. */
7122static const struct attribute_spec spu_attribute_table[] =
7123{
7124 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7125 affects_type_identity } */
7126 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7127 false },
7128 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7129 false },
7130 { NULL, 0, 0, false, false, false, NULL, false }
7131};
7132
7133/* TARGET overrides. */
7134
7135#undef TARGET_ADDR_SPACE_POINTER_MODE
7136#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7137
7138#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7139#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7140
7141#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7142#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7143 spu_addr_space_legitimate_address_p
7144
7145#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7146#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7147
7148#undef TARGET_ADDR_SPACE_SUBSET_P
7149#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7150
7151#undef TARGET_ADDR_SPACE_CONVERT
7152#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7153
7154#undef TARGET_INIT_BUILTINS
7155#define TARGET_INIT_BUILTINS spu_init_builtins
7156#undef TARGET_BUILTIN_DECL
7157#define TARGET_BUILTIN_DECL spu_builtin_decl
7158
7159#undef TARGET_EXPAND_BUILTIN
7160#define TARGET_EXPAND_BUILTIN spu_expand_builtin
7161
7162#undef TARGET_UNWIND_WORD_MODE
7163#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7164
7165#undef TARGET_LEGITIMIZE_ADDRESS
7166#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7167
7168/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7169 and .quad for the debugger. When it is known that the assembler is fixed,
7170 these can be removed. */
7171#undef TARGET_ASM_UNALIGNED_SI_OP
7172#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7173
7174#undef TARGET_ASM_ALIGNED_DI_OP
7175#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7176
7177/* The .8byte directive doesn't seem to work well for a 32 bit
7178 architecture. */
7179#undef TARGET_ASM_UNALIGNED_DI_OP
7180#define TARGET_ASM_UNALIGNED_DI_OP NULL
7181
7182#undef TARGET_RTX_COSTS
7183#define TARGET_RTX_COSTS spu_rtx_costs
7184
7185#undef TARGET_ADDRESS_COST
d9c5e5f4 7186#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
3defb88e 7187
7188#undef TARGET_SCHED_ISSUE_RATE
7189#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7190
7191#undef TARGET_SCHED_INIT_GLOBAL
7192#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7193
7194#undef TARGET_SCHED_INIT
7195#define TARGET_SCHED_INIT spu_sched_init
7196
7197#undef TARGET_SCHED_VARIABLE_ISSUE
7198#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7199
7200#undef TARGET_SCHED_REORDER
7201#define TARGET_SCHED_REORDER spu_sched_reorder
7202
7203#undef TARGET_SCHED_REORDER2
7204#define TARGET_SCHED_REORDER2 spu_sched_reorder
7205
7206#undef TARGET_SCHED_ADJUST_COST
7207#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7208
7209#undef TARGET_ATTRIBUTE_TABLE
7210#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7211
7212#undef TARGET_ASM_INTEGER
7213#define TARGET_ASM_INTEGER spu_assemble_integer
7214
7215#undef TARGET_SCALAR_MODE_SUPPORTED_P
7216#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7217
7218#undef TARGET_VECTOR_MODE_SUPPORTED_P
7219#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7220
7221#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7222#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7223
7224#undef TARGET_ASM_GLOBALIZE_LABEL
7225#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7226
7227#undef TARGET_PASS_BY_REFERENCE
7228#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7229
7230#undef TARGET_FUNCTION_ARG
7231#define TARGET_FUNCTION_ARG spu_function_arg
7232
7233#undef TARGET_FUNCTION_ARG_ADVANCE
7234#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7235
7236#undef TARGET_MUST_PASS_IN_STACK
7237#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7238
7239#undef TARGET_BUILD_BUILTIN_VA_LIST
7240#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7241
7242#undef TARGET_EXPAND_BUILTIN_VA_START
7243#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7244
7245#undef TARGET_SETUP_INCOMING_VARARGS
7246#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7247
7248#undef TARGET_MACHINE_DEPENDENT_REORG
7249#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7250
7251#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7252#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7253
7254#undef TARGET_INIT_LIBFUNCS
7255#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7256
7257#undef TARGET_RETURN_IN_MEMORY
7258#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7259
7260#undef TARGET_ENCODE_SECTION_INFO
7261#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7262
7263#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7264#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7265
7266#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7267#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7268
7269#undef TARGET_VECTORIZE_INIT_COST
7270#define TARGET_VECTORIZE_INIT_COST spu_init_cost
7271
7272#undef TARGET_VECTORIZE_ADD_STMT_COST
7273#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7274
7275#undef TARGET_VECTORIZE_FINISH_COST
7276#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7277
7278#undef TARGET_VECTORIZE_DESTROY_COST_DATA
7279#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7280
7281#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7282#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7283
7284#undef TARGET_LIBGCC_CMP_RETURN_MODE
7285#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7286
7287#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7288#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7289
7290#undef TARGET_SCHED_SMS_RES_MII
7291#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7292
7293#undef TARGET_SECTION_TYPE_FLAGS
7294#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7295
7296#undef TARGET_ASM_SELECT_SECTION
7297#define TARGET_ASM_SELECT_SECTION spu_select_section
7298
7299#undef TARGET_ASM_UNIQUE_SECTION
7300#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7301
7302#undef TARGET_LEGITIMATE_ADDRESS_P
7303#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7304
7305#undef TARGET_LEGITIMATE_CONSTANT_P
7306#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7307
7308#undef TARGET_TRAMPOLINE_INIT
7309#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7310
08c6cbd2 7311#undef TARGET_WARN_FUNC_RETURN
7312#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7313
3defb88e 7314#undef TARGET_OPTION_OVERRIDE
7315#define TARGET_OPTION_OVERRIDE spu_option_override
7316
7317#undef TARGET_CONDITIONAL_REGISTER_USAGE
7318#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7319
7320#undef TARGET_REF_MAY_ALIAS_ERRNO
7321#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7322
7323#undef TARGET_ASM_OUTPUT_MI_THUNK
7324#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7325#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7326#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7327
7328/* Variable tracking should be run after all optimizations which
7329 change order of insns. It also needs a valid CFG. */
7330#undef TARGET_DELAY_VARTRACK
7331#define TARGET_DELAY_VARTRACK true
7332
d5065e6e 7333#undef TARGET_CANONICALIZE_COMPARISON
7334#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7335
5f35dd0e 7336#undef TARGET_CAN_USE_DOLOOP_P
7337#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7338
3defb88e 7339struct gcc_target targetm = TARGET_INITIALIZER;
7340
c2233b46 7341#include "gt-spu.h"