644459d0 1/* Copyright (C) 2006 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
16 02110-1301, USA. */
17
18#include "config.h"
19#include "system.h"
20#include "coretypes.h"
21#include "tm.h"
22#include "rtl.h"
23#include "regs.h"
24#include "hard-reg-set.h"
25#include "real.h"
26#include "insn-config.h"
27#include "conditions.h"
28#include "insn-attr.h"
29#include "flags.h"
30#include "recog.h"
31#include "obstack.h"
32#include "tree.h"
33#include "expr.h"
34#include "optabs.h"
35#include "except.h"
36#include "function.h"
37#include "output.h"
38#include "basic-block.h"
39#include "integrate.h"
40#include "toplev.h"
41#include "ggc.h"
42#include "hashtab.h"
43#include "tm_p.h"
44#include "target.h"
45#include "target-def.h"
46#include "langhooks.h"
47#include "reload.h"
48#include "cfglayout.h"
49#include "sched-int.h"
50#include "params.h"
51#include "assert.h"
52#include "c-common.h"
53#include "machmode.h"
54#include "tree-gimple.h"
55#include "tm-constrs.h"
56#include "spu-builtins.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
59struct spu_builtin_range
60{
61 int low, high;
62};
63
64static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77};
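/* The table is indexed by the SPU_BTI_* codes noted above, so validating an
   immediate builtin argument VAL whose type code is D amounts to checking
       spu_builtin_range[D].low <= VAL && VAL <= spu_builtin_range[D].high
   (illustrative sketch of how the table is meant to be used; the actual
   check lives in the builtin expansion code, which is not shown here).  */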
78
644459d0 79\f
80/* Target specific attribute specifications. */
81char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83/* Prototypes and external defs. */
84static void spu_init_builtins (void);
85static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88static rtx get_pic_reg (void);
89static int need_to_save_reg (int regno, int saving);
90static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94static void emit_nop_for_insn (rtx insn);
95static bool insn_clobbers_hbr (rtx insn);
96static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98static rtx get_branch_target (rtx branch);
99static void insert_branch_hints (void);
100static void insert_nops (void);
101static void spu_machine_dependent_reorg (void);
102static int spu_sched_issue_rate (void);
103static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
104 int can_issue_more);
105static int get_pipe (rtx insn);
106static int spu_sched_adjust_priority (rtx insn, int pri);
107static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
108static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
109 int flags,
110 unsigned char *no_add_attrs);
111static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
112 int flags,
113 unsigned char *no_add_attrs);
114static int spu_naked_function_p (tree func);
115static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
116 tree type, unsigned char named);
117static tree spu_build_builtin_va_list (void);
118static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
119 tree * post_p);
120static int regno_aligned_for_load (int regno);
121static int store_with_one_insn_p (rtx mem);
122static int reg_align (rtx reg);
123static int mem_is_padded_component_ref (rtx x);
124static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
125static void spu_asm_globalize_label (FILE * file, const char *name);
126static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
127 int *total);
128static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
129static void spu_init_libfuncs (void);
130static bool spu_return_in_memory (tree type, tree fntype);
131
132extern const char *reg_names[];
133rtx spu_compare_op0, spu_compare_op1;
134
135enum spu_immediate {
136 SPU_NONE,
137 SPU_IL,
138 SPU_ILA,
139 SPU_ILH,
140 SPU_ILHU,
141 SPU_ORI,
142 SPU_ORHI,
143 SPU_ORBI,
99369027 144 SPU_IOHL
644459d0 145};
146
147static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
148static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
149
150/* Built in types. */
151tree spu_builtin_types[SPU_BTI_MAX];
152\f
153/* TARGET overrides. */
154
155#undef TARGET_INIT_BUILTINS
156#define TARGET_INIT_BUILTINS spu_init_builtins
157
644459d0 158#undef TARGET_EXPAND_BUILTIN
159#define TARGET_EXPAND_BUILTIN spu_expand_builtin
160
161#undef TARGET_EH_RETURN_FILTER_MODE
162#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
163
 164/* The .8byte directive doesn't seem to work well for a 32-bit
165 architecture. */
166#undef TARGET_ASM_UNALIGNED_DI_OP
167#define TARGET_ASM_UNALIGNED_DI_OP NULL
168
169#undef TARGET_RTX_COSTS
170#define TARGET_RTX_COSTS spu_rtx_costs
171
172#undef TARGET_ADDRESS_COST
173#define TARGET_ADDRESS_COST hook_int_rtx_0
174
175#undef TARGET_SCHED_ISSUE_RATE
176#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
177
178#undef TARGET_SCHED_VARIABLE_ISSUE
179#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
180
181#undef TARGET_SCHED_ADJUST_PRIORITY
182#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
183
184#undef TARGET_SCHED_ADJUST_COST
185#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
186
187const struct attribute_spec spu_attribute_table[];
188#undef TARGET_ATTRIBUTE_TABLE
189#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
190
191#undef TARGET_ASM_INTEGER
192#define TARGET_ASM_INTEGER spu_assemble_integer
193
194#undef TARGET_SCALAR_MODE_SUPPORTED_P
195#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
196
197#undef TARGET_VECTOR_MODE_SUPPORTED_P
198#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
199
200#undef TARGET_FUNCTION_OK_FOR_SIBCALL
201#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
202
203#undef TARGET_ASM_GLOBALIZE_LABEL
204#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
205
206#undef TARGET_PASS_BY_REFERENCE
207#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
208
209#undef TARGET_MUST_PASS_IN_STACK
210#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
211
212#undef TARGET_BUILD_BUILTIN_VA_LIST
213#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
214
215#undef TARGET_SETUP_INCOMING_VARARGS
216#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
217
218#undef TARGET_MACHINE_DEPENDENT_REORG
219#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
220
221#undef TARGET_GIMPLIFY_VA_ARG_EXPR
222#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
223
224#undef TARGET_DEFAULT_TARGET_FLAGS
225#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
226
227#undef TARGET_INIT_LIBFUNCS
228#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
229
230#undef TARGET_RETURN_IN_MEMORY
231#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
232
233struct gcc_target targetm = TARGET_INITIALIZER;
234
644459d0 235/* Sometimes certain combinations of command options do not make sense
236 on a particular target machine. You can define a macro
237 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
238 executed once just after all the command options have been parsed. */
239void
240spu_override_options (void)
241{
242
644459d0 243 /* Override some of the default param values. With so many registers
244 larger values are better for these params. */
245 if (MAX_UNROLLED_INSNS == 100)
246 MAX_UNROLLED_INSNS = 250;
247 if (MAX_PENDING_LIST_LENGTH == 32)
248 MAX_PENDING_LIST_LENGTH = 128;
249
250 flag_omit_frame_pointer = 1;
251
252 if (align_functions < 8)
253 align_functions = 8;
254}
255\f
256/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
257 struct attribute_spec.handler. */
258
259/* Table of machine attributes. */
260const struct attribute_spec spu_attribute_table[] =
261{
262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
263 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
264 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
265 { NULL, 0, 0, false, false, false, NULL }
266};
267
268/* True if MODE is valid for the target. By "valid", we mean able to
269 be manipulated in non-trivial ways. In particular, this means all
270 the arithmetic is supported. */
271static bool
272spu_scalar_mode_supported_p (enum machine_mode mode)
273{
274 switch (mode)
275 {
276 case QImode:
277 case HImode:
278 case SImode:
279 case SFmode:
280 case DImode:
281 case TImode:
282 case DFmode:
283 return true;
284
285 default:
286 return false;
287 }
288}
289
290/* Similarly for vector modes. "Supported" here is less strict. At
291 least some operations are supported; need to check optabs or builtins
292 for further details. */
293static bool
294spu_vector_mode_supported_p (enum machine_mode mode)
295{
296 switch (mode)
297 {
298 case V16QImode:
299 case V8HImode:
300 case V4SImode:
301 case V2DImode:
302 case V4SFmode:
303 case V2DFmode:
304 return true;
305
306 default:
307 return false;
308 }
309}
310
311/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
312 least significant bytes of the outer mode. This function returns
 313 TRUE for the SUBREGs where this is correct. */
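/* For example, given the check below, (subreg:SI (reg:HI x) 0) is accepted
   because both modes fit within 4 bytes, while (subreg:SI (reg:DI x) 0) is
   rejected because the sizes differ and the inner mode is wider than a
   word (illustrative cases only).  */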
314int
315valid_subreg (rtx op)
316{
317 enum machine_mode om = GET_MODE (op);
318 enum machine_mode im = GET_MODE (SUBREG_REG (op));
319 return om != VOIDmode && im != VOIDmode
320 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
321 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
322}
323
 324/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 325 and adjust the start offset. */
644459d0 326static rtx
327adjust_operand (rtx op, HOST_WIDE_INT * start)
328{
329 enum machine_mode mode;
330 int op_size;
331 /* Strip any SUBREG */
332 if (GET_CODE (op) == SUBREG)
333 {
334 if (start)
335 *start -=
336 GET_MODE_BITSIZE (GET_MODE (op)) -
337 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
338 op = SUBREG_REG (op);
339 }
340 /* If it is smaller than SI, assure a SUBREG */
341 op_size = GET_MODE_BITSIZE (GET_MODE (op));
342 if (op_size < 32)
343 {
344 if (start)
345 *start += 32 - op_size;
346 op_size = 32;
347 }
348 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
349 mode = mode_for_size (op_size, MODE_INT, 0);
350 if (mode != GET_MODE (op))
351 op = gen_rtx_SUBREG (mode, op, 0);
352 return op;
353}
354
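/* Expand an extv/extzv pattern: extract the WIDTH-bit field starting at
   bit START of ops[1] into ops[0].  The code below shifts the source left
   by START to discard the bits above the field, then shifts right (logical
   or arithmetic, depending on UNSIGNEDP) by src_size - WIDTH.  For
   instance, an 8-bit field at bit 4 of an SImode source is shifted left by
   4 and then right by 24 (illustrative numbers only).  */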
355void
356spu_expand_extv (rtx ops[], int unsignedp)
357{
358 HOST_WIDE_INT width = INTVAL (ops[2]);
359 HOST_WIDE_INT start = INTVAL (ops[3]);
360 HOST_WIDE_INT src_size, dst_size;
361 enum machine_mode src_mode, dst_mode;
362 rtx dst = ops[0], src = ops[1];
363 rtx s;
364
365 dst = adjust_operand (ops[0], 0);
366 dst_mode = GET_MODE (dst);
367 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
368
644459d0 369 src = adjust_operand (src, &start);
370 src_mode = GET_MODE (src);
371 src_size = GET_MODE_BITSIZE (GET_MODE (src));
372
373 if (start > 0)
374 {
375 s = gen_reg_rtx (src_mode);
376 switch (src_mode)
377 {
378 case SImode:
379 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
380 break;
381 case DImode:
382 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
383 break;
384 case TImode:
385 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
386 break;
387 default:
388 abort ();
389 }
390 src = s;
391 }
392
393 if (width < src_size)
394 {
395 rtx pat;
396 int icode;
397 switch (src_mode)
398 {
399 case SImode:
400 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
401 break;
402 case DImode:
403 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
404 break;
405 case TImode:
406 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
407 break;
408 default:
409 abort ();
410 }
411 s = gen_reg_rtx (src_mode);
412 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
413 emit_insn (pat);
414 src = s;
415 }
416
417 convert_move (dst, src, unsignedp);
418}
419
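/* Expand an insv pattern: insert the WIDTH-bit value ops[3] at bit START
   of ops[0].  A mask covering the field is built, the source is shifted
   into position, and selb merges it into the destination.  When ops[0] is
   a MEM the code performs an aligned TImode read-modify-write, and also
   updates the quadword at offset 16 when the known alignment cannot
   guarantee that the field fits entirely in the first one.  */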
420void
421spu_expand_insv (rtx ops[])
422{
423 HOST_WIDE_INT width = INTVAL (ops[1]);
424 HOST_WIDE_INT start = INTVAL (ops[2]);
425 HOST_WIDE_INT maskbits;
426 enum machine_mode dst_mode, src_mode;
427 rtx dst = ops[0], src = ops[3];
428 int dst_size, src_size;
429 rtx mask;
430 rtx shift_reg;
431 int shift;
432
433
434 if (GET_CODE (ops[0]) == MEM)
435 dst = gen_reg_rtx (TImode);
436 else
437 dst = adjust_operand (dst, &start);
438 dst_mode = GET_MODE (dst);
439 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
440
441 if (CONSTANT_P (src))
442 {
443 enum machine_mode m =
444 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
445 src = force_reg (m, convert_to_mode (m, src, 0));
446 }
447 src = adjust_operand (src, 0);
448 src_mode = GET_MODE (src);
449 src_size = GET_MODE_BITSIZE (GET_MODE (src));
450
451 mask = gen_reg_rtx (dst_mode);
452 shift_reg = gen_reg_rtx (dst_mode);
453 shift = dst_size - start - width;
454
455 /* It's not safe to use subreg here because the compiler assumes
456 that the SUBREG_REG is right justified in the SUBREG. */
457 convert_move (shift_reg, src, 1);
458
459 if (shift > 0)
460 {
461 switch (dst_mode)
462 {
463 case SImode:
464 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
465 break;
466 case DImode:
467 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
468 break;
469 case TImode:
470 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
471 break;
472 default:
473 abort ();
474 }
475 }
476 else if (shift < 0)
477 abort ();
478
479 switch (dst_size)
480 {
481 case 32:
482 maskbits = (-1ll << (32 - width - start));
483 if (start)
484 maskbits += (1ll << (32 - start));
485 emit_move_insn (mask, GEN_INT (maskbits));
486 break;
487 case 64:
488 maskbits = (-1ll << (64 - width - start));
489 if (start)
490 maskbits += (1ll << (64 - start));
491 emit_move_insn (mask, GEN_INT (maskbits));
492 break;
493 case 128:
494 {
495 unsigned char arr[16];
496 int i = start / 8;
497 memset (arr, 0, sizeof (arr));
498 arr[i] = 0xff >> (start & 7);
499 for (i++; i <= (start + width - 1) / 8; i++)
500 arr[i] = 0xff;
501 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
502 emit_move_insn (mask, array_to_constant (TImode, arr));
503 }
504 break;
505 default:
506 abort ();
507 }
508 if (GET_CODE (ops[0]) == MEM)
509 {
510 rtx aligned = gen_reg_rtx (SImode);
511 rtx low = gen_reg_rtx (SImode);
512 rtx addr = gen_reg_rtx (SImode);
513 rtx rotl = gen_reg_rtx (SImode);
514 rtx mask0 = gen_reg_rtx (TImode);
515 rtx mem;
516
517 emit_move_insn (addr, XEXP (ops[0], 0));
518 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
519 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
520 emit_insn (gen_negsi2 (rotl, low));
521 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
522 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
523 mem = change_address (ops[0], TImode, aligned);
524 set_mem_alias_set (mem, 0);
525 emit_move_insn (dst, mem);
526 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
527 emit_move_insn (mem, dst);
528 if (start + width > MEM_ALIGN (ops[0]))
529 {
530 rtx shl = gen_reg_rtx (SImode);
531 rtx mask1 = gen_reg_rtx (TImode);
532 rtx dst1 = gen_reg_rtx (TImode);
533 rtx mem1;
534 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
535 emit_insn (gen_shlqby_ti (mask1, mask, shl));
536 mem1 = adjust_address (mem, TImode, 16);
537 set_mem_alias_set (mem1, 0);
538 emit_move_insn (dst1, mem1);
539 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
540 emit_move_insn (mem1, dst1);
541 }
542 }
543 else
544 emit_insn (gen_selb (dst, dst, shift_reg, mask));
545}
546
547
548int
549spu_expand_block_move (rtx ops[])
550{
551 HOST_WIDE_INT bytes, align, offset;
552 rtx src, dst, sreg, dreg, target;
553 int i;
554 if (GET_CODE (ops[2]) != CONST_INT
555 || GET_CODE (ops[3]) != CONST_INT
556 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
557 return 0;
558
559 bytes = INTVAL (ops[2]);
560 align = INTVAL (ops[3]);
561
562 if (bytes <= 0)
563 return 1;
564
565 dst = ops[0];
566 src = ops[1];
567
568 if (align == 16)
569 {
570 for (offset = 0; offset + 16 <= bytes; offset += 16)
571 {
572 dst = adjust_address (ops[0], V16QImode, offset);
573 src = adjust_address (ops[1], V16QImode, offset);
574 emit_move_insn (dst, src);
575 }
576 if (offset < bytes)
577 {
578 rtx mask;
579 unsigned char arr[16] = { 0 };
580 for (i = 0; i < bytes - offset; i++)
581 arr[i] = 0xff;
582 dst = adjust_address (ops[0], V16QImode, offset);
583 src = adjust_address (ops[1], V16QImode, offset);
584 mask = gen_reg_rtx (V16QImode);
585 sreg = gen_reg_rtx (V16QImode);
586 dreg = gen_reg_rtx (V16QImode);
587 target = gen_reg_rtx (V16QImode);
588 emit_move_insn (mask, array_to_constant (V16QImode, arr));
589 emit_move_insn (dreg, dst);
590 emit_move_insn (sreg, src);
591 emit_insn (gen_selb (target, dreg, sreg, mask));
592 emit_move_insn (dst, target);
593 }
594 return 1;
595 }
596 return 0;
597}
598
599enum spu_comp_code
600{ SPU_EQ, SPU_GT, SPU_GTU };
601
602
603int spu_comp_icode[8][3] = {
604 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
605 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
606 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
607 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
608 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
609 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
610 {0, 0, 0},
611 {CODE_FOR_ceq_vec, 0, 0},
612};
613
 614/* Generate a compare for CODE and emit the branch, select, or set
 615 insns that use its result. GCC can figure this out too if we don't
 616 provide all variations of compares, but since GCC always wants to use
 617 WORD_MODE, we can generate better code in most cases if we do it
 618 ourselves. */
619void
620spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
621{
622 int reverse_compare = 0;
623 int reverse_test = 0;
624 rtx compare_result;
625 rtx comp_rtx;
626 rtx target = operands[0];
627 enum machine_mode comp_mode;
628 enum machine_mode op_mode;
629 enum spu_comp_code scode;
630 int index;
631
632 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
633 and so on, to keep the constant in operand 1. */
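     /* For example, (x >= 5) becomes (x > 4) and (x < 5) becomes (x <= 4),
	provided the decremented constant is still representable in the
	mode of x (illustrative values only).  */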
634 if (GET_CODE (spu_compare_op1) == CONST_INT)
635 {
636 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
637 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
638 switch (code)
639 {
640 case GE:
641 spu_compare_op1 = GEN_INT (val);
642 code = GT;
643 break;
644 case LT:
645 spu_compare_op1 = GEN_INT (val);
646 code = LE;
647 break;
648 case GEU:
649 spu_compare_op1 = GEN_INT (val);
650 code = GTU;
651 break;
652 case LTU:
653 spu_compare_op1 = GEN_INT (val);
654 code = LEU;
655 break;
656 default:
657 break;
658 }
659 }
660
661 switch (code)
662 {
663 case GE:
664 reverse_compare = 1;
665 reverse_test = 1;
666 scode = SPU_GT;
667 break;
668 case LE:
669 reverse_compare = 0;
670 reverse_test = 1;
671 scode = SPU_GT;
672 break;
673 case LT:
674 reverse_compare = 1;
675 reverse_test = 0;
676 scode = SPU_GT;
677 break;
678 case GEU:
679 reverse_compare = 1;
680 reverse_test = 1;
681 scode = SPU_GTU;
682 break;
683 case LEU:
684 reverse_compare = 0;
685 reverse_test = 1;
686 scode = SPU_GTU;
687 break;
688 case LTU:
689 reverse_compare = 1;
690 reverse_test = 0;
691 scode = SPU_GTU;
692 break;
693 case NE:
694 reverse_compare = 0;
695 reverse_test = 1;
696 scode = SPU_EQ;
697 break;
698
699 case EQ:
700 scode = SPU_EQ;
701 break;
702 case GT:
703 scode = SPU_GT;
704 break;
705 case GTU:
706 scode = SPU_GTU;
707 break;
708 default:
709 scode = SPU_EQ;
710 break;
711 }
712
713 comp_mode = SImode;
714 op_mode = GET_MODE (spu_compare_op0);
715
716 switch (op_mode)
717 {
718 case QImode:
719 index = 0;
720 comp_mode = QImode;
721 break;
722 case HImode:
723 index = 1;
724 comp_mode = HImode;
725 break;
726 case SImode:
727 index = 2;
728 break;
729 case DImode:
730 index = 3;
731 break;
732 case TImode:
733 index = 4;
734 break;
735 case SFmode:
736 index = 5;
737 break;
738 case DFmode:
739 index = 6;
740 break;
741 case V16QImode:
742 case V8HImode:
743 case V4SImode:
744 case V2DImode:
745 case V4SFmode:
746 case V2DFmode:
747 index = 7;
748 break;
749 default:
750 abort ();
751 }
752
753 if (GET_MODE (spu_compare_op1) == DFmode)
754 {
755 rtx reg = gen_reg_rtx (DFmode);
756 if (!flag_unsafe_math_optimizations
757 || (scode != SPU_GT && scode != SPU_EQ))
758 abort ();
759 if (reverse_compare)
760 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
761 else
762 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
763 reverse_compare = 0;
764 spu_compare_op0 = reg;
765 spu_compare_op1 = CONST0_RTX (DFmode);
766 }
767
768 if (is_set == 0 && spu_compare_op1 == const0_rtx
769 && (GET_MODE (spu_compare_op0) == SImode
770 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
771 {
772 /* Don't need to set a register with the result when we are
773 comparing against zero and branching. */
774 reverse_test = !reverse_test;
775 compare_result = spu_compare_op0;
776 }
777 else
778 {
779 compare_result = gen_reg_rtx (comp_mode);
780
781 if (reverse_compare)
782 {
783 rtx t = spu_compare_op1;
784 spu_compare_op1 = spu_compare_op0;
785 spu_compare_op0 = t;
786 }
787
788 if (spu_comp_icode[index][scode] == 0)
789 abort ();
790
791 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
792 (spu_compare_op0, op_mode))
793 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
794 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
795 (spu_compare_op1, op_mode))
796 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
797 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
798 spu_compare_op0,
799 spu_compare_op1);
800 if (comp_rtx == 0)
801 abort ();
802 emit_insn (comp_rtx);
803
804 }
805
806 if (is_set == 0)
807 {
808 rtx bcomp;
809 rtx loc_ref;
810
811 /* We don't have branch on QI compare insns, so we convert the
812 QI compare result to a HI result. */
813 if (comp_mode == QImode)
814 {
815 rtx old_res = compare_result;
816 compare_result = gen_reg_rtx (HImode);
817 comp_mode = HImode;
818 emit_insn (gen_extendqihi2 (compare_result, old_res));
819 }
820
821 if (reverse_test)
822 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
823 else
824 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
825
826 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
827 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
828 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
829 loc_ref, pc_rtx)));
830 }
831 else if (is_set == 2)
832 {
833 int compare_size = GET_MODE_BITSIZE (comp_mode);
834 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
835 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
836 rtx select_mask;
837 rtx op_t = operands[2];
838 rtx op_f = operands[3];
839
840 /* The result of the comparison can be SI, HI or QI mode. Create a
841 mask based on that result. */
842 if (target_size > compare_size)
843 {
844 select_mask = gen_reg_rtx (mode);
845 emit_insn (gen_extend_compare (select_mask, compare_result));
846 }
847 else if (target_size < compare_size)
848 select_mask =
849 gen_rtx_SUBREG (mode, compare_result,
850 (compare_size - target_size) / BITS_PER_UNIT);
851 else if (comp_mode != mode)
852 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
853 else
854 select_mask = compare_result;
855
856 if (GET_MODE (target) != GET_MODE (op_t)
857 || GET_MODE (target) != GET_MODE (op_f))
858 abort ();
859
860 if (reverse_test)
861 emit_insn (gen_selb (target, op_t, op_f, select_mask));
862 else
863 emit_insn (gen_selb (target, op_f, op_t, select_mask));
864 }
865 else
866 {
867 if (reverse_test)
868 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
869 gen_rtx_NOT (comp_mode, compare_result)));
870 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
871 emit_insn (gen_extendhisi2 (target, compare_result));
872 else if (GET_MODE (target) == SImode
873 && GET_MODE (compare_result) == QImode)
874 emit_insn (gen_extend_compare (target, compare_result));
875 else
876 emit_move_insn (target, compare_result);
877 }
878}
879
880HOST_WIDE_INT
881const_double_to_hwint (rtx x)
882{
883 HOST_WIDE_INT val;
884 REAL_VALUE_TYPE rv;
885 if (GET_MODE (x) == SFmode)
886 {
887 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
888 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
889 }
890 else if (GET_MODE (x) == DFmode)
891 {
892 long l[2];
893 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
894 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
895 val = l[0];
896 val = (val << 32) | (l[1] & 0xffffffff);
897 }
898 else
899 abort ();
900 return val;
901}
902
903rtx
904hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
905{
906 long tv[2];
907 REAL_VALUE_TYPE rv;
908 gcc_assert (mode == SFmode || mode == DFmode);
909
910 if (mode == SFmode)
911 tv[0] = (v << 32) >> 32;
912 else if (mode == DFmode)
913 {
914 tv[1] = (v << 32) >> 32;
915 tv[0] = v >> 32;
916 }
917 real_from_target (&rv, tv, mode);
918 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
919}
920
921void
922print_operand_address (FILE * file, register rtx addr)
923{
924 rtx reg;
925 rtx offset;
926
e04cf423 927 if (GET_CODE (addr) == AND
928 && GET_CODE (XEXP (addr, 1)) == CONST_INT
929 && INTVAL (XEXP (addr, 1)) == -16)
930 addr = XEXP (addr, 0);
931
644459d0 932 switch (GET_CODE (addr))
933 {
934 case REG:
935 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
936 break;
937
938 case PLUS:
939 reg = XEXP (addr, 0);
940 offset = XEXP (addr, 1);
941 if (GET_CODE (offset) == REG)
942 {
943 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
944 reg_names[REGNO (offset)]);
945 }
946 else if (GET_CODE (offset) == CONST_INT)
947 {
948 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
949 INTVAL (offset), reg_names[REGNO (reg)]);
950 }
951 else
952 abort ();
953 break;
954
955 case CONST:
956 case LABEL_REF:
957 case SYMBOL_REF:
958 case CONST_INT:
959 output_addr_const (file, addr);
960 break;
961
962 default:
963 debug_rtx (addr);
964 abort ();
965 }
966}
967
968void
969print_operand (FILE * file, rtx x, int code)
970{
971 enum machine_mode mode = GET_MODE (x);
972 HOST_WIDE_INT val;
973 unsigned char arr[16];
974 int xcode = GET_CODE (x);
975 if (GET_MODE (x) == VOIDmode)
976 switch (code)
977 {
978 case 'H': /* 128 bits, signed */
979 case 'L': /* 128 bits, signed */
980 case 'm': /* 128 bits, signed */
981 case 'T': /* 128 bits, signed */
982 case 't': /* 128 bits, signed */
983 mode = TImode;
984 break;
985 case 'G': /* 64 bits, signed */
986 case 'K': /* 64 bits, signed */
987 case 'k': /* 64 bits, signed */
988 case 'D': /* 64 bits, signed */
989 case 'd': /* 64 bits, signed */
990 mode = DImode;
991 break;
992 case 'F': /* 32 bits, signed */
993 case 'J': /* 32 bits, signed */
994 case 'j': /* 32 bits, signed */
995 case 's': /* 32 bits, signed */
996 case 'S': /* 32 bits, signed */
997 mode = SImode;
998 break;
999 }
1000 switch (code)
1001 {
1002
1003 case 'j': /* 32 bits, signed */
1004 case 'k': /* 64 bits, signed */
1005 case 'm': /* 128 bits, signed */
1006 if (xcode == CONST_INT
1007 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1008 {
1009 gcc_assert (logical_immediate_p (x, mode));
1010 constant_to_array (mode, x, arr);
1011 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1012 val = trunc_int_for_mode (val, SImode);
1013 switch (which_logical_immediate (val))
1014 {
1015 case SPU_ORI:
1016 break;
1017 case SPU_ORHI:
1018 fprintf (file, "h");
1019 break;
1020 case SPU_ORBI:
1021 fprintf (file, "b");
1022 break;
1023 default:
1024 gcc_unreachable();
1025 }
1026 }
1027 else
1028 gcc_unreachable();
1029 return;
1030
1031 case 'J': /* 32 bits, signed */
1032 case 'K': /* 64 bits, signed */
1033 case 'L': /* 128 bits, signed */
1034 if (xcode == CONST_INT
1035 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1036 {
1037 gcc_assert (logical_immediate_p (x, mode)
1038 || iohl_immediate_p (x, mode));
1039 constant_to_array (mode, x, arr);
1040 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1041 val = trunc_int_for_mode (val, SImode);
1042 switch (which_logical_immediate (val))
1043 {
1044 case SPU_ORI:
1045 case SPU_IOHL:
1046 break;
1047 case SPU_ORHI:
1048 val = trunc_int_for_mode (val, HImode);
1049 break;
1050 case SPU_ORBI:
1051 val = trunc_int_for_mode (val, QImode);
1052 break;
1053 default:
1054 gcc_unreachable();
1055 }
1056 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1057 }
1058 else
1059 gcc_unreachable();
1060 return;
1061
1062 case 't': /* 128 bits, signed */
1063 case 'd': /* 64 bits, signed */
1064 case 's': /* 32 bits, signed */
1065 if (xcode == CONST_INT
1066 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1067 {
1068 gcc_assert (immediate_load_p (x, mode));
1069 constant_to_array (mode, x, arr);
1070 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1071 val = trunc_int_for_mode (val, SImode);
1072 switch (which_immediate_load (val))
1073 {
1074 case SPU_IL:
1075 break;
1076 case SPU_ILA:
1077 fprintf (file, "a");
1078 break;
1079 case SPU_ILH:
1080 fprintf (file, "h");
1081 break;
1082 case SPU_ILHU:
1083 fprintf (file, "hu");
1084 break;
1085 default:
1086 gcc_unreachable();
1087 }
1088 }
1089 else if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1090 fprintf (file, "a");
c8befdb9 1091 else if (xcode == HIGH)
1092 fprintf (file, "hu");
644459d0 1093 else
1094 gcc_unreachable ();
1095 return;
1096
1097 case 'T': /* 128 bits, signed */
1098 case 'D': /* 64 bits, signed */
1099 case 'S': /* 32 bits, signed */
1100 if (xcode == CONST_INT
1101 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1102 {
1103 gcc_assert (immediate_load_p (x, mode));
1104 constant_to_array (mode, x, arr);
1105 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1106 val = trunc_int_for_mode (val, SImode);
1107 switch (which_immediate_load (val))
1108 {
1109 case SPU_IL:
1110 case SPU_ILA:
1111 break;
1112 case SPU_ILH:
1113 case SPU_ILHU:
1114 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1115 break;
1116 default:
1117 gcc_unreachable();
1118 }
1119 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1120 }
1121 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1122 output_addr_const (file, x);
c8befdb9 1123 else if (xcode == HIGH)
1124 {
1125 output_addr_const (file, XEXP (x, 0));
1126 fprintf (file, "@h");
1127 }
644459d0 1128 else
1129 gcc_unreachable ();
1130 return;
1131
1132 case 'F':
1133 case 'G':
1134 case 'H':
1135 if (xcode == CONST_INT
1136 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1137 { /* immediate operand for fsmbi */
1138 int i;
1139 HOST_WIDE_INT val = 0;
1140 unsigned char arr[16];
1141 constant_to_array (mode, x, arr);
1142 for (i = 0; i < 16; i++)
1143 {
1144 val <<= 1;
1145 val |= arr[i] & 1;
1146 }
1147 print_operand (file, GEN_INT (val), 0);
1148 }
1149 else
1150 gcc_unreachable();
1151 return;
1152
1153 case 'C':
1154 if (xcode == CONST_INT)
1155 {
 1156	  /* Only the 4 least significant bits are relevant for the
 1157	     generate control word (c*d) instructions. */
1158 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1159 return;
1160 }
1161 break;
1162
1163 case 'M': /* print code for c*d */
1164 if (GET_CODE (x) == CONST_INT)
1165 switch (INTVAL (x))
1166 {
1167 case 1:
1168 fprintf (file, "b");
1169 break;
1170 case 2:
1171 fprintf (file, "h");
1172 break;
1173 case 4:
1174 fprintf (file, "w");
1175 break;
1176 case 8:
1177 fprintf (file, "d");
1178 break;
1179 default:
1180 gcc_unreachable();
1181 }
1182 else
1183 gcc_unreachable();
1184 return;
1185
1186 case 'N': /* Negate the operand */
1187 if (xcode == CONST_INT)
1188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1189 else if (xcode == CONST_VECTOR)
1190 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1191 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1192 return;
1193
1194 case 'I': /* enable/disable interrupts */
1195 if (xcode == CONST_INT)
1196 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1197 return;
1198
1199 case 'b': /* branch modifiers */
1200 if (xcode == REG)
1201 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1202 else if (COMPARISON_P (x))
1203 fprintf (file, "%s", xcode == NE ? "n" : "");
1204 return;
1205
1206 case 'i': /* indirect call */
1207 if (xcode == MEM)
1208 {
1209 if (GET_CODE (XEXP (x, 0)) == REG)
1210 /* Used in indirect function calls. */
1211 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1212 else
1213 output_address (XEXP (x, 0));
1214 }
1215 return;
1216
1217 case 'p': /* load/store */
1218 if (xcode == MEM)
1219 {
1220 x = XEXP (x, 0);
1221 xcode = GET_CODE (x);
1222 }
e04cf423 1223 if (xcode == AND)
1224 {
1225 x = XEXP (x, 0);
1226 xcode = GET_CODE (x);
1227 }
644459d0 1228 if (xcode == REG)
1229 fprintf (file, "d");
1230 else if (xcode == CONST_INT)
1231 fprintf (file, "a");
1232 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1233 fprintf (file, "r");
1234 else if (xcode == PLUS || xcode == LO_SUM)
1235 {
1236 if (GET_CODE (XEXP (x, 1)) == REG)
1237 fprintf (file, "x");
1238 else
1239 fprintf (file, "d");
1240 }
1241 return;
1242
1243 case 0:
1244 if (xcode == REG)
1245 fprintf (file, "%s", reg_names[REGNO (x)]);
1246 else if (xcode == MEM)
1247 output_address (XEXP (x, 0));
1248 else if (xcode == CONST_VECTOR)
1249 output_addr_const (file, CONST_VECTOR_ELT (x, 0));
1250 else
1251 output_addr_const (file, x);
1252 return;
1253
1254 default:
1255 output_operand_lossage ("invalid %%xn code");
1256 }
1257 gcc_unreachable ();
1258}
1259
1260extern char call_used_regs[];
1261extern char regs_ever_live[];
1262
1263/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1264 caller saved register. For leaf functions it is more efficient to
1265 use a volatile register because we won't need to save and restore the
1266 pic register. This routine is only valid after register allocation
1267 is completed, so we can pick an unused register. */
1268static rtx
1269get_pic_reg (void)
1270{
1271 rtx pic_reg = pic_offset_table_rtx;
1272 if (!reload_completed && !reload_in_progress)
1273 abort ();
1274 return pic_reg;
1275}
1276
c8befdb9 1277/* Split constant addresses to handle cases that are too large. Also, add in
1278 the pic register when in PIC mode. */
1279void
1280spu_split_address (rtx * ops)
1281{
1282 if (TARGET_LARGE_MEM
1283 || (GET_CODE (ops[1]) == CONST && !legitimate_const (ops[1], 0)))
1284 {
1285 emit_insn (gen_high (ops[0], ops[1]));
1286 emit_insn (gen_low (ops[0], ops[0], ops[1]));
1287 }
1288 else if (flag_pic)
1289 emit_insn (gen_pic (ops[0], ops[1]));
1290 if (flag_pic)
1291 {
1292 rtx pic_reg = get_pic_reg ();
1293 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1294 current_function_uses_pic_offset_table = 1;
1295 }
1296}
1297
644459d0 1298/* SAVING is TRUE when we are generating the actual load and store
1299 instructions for REGNO. When determining the size of the stack
 1300 needed for saving registers we must allocate enough space for the
1301 worst case, because we don't always have the information early enough
1302 to not allocate it. But we can at least eliminate the actual loads
1303 and stores during the prologue/epilogue. */
1304static int
1305need_to_save_reg (int regno, int saving)
1306{
1307 if (regs_ever_live[regno] && !call_used_regs[regno])
1308 return 1;
1309 if (flag_pic
1310 && regno == PIC_OFFSET_TABLE_REGNUM
1311 && (!saving || current_function_uses_pic_offset_table)
1312 && (!saving
1313 || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
1314 return 1;
1315 return 0;
1316}
1317
1318/* This function is only correct starting with local register
1319 allocation */
1320int
1321spu_saved_regs_size (void)
1322{
1323 int reg_save_size = 0;
1324 int regno;
1325
1326 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1327 if (need_to_save_reg (regno, 0))
1328 reg_save_size += 0x10;
1329 return reg_save_size;
1330}
1331
1332static rtx
1333frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1334{
1335 rtx reg = gen_rtx_REG (V4SImode, regno);
1336 rtx mem =
1337 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1338 return emit_insn (gen_movv4si (mem, reg));
1339}
1340
1341static rtx
1342frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1343{
1344 rtx reg = gen_rtx_REG (V4SImode, regno);
1345 rtx mem =
1346 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1347 return emit_insn (gen_movv4si (reg, mem));
1348}
1349
1350/* This happens after reload, so we need to expand it. */
1351static rtx
1352frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1353{
1354 rtx insn;
1355 if (satisfies_constraint_K (GEN_INT (imm)))
1356 {
1357 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1358 }
1359 else
1360 {
1361 insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1362 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1363 REG_NOTES (insn));
1364 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1365 if (REGNO (src) == REGNO (scratch))
1366 abort ();
1367 }
1368 if (REGNO (dst) == REGNO (scratch))
1369 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1370 REG_NOTES (insn));
1371 return insn;
1372}
1373
1374/* Return nonzero if this function is known to have a null epilogue. */
1375
1376int
1377direct_return (void)
1378{
1379 if (reload_completed)
1380 {
1381 if (cfun->static_chain_decl == 0
1382 && (spu_saved_regs_size ()
1383 + get_frame_size ()
1384 + current_function_outgoing_args_size
1385 + current_function_pretend_args_size == 0)
1386 && current_function_is_leaf)
1387 return 1;
1388 }
1389 return 0;
1390}
1391
1392/*
1393 The stack frame looks like this:
1394 +-------------+
1395 | incoming |
1396 AP | args |
1397 +-------------+
1398 | $lr save |
1399 +-------------+
1400 prev SP | back chain |
1401 +-------------+
1402 | var args |
1403 | reg save | current_function_pretend_args_size bytes
1404 +-------------+
1405 | ... |
1406 | saved regs | spu_saved_regs_size() bytes
1407 +-------------+
1408 | ... |
1409 FP | vars | get_frame_size() bytes
1410 +-------------+
1411 | ... |
1412 | outgoing |
1413 | args | current_function_outgoing_args_size bytes
1414 +-------------+
1415 | $lr of next |
1416 | frame |
1417 +-------------+
1418 SP | back chain |
1419 +-------------+
1420
1421*/
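/* Illustrative arithmetic only: a non-leaf function with 48 bytes of local
   variables, two registers to save (2 * 16 = 32 bytes), no varargs and no
   outgoing arguments allocates 48 + 32 + 0 + 0 + STACK_POINTER_OFFSET
   bytes below the incoming $sp.  */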
1422void
1423spu_expand_prologue (void)
1424{
1425 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1426 HOST_WIDE_INT total_size;
1427 HOST_WIDE_INT saved_regs_size;
1428 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1429 rtx scratch_reg_0, scratch_reg_1;
1430 rtx insn, real;
1431
1432 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1433 the "toplevel" insn chain. */
1434 emit_note (NOTE_INSN_DELETED);
1435
1436 if (flag_pic && optimize == 0)
1437 current_function_uses_pic_offset_table = 1;
1438
1439 if (spu_naked_function_p (current_function_decl))
1440 return;
1441
1442 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1443 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1444
1445 saved_regs_size = spu_saved_regs_size ();
1446 total_size = size + saved_regs_size
1447 + current_function_outgoing_args_size
1448 + current_function_pretend_args_size;
1449
1450 if (!current_function_is_leaf
1451 || current_function_calls_alloca || total_size > 0)
1452 total_size += STACK_POINTER_OFFSET;
1453
1454 /* Save this first because code after this might use the link
1455 register as a scratch register. */
1456 if (!current_function_is_leaf)
1457 {
1458 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1459 RTX_FRAME_RELATED_P (insn) = 1;
1460 }
1461
1462 if (total_size > 0)
1463 {
1464 offset = -current_function_pretend_args_size;
1465 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1466 if (need_to_save_reg (regno, 1))
1467 {
1468 offset -= 16;
1469 insn = frame_emit_store (regno, sp_reg, offset);
1470 RTX_FRAME_RELATED_P (insn) = 1;
1471 }
1472 }
1473
1474 if (flag_pic && current_function_uses_pic_offset_table)
1475 {
1476 rtx pic_reg = get_pic_reg ();
1477 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1478 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1479 REG_NOTES (insn));
1480 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1481 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1482 REG_NOTES (insn));
1483 }
1484
1485 if (total_size > 0)
1486 {
1487 if (flag_stack_check)
1488 {
 1489 /* We compare against total_size-1 because
1490 ($sp >= total_size) <=> ($sp > total_size-1) */
1491 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1492 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1493 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1494 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1495 {
1496 emit_move_insn (scratch_v4si, size_v4si);
1497 size_v4si = scratch_v4si;
1498 }
1499 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1500 emit_insn (gen_vec_extractv4si
1501 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1502 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1503 }
1504
1505 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1506 the value of the previous $sp because we save it as the back
1507 chain. */
1508 if (total_size <= 2000)
1509 {
1510 /* In this case we save the back chain first. */
1511 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1512 RTX_FRAME_RELATED_P (insn) = 1;
1513 insn =
1514 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1515 }
1516 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1517 {
1518 insn = emit_move_insn (scratch_reg_0, sp_reg);
1519 RTX_FRAME_RELATED_P (insn) = 1;
1520 insn =
1521 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1522 }
1523 else
1524 {
1525 insn = emit_move_insn (scratch_reg_0, sp_reg);
1526 RTX_FRAME_RELATED_P (insn) = 1;
1527 insn =
1528 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1529 }
1530 RTX_FRAME_RELATED_P (insn) = 1;
1531 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1532 REG_NOTES (insn) =
1533 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1534
1535 if (total_size > 2000)
1536 {
1537 /* Save the back chain ptr */
1538 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1539 RTX_FRAME_RELATED_P (insn) = 1;
1540 }
1541
1542 if (frame_pointer_needed)
1543 {
1544 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1545 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1546 + current_function_outgoing_args_size;
1547 /* Set the new frame_pointer */
1548 frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1549 }
1550 }
1551
1552 emit_note (NOTE_INSN_DELETED);
1553}
1554
1555void
1556spu_expand_epilogue (bool sibcall_p)
1557{
1558 int size = get_frame_size (), offset, regno;
1559 HOST_WIDE_INT saved_regs_size, total_size;
1560 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1561 rtx jump, scratch_reg_0;
1562
1563 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1564 the "toplevel" insn chain. */
1565 emit_note (NOTE_INSN_DELETED);
1566
1567 if (spu_naked_function_p (current_function_decl))
1568 return;
1569
1570 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1571
1572 saved_regs_size = spu_saved_regs_size ();
1573 total_size = size + saved_regs_size
1574 + current_function_outgoing_args_size
1575 + current_function_pretend_args_size;
1576
1577 if (!current_function_is_leaf
1578 || current_function_calls_alloca || total_size > 0)
1579 total_size += STACK_POINTER_OFFSET;
1580
1581 if (total_size > 0)
1582 {
1583 if (current_function_calls_alloca)
1584 /* Load it from the back chain because our save_stack_block and
1585 restore_stack_block do nothing. */
1586 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1587 else
1588 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1589
1590
1591 if (saved_regs_size > 0)
1592 {
1593 offset = -current_function_pretend_args_size;
1594 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1595 if (need_to_save_reg (regno, 1))
1596 {
1597 offset -= 0x10;
1598 frame_emit_load (regno, sp_reg, offset);
1599 }
1600 }
1601 }
1602
1603 if (!current_function_is_leaf)
1604 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1605
1606 if (!sibcall_p)
1607 {
1608 emit_insn (gen_rtx_USE
1609 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1610 jump = emit_jump_insn (gen__return ());
1611 emit_barrier_after (jump);
1612 }
1613
1614 emit_note (NOTE_INSN_DELETED);
1615}
1616
1617rtx
1618spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1619{
1620 if (count != 0)
1621 return 0;
1622 /* This is inefficient because it ends up copying to a save-register
1623 which then gets saved even though $lr has already been saved. But
1624 it does generate better code for leaf functions and we don't need
1625 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1626 used for __builtin_return_address anyway, so maybe we don't care if
1627 it's inefficient. */
1628 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1629}
1630\f
1631
1632/* Given VAL, generate a constant appropriate for MODE.
1633 If MODE is a vector mode, every element will be VAL.
1634 For TImode, VAL will be zero extended to 128 bits. */
1635rtx
1636spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1637{
1638 rtx inner;
1639 rtvec v;
1640 int units, i;
1641
1642 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1643 || GET_MODE_CLASS (mode) == MODE_FLOAT
1644 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1645 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1646
1647 if (GET_MODE_CLASS (mode) == MODE_INT)
1648 return immed_double_const (val, 0, mode);
1649
1650 /* val is the bit representation of the float */
1651 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1652 return hwint_to_const_double (mode, val);
1653
1654 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1655 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1656 else
1657 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1658
1659 units = GET_MODE_NUNITS (mode);
1660
1661 v = rtvec_alloc (units);
1662
1663 for (i = 0; i < units; ++i)
1664 RTVEC_ELT (v, i) = inner;
1665
1666 return gen_rtx_CONST_VECTOR (mode, v);
1667}
1668\f
1669/* branch hint stuff */
1670
1671/* The hardware requires 8 insns between a hint and the branch it
 1672 affects. This variable describes how many rtl instructions the
1673 compiler needs to see before inserting a hint. (FIXME: We should
1674 accept less and insert nops to enforce it because hinting is always
1675 profitable for performance, but we do need to be careful of code
1676 size.) */
1677int spu_hint_dist = (8 * 4);
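/* (That is, 8 instructions of 4 bytes each; the distance is measured in
   bytes, the same units used by INSN_ADDRESSES.)  */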
1678
1679/* An array of these is used to propagate hints to predecessor blocks. */
1680struct spu_bb_info
1681{
fa7637bd 1682 rtx prop_jump; /* propagated from another block */
1683 basic_block bb; /* the original block. */
644459d0 1684};
1685
1686/* The special $hbr register is used to prevent the insn scheduler from
1687 moving hbr insns across instructions which invalidate them. It
1688 should only be used in a clobber, and this function searches for
1689 insns which clobber it. */
1690static bool
1691insn_clobbers_hbr (rtx insn)
1692{
1693 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1694 {
1695 rtx parallel = PATTERN (insn);
1696 rtx clobber;
1697 int j;
1698 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1699 {
1700 clobber = XVECEXP (parallel, 0, j);
1701 if (GET_CODE (clobber) == CLOBBER
1702 && GET_CODE (XEXP (clobber, 0)) == REG
1703 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
1704 return 1;
1705 }
1706 }
1707 return 0;
1708}
1709
1710static void
1711spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1712{
1713 rtx branch_label;
1714 rtx hint, insn, prev, next;
1715
1716 if (before == 0 || branch == 0 || target == 0)
1717 return;
1718
1719 if (distance > 600)
1720 return;
1721
1722
1723 branch_label = gen_label_rtx ();
1724 LABEL_NUSES (branch_label)++;
1725 LABEL_PRESERVE_P (branch_label) = 1;
1726 insn = emit_label_before (branch_label, branch);
1727 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1728
1729 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1730 the current insn is pipe0, dual issue with it. */
1731 prev = prev_active_insn (before);
1732 if (prev && get_pipe (prev) == 0)
1733 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1734 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1735 {
1736 next = next_active_insn (before);
1737 hint = emit_insn_after (gen_hbr (branch_label, target), before);
1738 if (next)
1739 PUT_MODE (next, TImode);
1740 }
1741 else
1742 {
1743 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1744 PUT_MODE (hint, TImode);
1745 }
1746 recog_memoized (hint);
1747}
1748
1749/* Returns 0 if we don't want a hint for this branch. Otherwise return
1750 the rtx for the branch target. */
1751static rtx
1752get_branch_target (rtx branch)
1753{
1754 if (GET_CODE (branch) == JUMP_INSN)
1755 {
1756 rtx set, src;
1757
1758 /* Return statements */
1759 if (GET_CODE (PATTERN (branch)) == RETURN)
1760 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1761
1762 /* jump table */
1763 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1764 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1765 return 0;
1766
1767 set = single_set (branch);
1768 src = SET_SRC (set);
1769 if (GET_CODE (SET_DEST (set)) != PC)
1770 abort ();
1771
1772 if (GET_CODE (src) == IF_THEN_ELSE)
1773 {
1774 rtx lab = 0;
1775 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1776 if (note)
1777 {
1778 /* If the more probable case is not a fall through, then
1779 try a branch hint. */
1780 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1781 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1782 && GET_CODE (XEXP (src, 1)) != PC)
1783 lab = XEXP (src, 1);
1784 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1785 && GET_CODE (XEXP (src, 2)) != PC)
1786 lab = XEXP (src, 2);
1787 }
1788 if (lab)
1789 {
1790 if (GET_CODE (lab) == RETURN)
1791 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1792 return lab;
1793 }
1794 return 0;
1795 }
1796
1797 return src;
1798 }
1799 else if (GET_CODE (branch) == CALL_INSN)
1800 {
1801 rtx call;
1802 /* All of our call patterns are in a PARALLEL and the CALL is
1803 the first pattern in the PARALLEL. */
1804 if (GET_CODE (PATTERN (branch)) != PARALLEL)
1805 abort ();
1806 call = XVECEXP (PATTERN (branch), 0, 0);
1807 if (GET_CODE (call) == SET)
1808 call = SET_SRC (call);
1809 if (GET_CODE (call) != CALL)
1810 abort ();
1811 return XEXP (XEXP (call, 0), 0);
1812 }
1813 return 0;
1814}
1815
1816static void
1817insert_branch_hints (void)
1818{
1819 struct spu_bb_info *spu_bb_info;
1820 rtx branch, insn, next;
1821 rtx branch_target = 0;
1822 int branch_addr = 0, insn_addr, head_addr;
1823 basic_block bb;
1824 unsigned int j;
1825
1826 spu_bb_info =
1827 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
1828 sizeof (struct spu_bb_info));
1829
1830 /* We need exact insn addresses and lengths. */
1831 shorten_branches (get_insns ());
1832
1833 FOR_EACH_BB_REVERSE (bb)
1834 {
1835 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
1836 branch = 0;
1837 if (spu_bb_info[bb->index].prop_jump)
1838 {
1839 branch = spu_bb_info[bb->index].prop_jump;
1840 branch_target = get_branch_target (branch);
1841 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
1842 }
1843 /* Search from end of a block to beginning. In this loop, find
 1844 jumps which need a branch hint and emit the hint only when:
1845 - it's an indirect branch and we're at the insn which sets
1846 the register
1847 - we're at an insn that will invalidate the hint. e.g., a
1848 call, another hint insn, inline asm that clobbers $hbr, and
1849 some inlined operations (divmodsi4). Don't consider jumps
1850 because they are only at the end of a block and are
1851 considered when we are deciding whether to propagate
1852 - we're getting too far away from the branch. The hbr insns
1853 only have a signed 10 bit offset
1854 We go back as far as possible so the branch will be considered
1855 for propagation when we get to the beginning of the block. */
1856 next = 0;
1857 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
1858 {
1859 if (INSN_P (insn))
1860 {
1861 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
1862 if (branch && next
1863 && ((GET_CODE (branch_target) == REG
1864 && set_of (branch_target, insn) != NULL_RTX)
1865 || insn_clobbers_hbr (insn)
1866 || branch_addr - insn_addr > 600))
1867 {
1868 int next_addr = INSN_ADDRESSES (INSN_UID (next));
1869 if (insn != BB_END (bb)
1870 && branch_addr - next_addr >= spu_hint_dist)
1871 {
1872 if (dump_file)
1873 fprintf (dump_file,
1874 "hint for %i in block %i before %i\n",
1875 INSN_UID (branch), bb->index, INSN_UID (next));
1876 spu_emit_branch_hint (next, branch, branch_target,
1877 branch_addr - next_addr);
1878 }
1879 branch = 0;
1880 }
1881
1882 /* JUMP_P will only be true at the end of a block. When
1883 branch is already set it means we've previously decided
1884 to propagate a hint for that branch into this block. */
1885 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
1886 {
1887 branch = 0;
1888 if ((branch_target = get_branch_target (insn)))
1889 {
1890 branch = insn;
1891 branch_addr = insn_addr;
1892 }
1893 }
1894
1895 /* When a branch hint is emitted it will be inserted
1896 before "next". Make sure next is the beginning of a
1897 cycle to minimize impact on the scheduled insns. */
1898 if (GET_MODE (insn) == TImode)
1899 next = insn;
1900 }
1901 if (insn == BB_HEAD (bb))
1902 break;
1903 }
1904
1905 if (branch)
1906 {
1907 /* If we haven't emitted a hint for this branch yet, it might
1908 be profitable to emit it in one of the predecessor blocks,
1909 especially for loops. */
1910 rtx bbend;
1911 basic_block prev = 0, prop = 0, prev2 = 0;
1912 int loop_exit = 0, simple_loop = 0;
1913 int next_addr = 0;
1914 if (next)
1915 next_addr = INSN_ADDRESSES (INSN_UID (next));
1916
1917 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
1918 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
1919 prev = EDGE_PRED (bb, j)->src;
1920 else
1921 prev2 = EDGE_PRED (bb, j)->src;
1922
1923 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
1924 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
1925 loop_exit = 1;
1926 else if (EDGE_SUCC (bb, j)->dest == bb)
1927 simple_loop = 1;
1928
1929 /* If this branch is a loop exit then propagate to previous
1930 fallthru block. This catches the cases when it is a simple
1931 loop or when there is an initial branch into the loop. */
1932 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
1933 prop = prev;
1934
 1935 /* If there is only one adjacent predecessor, propagate to it, but don't
 1936 propagate outside this loop. This loop_depth test isn't perfect, but
1937 I'm not sure the loop_father member is valid at this point. */
1938 else if (prev && single_pred_p (bb)
1939 && prev->loop_depth == bb->loop_depth)
1940 prop = prev;
1941
1942 /* If this is the JOIN block of a simple IF-THEN then
 1943 propagate the hint to the HEADER block.
1944 else if (prev && prev2
1945 && EDGE_COUNT (bb->preds) == 2
1946 && EDGE_COUNT (prev->preds) == 1
1947 && EDGE_PRED (prev, 0)->src == prev2
1948 && prev2->loop_depth == bb->loop_depth
1949 && GET_CODE (branch_target) != REG)
1950 prop = prev;
1951
1952 /* Don't propagate when:
1953 - this is a simple loop and the hint would be too far
1954 - this is not a simple loop and there are 16 insns in
1955 this block already
1956 - the predecessor block ends in a branch that will be
1957 hinted
1958 - the predecessor block ends in an insn that invalidates
1959 the hint */
1960 if (prop
1961 && prop->index >= 0
1962 && (bbend = BB_END (prop))
1963 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
1964 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
1965 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
1966 {
1967 if (dump_file)
1968 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
1969 "for %i (loop_exit %i simple_loop %i dist %i)\n",
1970 bb->index, prop->index, bb->loop_depth,
1971 INSN_UID (branch), loop_exit, simple_loop,
1972 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
1973
1974 spu_bb_info[prop->index].prop_jump = branch;
1975 spu_bb_info[prop->index].bb = bb;
1976 }
1977 else if (next && branch_addr - next_addr >= spu_hint_dist)
1978 {
1979 if (dump_file)
1980 fprintf (dump_file, "hint for %i in block %i before %i\n",
1981 INSN_UID (branch), bb->index, INSN_UID (next));
1982 spu_emit_branch_hint (next, branch, branch_target,
1983 branch_addr - next_addr);
1984 }
1985 branch = 0;
1986 }
1987 }
1988 free (spu_bb_info);
1989}
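/* Hedged summary of the propagation above: when the scan reaches the start
   of a block without having emitted a hint for that block's branch, the
   branch is recorded in spu_bb_info for a chosen predecessor (the
   fall-through block of a loop exit, a single predecessor at the same loop
   depth, or the header of a simple if-then), and the hint is emitted when
   that predecessor block is processed.  */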
1990\f
1991/* Emit a nop for INSN so that INSN and the nop will dual issue. This assumes
1992 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1993 We check for TImode to handle a MULTI1 insn which has dual issued its
1994 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
1995 ADDR_VEC insns. */
1996static void
1997emit_nop_for_insn (rtx insn)
1998{
1999 int p;
2000 rtx new_insn;
2001 p = get_pipe (insn);
2002 if (p == 1 && GET_MODE (insn) == TImode)
2003 {
2004 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2005 PUT_MODE (new_insn, TImode);
2006 PUT_MODE (insn, VOIDmode);
2007 }
2008 else
2009 new_insn = emit_insn_after (gen_lnop (), insn);
2010}
2011
2012/* Insert nops in basic blocks to meet dual issue alignment
2013 requirements. */
2014static void
2015insert_nops (void)
2016{
2017 rtx insn, next_insn, prev_insn;
2018 int length;
2019 int addr;
2020
2021 /* This sets up INSN_ADDRESSES. */
2022 shorten_branches (get_insns ());
2023
2024 /* Keep track of length added by nops. */
2025 length = 0;
2026
2027 prev_insn = 0;
2028 for (insn = get_insns (); insn; insn = next_insn)
2029 {
2030 next_insn = next_active_insn (insn);
2031 addr = INSN_ADDRESSES (INSN_UID (insn));
2032 if (GET_MODE (insn) == TImode
2033 && next_insn
2034 && GET_MODE (next_insn) != TImode
2035 && ((addr + length) & 7) != 0)
2036 {
2037 /* prev_insn will always be set because the first insn is
2038 always 8-byte aligned. */
2039 emit_nop_for_insn (prev_insn);
2040 length += 4;
2041 }
2042 prev_insn = insn;
2043 }
2044}
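/* Illustrative sketch of the pass above (an assumed typical case, not taken
   from the original sources): if a TImode-marked insn that has a dual-issue
   partner would start at an address that is 4 mod 8 once previously inserted
   padding is counted, emit_nop_for_insn pads next to the preceding insn with
   a nop or lnop so the pair starts on an 8-byte boundary.  */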
2045
2046static void
2047spu_machine_dependent_reorg (void)
2048{
2049 if (optimize > 0)
2050 {
2051 if (TARGET_BRANCH_HINTS)
2052 insert_branch_hints ();
2053 insert_nops ();
2054 }
2055}
2056\f
2057
2058/* Insn scheduling routines, primarily for dual issue. */
2059static int
2060spu_sched_issue_rate (void)
2061{
2062 return 2;
2063}
2064
2065static int
2066spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2067 int verbose ATTRIBUTE_UNUSED, rtx insn,
2068 int can_issue_more)
2069{
2070 if (GET_CODE (PATTERN (insn)) != USE
2071 && GET_CODE (PATTERN (insn)) != CLOBBER
2072 && get_pipe (insn) != -2)
2073 can_issue_more--;
2074 return can_issue_more;
2075}
2076
2077static int
2078get_pipe (rtx insn)
2079{
2080 enum attr_type t;
2081 /* Handle inline asm */
2082 if (INSN_CODE (insn) == -1)
2083 return -1;
2084 t = get_attr_type (insn);
2085 switch (t)
2086 {
2087 case TYPE_CONVERT:
2088 return -2;
2089 case TYPE_MULTI0:
2090 return -1;
2091
2092 case TYPE_FX2:
2093 case TYPE_FX3:
2094 case TYPE_SPR:
2095 case TYPE_NOP:
2096 case TYPE_FXB:
2097 case TYPE_FPD:
2098 case TYPE_FP6:
2099 case TYPE_FP7:
2100 case TYPE_IPREFETCH:
2101 return 0;
2102
2103 case TYPE_LNOP:
2104 case TYPE_SHUF:
2105 case TYPE_LOAD:
2106 case TYPE_STORE:
2107 case TYPE_BR:
2108 case TYPE_MULTI1:
2109 case TYPE_HBR:
2110 return 1;
2111 default:
2112 abort ();
2113 }
2114}
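/* Examples of the mapping above (illustrative): a fixed-point insn such as
   an add (TYPE_FX2) returns 0 (even pipe), a load or shuffle returns 1 (odd
   pipe), inline asm (INSN_CODE == -1) returns -1, and TYPE_CONVERT returns
   -2 so spu_sched_variable_issue does not count it against the issue
   rate.  */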
2115
2116static int
2117spu_sched_adjust_priority (rtx insn, int pri)
2118{
2119 int p = get_pipe (insn);
2120 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2121 * scheduling. */
2122 if (GET_CODE (PATTERN (insn)) == USE
2123 || GET_CODE (PATTERN (insn)) == CLOBBER
2124 || p == -2)
2125 return pri + 100;
2126 /* Schedule pipe0 insns early for greedier dual issue. */
2127 if (p != 1)
2128 return pri + 50;
2129 return pri;
2130}
2131
2132/* INSN is dependent on DEP_INSN. */
2133static int
2134spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2135 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2136{
2137 if (GET_CODE (insn) == CALL_INSN)
2138 return cost - 2;
2139 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2140 scheduler makes every insn in a block anti-dependent on the final
2141 jump_insn. We adjust here so higher cost insns will get scheduled
2142 earlier. */
2143 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2144 return INSN_COST (dep_insn) - 3;
2145 return cost;
2146}
2147\f
2148/* Create a CONST_DOUBLE from a string. */
2149struct rtx_def *
2150spu_float_const (const char *string, enum machine_mode mode)
2151{
2152 REAL_VALUE_TYPE value;
2153 value = REAL_VALUE_ATOF (string, mode);
2154 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2155}
2156
2157/* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2158 CONST_INT fits constraint 'K', i.e., is small. */
2159int
2160legitimate_const (rtx x, int aligned)
2161{
2162 /* We can never know if the resulting address fits in 18 bits and can be
2163 loaded with ila. Instead we should use the HI and LO relocations to
2164 load a 32 bit address. */
2165 rtx sym, cst;
2166
2167 gcc_assert (GET_CODE (x) == CONST);
2168
2169 if (GET_CODE (XEXP (x, 0)) != PLUS)
2170 return 0;
2171 sym = XEXP (XEXP (x, 0), 0);
2172 cst = XEXP (XEXP (x, 0), 1);
2173 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2174 return 0;
2175 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2176 return 0;
2177 return satisfies_constraint_K (cst);
2178}
2179
2180int
2181spu_constant_address_p (rtx x)
2182{
2183 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2184 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2185 || GET_CODE (x) == HIGH);
2186}
2187
2188static enum spu_immediate
2189which_immediate_load (HOST_WIDE_INT val)
2190{
2191 gcc_assert (val == trunc_int_for_mode (val, SImode));
2192
2193 if (val >= -0x8000 && val <= 0x7fff)
2194 return SPU_IL;
2195 if (val >= 0 && val <= 0x3ffff)
2196 return SPU_ILA;
2197 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2198 return SPU_ILH;
2199 if ((val & 0xffff) == 0)
2200 return SPU_ILHU;
2201
2202 return SPU_NONE;
2203}
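/* Hedged examples of the mapping above: 0x1234 fits the il range and yields
   SPU_IL; 0x23456 fits the 18-bit unsigned ila range and yields SPU_ILA;
   0x12341234 has equal halfwords and yields SPU_ILH; 0x56780000 has a zero
   low halfword and yields SPU_ILHU; anything else returns SPU_NONE and
   needs more than one instruction.  */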
2204
2205int
2206immediate_load_p (rtx op, enum machine_mode mode)
2207{
2208 HOST_WIDE_INT val;
2209 unsigned char arr[16];
2210 int i, j;
2211 if (GET_MODE (op) != VOIDmode)
2212 mode = GET_MODE (op);
2213
2214 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2215 || GET_CODE (op) == CONST_VECTOR);
2216
2217 /* V4SI with all identical symbols is valid. */
2218 if (mode == V4SImode
2219 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == SYMBOL_REF)
2220 return !TARGET_LARGE_MEM && !flag_pic
2221 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2222 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2223 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3);
2224
2225 constant_to_array (mode, op, arr);
2226
2227 /* Check that bytes are repeated. */
2228 for (i = 4; i < 16; i += 4)
2229 for (j = 0; j < 4; j++)
2230 if (arr[j] != arr[i + j])
2231 return 0;
2232
2233 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2234 val = trunc_int_for_mode (val, SImode);
2235
2236 return which_immediate_load (val) != SPU_NONE;
2237}
2238
2239static enum spu_immediate
2240which_logical_immediate (HOST_WIDE_INT val)
2241{
2242 gcc_assert (val == trunc_int_for_mode (val, SImode));
2243
2244 if (val >= -0x200 && val <= 0x1ff)
2245 return SPU_ORI;
2246 if (val >= 0 && val <= 0xffff)
2247 return SPU_IOHL;
2248 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2249 {
2250 val = trunc_int_for_mode (val, HImode);
2251 if (val >= -0x200 && val <= 0x1ff)
2252 return SPU_ORHI;
2253 if ((val & 0xff) == ((val >> 8) & 0xff))
2254 {
2255 val = trunc_int_for_mode (val, QImode);
2256 if (val >= -0x200 && val <= 0x1ff)
2257 return SPU_ORBI;
2258 }
2259 }
2260 return SPU_NONE;
2261}
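/* Hedged examples: 0x1ff maps to SPU_ORI, 0x1234 to SPU_IOHL, 0x00050005 to
   SPU_ORHI (equal halfwords whose value fits the signed 10-bit range), and
   0x03030303 to SPU_ORBI (all bytes equal and in range); other values
   return SPU_NONE.  */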
2262
2263int
2264logical_immediate_p (rtx op, enum machine_mode mode)
2265{
2266 HOST_WIDE_INT val;
2267 unsigned char arr[16];
2268 int i, j;
2269
2270 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2271 || GET_CODE (op) == CONST_VECTOR);
2272
2273 if (GET_MODE (op) != VOIDmode)
2274 mode = GET_MODE (op);
2275
2276 constant_to_array (mode, op, arr);
2277
2278 /* Check that bytes are repeated. */
2279 for (i = 4; i < 16; i += 4)
2280 for (j = 0; j < 4; j++)
2281 if (arr[j] != arr[i + j])
2282 return 0;
2283
2284 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2285 val = trunc_int_for_mode (val, SImode);
2286
2287 i = which_logical_immediate (val);
2288 return i != SPU_NONE && i != SPU_IOHL;
2289}
2290
2291int
2292iohl_immediate_p (rtx op, enum machine_mode mode)
2293{
2294 HOST_WIDE_INT val;
2295 unsigned char arr[16];
2296 int i, j;
2297
2298 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2299 || GET_CODE (op) == CONST_VECTOR);
2300
2301 if (GET_MODE (op) != VOIDmode)
2302 mode = GET_MODE (op);
2303
2304 constant_to_array (mode, op, arr);
2305
2306 /* Check that bytes are repeated. */
2307 for (i = 4; i < 16; i += 4)
2308 for (j = 0; j < 4; j++)
2309 if (arr[j] != arr[i + j])
2310 return 0;
2311
2312 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2313 val = trunc_int_for_mode (val, SImode);
2314
2315 return val >= 0 && val <= 0xffff;
2316}
2317
2318int
2319arith_immediate_p (rtx op, enum machine_mode mode,
2320 HOST_WIDE_INT low, HOST_WIDE_INT high)
2321{
2322 HOST_WIDE_INT val;
2323 unsigned char arr[16];
2324 int bytes, i, j;
2325
2326 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2327 || GET_CODE (op) == CONST_VECTOR);
2328
2329 if (GET_MODE (op) != VOIDmode)
2330 mode = GET_MODE (op);
2331
2332 constant_to_array (mode, op, arr);
2333
2334 if (VECTOR_MODE_P (mode))
2335 mode = GET_MODE_INNER (mode);
2336
2337 bytes = GET_MODE_SIZE (mode);
2338 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2339
2340 /* Check that bytes are repeated. */
2341 for (i = bytes; i < 16; i += bytes)
2342 for (j = 0; j < bytes; j++)
2343 if (arr[j] != arr[i + j])
2344 return 0;
2345
2346 val = arr[0];
2347 for (j = 1; j < bytes; j++)
2348 val = (val << 8) | arr[j];
2349
2350 val = trunc_int_for_mode (val, mode);
2351
2352 return val >= low && val <= high;
2353}
2354
2355/* We accept:
2356 - any 32 bit constant (SImode, SFmode)
2357 - any constant that can be generated with fsmbi (any mode)
 2358	 - a 64 bit constant where the high and low 32-bit halves are identical
2359 (DImode, DFmode)
2360 - a 128 bit constant where the four 32 bit words match. */
2361int
2362spu_legitimate_constant_p (rtx x)
2363{
2364 unsigned char arr[16];
2365 int i, j;
2366
2367 if (GET_CODE (x) == HIGH
2368 || GET_CODE (x) == CONST
2369 || GET_CODE (x) == SYMBOL_REF
2370 || GET_CODE (x) == LABEL_REF)
2371 return 1;
2372
2373 if (fsmbi_const_p (x))
2374 return 1;
2375
2376 if (GET_CODE (x) == CONST_INT)
2377 return (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0x7fffffffll)
2378 || ((INTVAL (x) >> 32) & 0xffffffffll) == (INTVAL (x) & 0xffffffffll);
2379
2380 if (GET_MODE (x) == SFmode)
2381 return 1;
2382
2383 if (GET_MODE (x) == DFmode)
2384 {
2385 HOST_WIDE_INT val = const_double_to_hwint (x);
2386 return ((val >> 32) & 0xffffffffll) == (val & 0xffffffffll);
2387 }
2388
2389 /* V4SI with all identical symbols is valid. */
2390 if (GET_MODE (x) == V4SImode
2391 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2392 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2393 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2394 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2395 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2396 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2397 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2398
2399 if (VECTOR_MODE_P (GET_MODE (x)))
2400 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2401 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2402 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2403 return 0;
2404
2405 constant_to_array (SImode, x, arr);
2406
2407 /* Check that bytes are repeated. */
2408 for (i = 4; i < 16; i += 4)
2409 for (j = 0; j < 4; j++)
2410 if (arr[j] != arr[i + j])
2411 return 0;
2412
2413 return 1;
2414}
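/* Worked example (illustrative): the 64-bit constant 0x1234567812345678 is
   accepted because its two 32-bit halves match, while 0x0000000112345678 is
   rejected and has to be built another way, e.g. by the force_const_mem or
   hi/lo IOR paths in spu_expand_mov below.  */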
2415
2416/* Valid addresses are:
2417 - symbol_ref, label_ref, const
2418 - reg
2419 - reg + const, where either reg or const is 16 byte aligned
2420 - reg + reg, alignment doesn't matter
2421 The alignment matters in the reg+const case because lqd and stqd
2422 ignore the 4 least significant bits of the const. (TODO: It might be
2423 preferable to allow any alignment and fix it up when splitting.) */
2424int
2425spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2426 rtx x, int reg_ok_strict)
2427{
2428 if (mode == TImode && GET_CODE (x) == AND
2429 && GET_CODE (XEXP (x, 1)) == CONST_INT
2430 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2431 x = XEXP (x, 0);
2432 switch (GET_CODE (x))
2433 {
2434 case SYMBOL_REF:
2435 case LABEL_REF:
2436 return !TARGET_LARGE_MEM;
2437
2438 case CONST:
c8befdb9 2439 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
644459d0 2440
2441 case CONST_INT:
2442 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2443
2444 case SUBREG:
2445 x = XEXP (x, 0);
2446 gcc_assert (GET_CODE (x) == REG);
2447
2448 case REG:
2449 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2450
2451 case PLUS:
2452 case LO_SUM:
2453 {
2454 rtx op0 = XEXP (x, 0);
2455 rtx op1 = XEXP (x, 1);
2456 if (GET_CODE (op0) == SUBREG)
2457 op0 = XEXP (op0, 0);
2458 if (GET_CODE (op1) == SUBREG)
2459 op1 = XEXP (op1, 0);
2460 /* We can't just accept any aligned register because CSE can
2461 change it to a register that is not marked aligned and then
2462 recog will fail. So we only accept frame registers because
2463 they will only be changed to other frame registers. */
2464 if (GET_CODE (op0) == REG
2465 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2466 && GET_CODE (op1) == CONST_INT
2467 && INTVAL (op1) >= -0x2000
2468 && INTVAL (op1) <= 0x1fff
2469 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2470 return 1;
2471 if (GET_CODE (op0) == REG
2472 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2473 && GET_CODE (op1) == REG
2474 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2475 return 1;
2476 }
2477 break;
2478
2479 default:
2480 break;
2481 }
2482 return 0;
2483}
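/* Illustrative examples, not exhaustive: (plus (reg sp) (const_int 32)) is
   accepted because the offset is a multiple of 16; (plus (reg 5)
   (const_int 3)) is accepted only when reg 5 is one of the frame-related
   pointer registers (REGNO_PTR_FRAME_P); (plus (reg 5) (reg 6)) is accepted
   regardless of alignment because reg + reg uses lqx/stqx.  */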
2484
2485/* When the address is reg + const_int, force the const_int into a
fa7637bd 2486 register. */
644459d0 2487rtx
2488spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2489 enum machine_mode mode)
2490{
2491 rtx op0, op1;
2492 /* Make sure both operands are registers. */
2493 if (GET_CODE (x) == PLUS)
2494 {
2495 op0 = XEXP (x, 0);
2496 op1 = XEXP (x, 1);
2497 if (ALIGNED_SYMBOL_REF_P (op0))
2498 {
2499 op0 = force_reg (Pmode, op0);
2500 mark_reg_pointer (op0, 128);
2501 }
2502 else if (GET_CODE (op0) != REG)
2503 op0 = force_reg (Pmode, op0);
2504 if (ALIGNED_SYMBOL_REF_P (op1))
2505 {
2506 op1 = force_reg (Pmode, op1);
2507 mark_reg_pointer (op1, 128);
2508 }
2509 else if (GET_CODE (op1) != REG)
2510 op1 = force_reg (Pmode, op1);
2511 x = gen_rtx_PLUS (Pmode, op0, op1);
2512 if (spu_legitimate_address (mode, x, 0))
2513 return x;
2514 }
2515 return NULL_RTX;
2516}
2517
2518/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2519 struct attribute_spec.handler. */
2520static tree
2521spu_handle_fndecl_attribute (tree * node,
2522 tree name,
2523 tree args ATTRIBUTE_UNUSED,
2524 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2525{
2526 if (TREE_CODE (*node) != FUNCTION_DECL)
2527 {
2528 warning (0, "`%s' attribute only applies to functions",
2529 IDENTIFIER_POINTER (name));
2530 *no_add_attrs = true;
2531 }
2532
2533 return NULL_TREE;
2534}
2535
2536/* Handle the "vector" attribute. */
2537static tree
2538spu_handle_vector_attribute (tree * node, tree name,
2539 tree args ATTRIBUTE_UNUSED,
2540 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2541{
2542 tree type = *node, result = NULL_TREE;
2543 enum machine_mode mode;
2544 int unsigned_p;
2545
2546 while (POINTER_TYPE_P (type)
2547 || TREE_CODE (type) == FUNCTION_TYPE
2548 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2549 type = TREE_TYPE (type);
2550
2551 mode = TYPE_MODE (type);
2552
2553 unsigned_p = TYPE_UNSIGNED (type);
2554 switch (mode)
2555 {
2556 case DImode:
2557 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2558 break;
2559 case SImode:
2560 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2561 break;
2562 case HImode:
2563 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2564 break;
2565 case QImode:
2566 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2567 break;
2568 case SFmode:
2569 result = V4SF_type_node;
2570 break;
2571 case DFmode:
2572 result = V2DF_type_node;
2573 break;
2574 default:
2575 break;
2576 }
2577
2578 /* Propagate qualifiers attached to the element type
2579 onto the vector type. */
2580 if (result && result != type && TYPE_QUALS (type))
2581 result = build_qualified_type (result, TYPE_QUALS (type));
2582
2583 *no_add_attrs = true; /* No need to hang on to the attribute. */
2584
2585 if (!result)
2586 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2587 else
2588 *node = reconstruct_complex_type (*node, result);
2589
2590 return NULL_TREE;
2591}
2592
2593/* Return non-zero if FUNC is a naked function. */
2594static int
2595spu_naked_function_p (tree func)
2596{
2597 tree a;
2598
2599 if (TREE_CODE (func) != FUNCTION_DECL)
2600 abort ();
2601
2602 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2603 return a != NULL_TREE;
2604}
2605
2606int
2607spu_initial_elimination_offset (int from, int to)
2608{
2609 int saved_regs_size = spu_saved_regs_size ();
2610 int sp_offset = 0;
2611 if (!current_function_is_leaf || current_function_outgoing_args_size
2612 || get_frame_size () || saved_regs_size)
2613 sp_offset = STACK_POINTER_OFFSET;
2614 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2615 return (sp_offset + current_function_outgoing_args_size);
2616 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2617 return 0;
2618 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2619 return sp_offset + current_function_outgoing_args_size
2620 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2621 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2622 return get_frame_size () + saved_regs_size + sp_offset;
2623 return 0;
2624}
2625
2626rtx
2627spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2628{
2629 enum machine_mode mode = TYPE_MODE (type);
2630 int byte_size = ((mode == BLKmode)
2631 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2632
2633 /* Make sure small structs are left justified in a register. */
2634 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2635 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2636 {
2637 enum machine_mode smode;
2638 rtvec v;
2639 int i;
2640 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2641 int n = byte_size / UNITS_PER_WORD;
2642 v = rtvec_alloc (nregs);
2643 for (i = 0; i < n; i++)
2644 {
2645 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2646 gen_rtx_REG (TImode,
2647 FIRST_RETURN_REGNUM
2648 + i),
2649 GEN_INT (UNITS_PER_WORD * i));
2650 byte_size -= UNITS_PER_WORD;
2651 }
2652
2653 if (n < nregs)
2654 {
2655 if (byte_size < 4)
2656 byte_size = 4;
2657 smode =
2658 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2659 RTVEC_ELT (v, n) =
2660 gen_rtx_EXPR_LIST (VOIDmode,
2661 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2662 GEN_INT (UNITS_PER_WORD * n));
2663 }
2664 return gen_rtx_PARALLEL (mode, v);
2665 }
2666 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2667}
2668
2669rtx
2670spu_function_arg (CUMULATIVE_ARGS cum,
2671 enum machine_mode mode,
2672 tree type, int named ATTRIBUTE_UNUSED)
2673{
2674 int byte_size;
2675
2676 if (cum >= MAX_REGISTER_ARGS)
2677 return 0;
2678
2679 byte_size = ((mode == BLKmode)
2680 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2681
2682 /* The ABI does not allow parameters to be passed partially in
 2683	 registers and partially on the stack. */
2684 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2685 return 0;
2686
2687 /* Make sure small structs are left justified in a register. */
2688 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2689 && byte_size < UNITS_PER_WORD && byte_size > 0)
2690 {
2691 enum machine_mode smode;
2692 rtx gr_reg;
2693 if (byte_size < 4)
2694 byte_size = 4;
2695 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2696 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2697 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2698 const0_rtx);
2699 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2700 }
2701 else
2702 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2703}
2704
2705/* Variable sized types are passed by reference. */
2706static bool
2707spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2708 enum machine_mode mode ATTRIBUTE_UNUSED,
2709 tree type, bool named ATTRIBUTE_UNUSED)
2710{
2711 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2712}
2713\f
2714
2715/* Var args. */
2716
2717/* Create and return the va_list datatype.
2718
2719 On SPU, va_list is an array type equivalent to
2720
2721 typedef struct __va_list_tag
2722 {
2723 void *__args __attribute__((__aligned(16)));
2724 void *__skip __attribute__((__aligned(16)));
2725
2726 } va_list[1];
2727
fa7637bd 2728 where __args points to the arg that will be returned by the next
644459d0 2729 va_arg(), and __skip points to the previous stack frame such that
2730 when __args == __skip we should advance __args by 32 bytes. */
2731static tree
2732spu_build_builtin_va_list (void)
2733{
2734 tree f_args, f_skip, record, type_decl;
2735 bool owp;
2736
2737 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2738
2739 type_decl =
2740 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2741
2742 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2743 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
2744
2745 DECL_FIELD_CONTEXT (f_args) = record;
2746 DECL_ALIGN (f_args) = 128;
2747 DECL_USER_ALIGN (f_args) = 1;
2748
2749 DECL_FIELD_CONTEXT (f_skip) = record;
2750 DECL_ALIGN (f_skip) = 128;
2751 DECL_USER_ALIGN (f_skip) = 1;
2752
2753 TREE_CHAIN (record) = type_decl;
2754 TYPE_NAME (record) = type_decl;
2755 TYPE_FIELDS (record) = f_args;
2756 TREE_CHAIN (f_args) = f_skip;
2757
 2758	 /* We know this is being padded and we want it that way. It is an internal
2759 type so hide the warnings from the user. */
2760 owp = warn_padded;
2761 warn_padded = false;
2762
2763 layout_type (record);
2764
2765 warn_padded = owp;
2766
2767 /* The correct type is an array type of one element. */
2768 return build_array_type (record, build_index_type (size_zero_node));
2769}
2770
2771/* Implement va_start by filling the va_list structure VALIST.
2772 NEXTARG points to the first anonymous stack argument.
2773
2774 The following global variables are used to initialize
2775 the va_list structure:
2776
 2777	 current_function_args_info:
2778 the CUMULATIVE_ARGS for this function
2779
2780 current_function_arg_offset_rtx:
2781 holds the offset of the first anonymous stack argument
2782 (relative to the virtual arg pointer). */
2783
2784void
2785spu_va_start (tree valist, rtx nextarg)
2786{
2787 tree f_args, f_skip;
2788 tree args, skip, t;
2789
2790 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2791 f_skip = TREE_CHAIN (f_args);
2792
2793 valist = build_va_arg_indirect_ref (valist);
2794 args =
2795 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2796 skip =
2797 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2798
2799 /* Find the __args area. */
2800 t = make_tree (TREE_TYPE (args), nextarg);
2801 if (current_function_pretend_args_size > 0)
2802 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
2803 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
35cc02b5 2804 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
644459d0 2805 TREE_SIDE_EFFECTS (t) = 1;
2806 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2807
2808 /* Find the __skip area. */
2809 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2810 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
2811 build_int_cst (integer_type_node,
2812 (current_function_pretend_args_size
2813 - STACK_POINTER_OFFSET)));
35cc02b5 2814 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
644459d0 2815 TREE_SIDE_EFFECTS (t) = 1;
2816 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2817}
2818
2819/* Gimplify va_arg by updating the va_list structure
2820 VALIST as required to retrieve an argument of type
2821 TYPE, and returning that argument.
2822
2823 ret = va_arg(VALIST, TYPE);
2824
2825 generates code equivalent to:
2826
2827 paddedsize = (sizeof(TYPE) + 15) & -16;
2828 if (VALIST.__args + paddedsize > VALIST.__skip
2829 && VALIST.__args <= VALIST.__skip)
2830 addr = VALIST.__skip + 32;
2831 else
2832 addr = VALIST.__args;
2833 VALIST.__args = addr + paddedsize;
2834 ret = *(TYPE *)addr;
2835 */
2836static tree
2837spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
2838 tree * post_p ATTRIBUTE_UNUSED)
2839{
2840 tree f_args, f_skip;
2841 tree args, skip;
2842 HOST_WIDE_INT size, rsize;
2843 tree paddedsize, addr, tmp;
2844 bool pass_by_reference_p;
2845
2846 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2847 f_skip = TREE_CHAIN (f_args);
2848
2849 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2850 args =
2851 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2852 skip =
2853 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2854
2855 addr = create_tmp_var (ptr_type_node, "va_arg");
2856 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
2857
2858 /* if an object is dynamically sized, a pointer to it is passed
2859 instead of the object itself. */
2860 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
2861 false);
2862 if (pass_by_reference_p)
2863 type = build_pointer_type (type);
2864 size = int_size_in_bytes (type);
2865 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
2866
2867 /* build conditional expression to calculate addr. The expression
2868 will be gimplified later. */
2869 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
2870 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
2871 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
2872 build2 (GT_EXPR, boolean_type_node, tmp, skip),
2873 build2 (LE_EXPR, boolean_type_node, args, skip));
2874
2875 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2876 build2 (PLUS_EXPR, ptr_type_node, skip,
2877 fold_convert (ptr_type_node, size_int (32))), args);
2878
35cc02b5 2879 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
644459d0 2880 gimplify_and_add (tmp, pre_p);
2881
2882 /* update VALIST.__args */
2883 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
35cc02b5 2884 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
644459d0 2885 gimplify_and_add (tmp, pre_p);
2886
2887 addr = fold_convert (build_pointer_type (type), addr);
2888
2889 if (pass_by_reference_p)
2890 addr = build_va_arg_indirect_ref (addr);
2891
2892 return build_va_arg_indirect_ref (addr);
2893}
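/* Worked example (illustrative): for va_arg (ap, double) the padded size is
   16; when __args + 16 would step past __skip while __args itself is still
   at or below __skip, the argument lives beyond the caller's 32-byte
   register save area, so addr becomes __skip + 32; otherwise addr is simply
   __args.  Either way __args then advances by the padded size.  */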
2894
2895/* Save parameter registers starting with the register that corresponds
 2896	 to the first unnamed parameter. If the first unnamed parameter is
 2897	 on the stack then save no registers. Set pretend_args_size to the
2898 amount of space needed to save the registers. */
2899void
2900spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
2901 tree type, int *pretend_size, int no_rtl)
2902{
2903 if (!no_rtl)
2904 {
2905 rtx tmp;
2906 int regno;
2907 int offset;
2908 int ncum = *cum;
2909
 2910	 /* cum currently points to the last named argument; we want to
2911 start at the next argument. */
2912 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
2913
2914 offset = -STACK_POINTER_OFFSET;
2915 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
2916 {
2917 tmp = gen_frame_mem (V4SImode,
2918 plus_constant (virtual_incoming_args_rtx,
2919 offset));
2920 emit_move_insn (tmp,
2921 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
2922 offset += 16;
2923 }
2924 *pretend_size = offset + STACK_POINTER_OFFSET;
2925 }
2926}
2927\f
2928void
2929spu_conditional_register_usage (void)
2930{
2931 if (flag_pic)
2932 {
2933 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2934 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2935 }
2936 global_regs[INTR_REGNUM] = 1;
2937}
2938
2939/* This is called to decide when we can simplify a load instruction. We
2940 must only return true for registers which we know will always be
 2941	 aligned, taking into account that CSE might replace this reg with
 2942	 another one that has not been marked aligned.
 2943	 So this is really only true for frame, stack and virtual registers,
fa7637bd 2944	 which we know are always aligned and should not be adversely affected
2945 by CSE. */
644459d0 2946static int
2947regno_aligned_for_load (int regno)
2948{
2949 return regno == FRAME_POINTER_REGNUM
2950 || regno == HARD_FRAME_POINTER_REGNUM
2951 || regno == STACK_POINTER_REGNUM
2952 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
2953}
2954
2955/* Return TRUE when mem is known to be 16-byte aligned. */
2956int
2957aligned_mem_p (rtx mem)
2958{
2959 if (MEM_ALIGN (mem) >= 128)
2960 return 1;
2961 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
2962 return 1;
2963 if (GET_CODE (XEXP (mem, 0)) == PLUS)
2964 {
2965 rtx p0 = XEXP (XEXP (mem, 0), 0);
2966 rtx p1 = XEXP (XEXP (mem, 0), 1);
2967 if (regno_aligned_for_load (REGNO (p0)))
2968 {
2969 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
2970 return 1;
2971 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2972 return 1;
2973 }
2974 }
2975 else if (GET_CODE (XEXP (mem, 0)) == REG)
2976 {
2977 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
2978 return 1;
2979 }
2980 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
2981 return 1;
2982 else if (GET_CODE (XEXP (mem, 0)) == CONST)
2983 {
2984 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
2985 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
2986 if (GET_CODE (p0) == SYMBOL_REF
2987 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2988 return 1;
2989 }
2990 return 0;
2991}
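/* Examples (illustrative): a MEM whose address is
   (plus (reg fp) (const_int 48)) is reported aligned because the frame
   pointer is always 16-byte aligned and the offset is a multiple of 16,
   while (plus (reg 5) (const_int 48)) is not, unless MEM_ALIGN or the mode
   size already guarantees it.  */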
2992
2993/* Return TRUE if we are certain the mem refers to a complete object
2994 which is both 16-byte aligned and padded to a 16-byte boundary. This
2995 would make it safe to store with a single instruction.
2996 We guarantee the alignment and padding for static objects by aligning
2997 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
2998 FIXME: We currently cannot guarantee this for objects on the stack
2999 because assign_parm_setup_stack calls assign_stack_local with the
3000 alignment of the parameter mode and in that case the alignment never
3001 gets adjusted by LOCAL_ALIGNMENT. */
3002static int
3003store_with_one_insn_p (rtx mem)
3004{
3005 rtx addr = XEXP (mem, 0);
3006 if (GET_MODE (mem) == BLKmode)
3007 return 0;
3008 /* Only static objects. */
3009 if (GET_CODE (addr) == SYMBOL_REF)
3010 {
3011 /* We use the associated declaration to make sure the access is
fa7637bd 3012 referring to the whole object.
644459d0 3013	 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3014 if it is necessary. Will there be cases where one exists, and
3015 the other does not? Will there be cases where both exist, but
3016 have different types? */
3017 tree decl = MEM_EXPR (mem);
3018 if (decl
3019 && TREE_CODE (decl) == VAR_DECL
3020 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3021 return 1;
3022 decl = SYMBOL_REF_DECL (addr);
3023 if (decl
3024 && TREE_CODE (decl) == VAR_DECL
3025 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3026 return 1;
3027 }
3028 return 0;
3029}
3030
3031int
3032spu_expand_mov (rtx * ops, enum machine_mode mode)
3033{
3034 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3035 abort ();
3036
3037 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3038 {
3039 rtx from = SUBREG_REG (ops[1]);
3040 enum machine_mode imode = GET_MODE (from);
3041
3042 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3043 && GET_MODE_CLASS (imode) == MODE_INT
3044 && subreg_lowpart_p (ops[1]));
3045
3046 if (GET_MODE_SIZE (imode) < 4)
3047 {
3048 from = gen_rtx_SUBREG (SImode, from, 0);
3049 imode = SImode;
3050 }
3051
3052 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3053 {
3054 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3055 emit_insn (GEN_FCN (icode) (ops[0], from));
3056 }
3057 else
3058 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3059 return 1;
3060 }
3061
3062 /* At least one of the operands needs to be a register. */
3063 if ((reload_in_progress | reload_completed) == 0
3064 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3065 {
3066 rtx temp = force_reg (mode, ops[1]);
3067 emit_move_insn (ops[0], temp);
3068 return 1;
3069 }
3070 if (reload_in_progress || reload_completed)
3071 {
3072 enum machine_mode mode = GET_MODE (ops[0]);
3073 if (GET_CODE (ops[1]) == CONST_INT
3074 && (mode == DImode || mode == TImode)
3075 && ((INTVAL (ops[1]) >> 32) & 0xffffffffll) !=
3076 (INTVAL (ops[1]) & 0xffffffffll))
3077 {
3078 rtx mem = force_const_mem (mode, ops[1]);
3079 if (TARGET_LARGE_MEM)
3080 {
3081 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
3082 emit_move_insn (addr, XEXP (mem, 0));
3083 mem = replace_equiv_address (mem, addr);
3084 }
3085 emit_move_insn (ops[0], mem);
3086 return 1;
3087 }
3088 else if ((GET_CODE (ops[1]) == CONST_INT
3089 || GET_CODE (ops[1]) == CONST_DOUBLE
3090 || GET_CODE (ops[1]) == CONST_VECTOR)
3091 && !immediate_load_p (ops[1], mode)
3092 && !fsmbi_const_p (ops[1]))
3093 {
3094 unsigned char arrlo[16];
3095 unsigned char arrhi[16];
3096 rtx to = ops[0], hi, lo;
3097 int i;
3098 constant_to_array (mode, ops[1], arrhi);
3099 for (i = 0; i < 16; i += 4)
3100 {
3101 arrlo[i + 2] = arrhi[i + 2];
3102 arrlo[i + 3] = arrhi[i + 3];
3103 arrlo[i + 0] = arrlo[i + 1] = 0;
3104 arrhi[i + 2] = arrhi[i + 3] = 0;
3105 }
3106 if (mode == SFmode)
3107 {
3108 to = spu_gen_subreg (SImode, ops[0]);
3109 mode = SImode;
3110 }
3111 else if (mode == V4SFmode)
3112 {
3113 to = spu_gen_subreg (V4SImode, ops[0]);
3114 mode = V4SImode;
3115 }
3116 hi = array_to_constant (mode, arrhi);
3117 lo = array_to_constant (mode, arrlo);
3118 emit_move_insn (to, hi);
3119 emit_insn (gen_rtx_SET (VOIDmode, to, gen_rtx_IOR (mode, to, lo)));
3120 return 1;
3121 }
644459d0 3122 return 0;
3123 }
3124 else
3125 {
3126 if (GET_CODE (ops[0]) == MEM)
3127 {
3128 if (!spu_valid_move (ops))
3129 {
3130 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3131 gen_reg_rtx (TImode)));
3132 return 1;
3133 }
3134 }
3135 else if (GET_CODE (ops[1]) == MEM)
3136 {
3137 if (!spu_valid_move (ops))
3138 {
3139 emit_insn (gen_load
3140 (ops[0], ops[1], gen_reg_rtx (TImode),
3141 gen_reg_rtx (SImode)));
3142 return 1;
3143 }
3144 }
3145 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3146 extend them. */
3147 if (GET_CODE (ops[1]) == CONST_INT)
3148 {
3149 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3150 if (val != INTVAL (ops[1]))
3151 {
3152 emit_move_insn (ops[0], GEN_INT (val));
3153 return 1;
3154 }
3155 }
3156 }
3157 return 0;
3158}
3159
3160static int
3161reg_align (rtx reg)
3162{
3163 /* For now, only frame registers are known to be aligned at all times.
3164 We can't trust REGNO_POINTER_ALIGN because optimization will move
3165 registers around, potentially changing an "aligned" register in an
3166 address to an unaligned register, which would result in an invalid
3167 address. */
3168 int regno = REGNO (reg);
3169 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3170}
3171
3172void
3173spu_split_load (rtx * ops)
3174{
3175 enum machine_mode mode = GET_MODE (ops[0]);
3176 rtx addr, load, rot, mem, p0, p1;
3177 int rot_amt;
3178
3179 addr = XEXP (ops[1], 0);
3180
3181 rot = 0;
3182 rot_amt = 0;
3183 if (GET_CODE (addr) == PLUS)
3184 {
3185 /* 8 cases:
3186 aligned reg + aligned reg => lqx
3187 aligned reg + unaligned reg => lqx, rotqby
3188 aligned reg + aligned const => lqd
3189 aligned reg + unaligned const => lqd, rotqbyi
3190 unaligned reg + aligned reg => lqx, rotqby
3191 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3192 unaligned reg + aligned const => lqd, rotqby
3193 unaligned reg + unaligned const -> not allowed by legitimate address
3194 */
3195 p0 = XEXP (addr, 0);
3196 p1 = XEXP (addr, 1);
3197 if (reg_align (p0) < 128)
3198 {
3199 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3200 {
3201 emit_insn (gen_addsi3 (ops[3], p0, p1));
3202 rot = ops[3];
3203 }
3204 else
3205 rot = p0;
3206 }
3207 else
3208 {
3209 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3210 {
3211 rot_amt = INTVAL (p1) & 15;
3212 p1 = GEN_INT (INTVAL (p1) & -16);
3213 addr = gen_rtx_PLUS (SImode, p0, p1);
3214 }
3215 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3216 rot = p1;
3217 }
3218 }
3219 else if (GET_CODE (addr) == REG)
3220 {
3221 if (reg_align (addr) < 128)
3222 rot = addr;
3223 }
3224 else if (GET_CODE (addr) == CONST)
3225 {
3226 if (GET_CODE (XEXP (addr, 0)) == PLUS
3227 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3228 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3229 {
3230 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3231 if (rot_amt & -16)
3232 addr = gen_rtx_CONST (Pmode,
3233 gen_rtx_PLUS (Pmode,
3234 XEXP (XEXP (addr, 0), 0),
3235 GEN_INT (rot_amt & -16)));
3236 else
3237 addr = XEXP (XEXP (addr, 0), 0);
3238 }
3239 else
3240 rot = addr;
3241 }
3242 else if (GET_CODE (addr) == CONST_INT)
3243 {
3244 rot_amt = INTVAL (addr);
3245 addr = GEN_INT (rot_amt & -16);
3246 }
3247 else if (!ALIGNED_SYMBOL_REF_P (addr))
3248 rot = addr;
3249
3250 if (GET_MODE_SIZE (mode) < 4)
3251 rot_amt += GET_MODE_SIZE (mode) - 4;
3252
3253 rot_amt &= 15;
3254
3255 if (rot && rot_amt)
3256 {
3257 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3258 rot = ops[3];
3259 rot_amt = 0;
3260 }
3261
3262 load = ops[2];
3263
3264 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3265 mem = change_address (ops[1], TImode, addr);
3266
e04cf423 3267 emit_insn (gen_movti (load, mem));
644459d0 3268
3269 if (rot)
3270 emit_insn (gen_rotqby_ti (load, load, rot));
3271 else if (rot_amt)
3272 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3273
3274 if (reload_completed)
3275 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3276 else
3277 emit_insn (gen_spu_convert (ops[0], load));
3278}
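/* Hedged sketch of the expansion above for an SImode load from
   (plus (reg fp) (const_int 6)): the offset splits into an aligned part (0)
   and a residue (6), a whole quadword is fetched with an aligned TImode
   move, and the result is rotated left by 6 bytes (rotqbyi or an equivalent
   rotate) so the wanted word lands in the preferred slot before
   spu_convert extracts it.  */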
3279
3280void
3281spu_split_store (rtx * ops)
3282{
3283 enum machine_mode mode = GET_MODE (ops[0]);
3284 rtx pat = ops[2];
3285 rtx reg = ops[3];
3286 rtx addr, p0, p1, p1_lo, smem;
3287 int aform;
3288 int scalar;
3289
3290 addr = XEXP (ops[0], 0);
3291
3292 if (GET_CODE (addr) == PLUS)
3293 {
3294 /* 8 cases:
3295 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3296 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3297 aligned reg + aligned const => lqd, c?d, shuf, stqx
3298 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3299 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3300 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3301 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3302 unaligned reg + unaligned const -> not allowed by legitimate address
3303 */
3304 aform = 0;
3305 p0 = XEXP (addr, 0);
3306 p1 = p1_lo = XEXP (addr, 1);
3307 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3308 {
3309 p1_lo = GEN_INT (INTVAL (p1) & 15);
3310 p1 = GEN_INT (INTVAL (p1) & -16);
3311 addr = gen_rtx_PLUS (SImode, p0, p1);
3312 }
3313 }
3314 else if (GET_CODE (addr) == REG)
3315 {
3316 aform = 0;
3317 p0 = addr;
3318 p1 = p1_lo = const0_rtx;
3319 }
3320 else
3321 {
3322 aform = 1;
3323 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3324 p1 = 0; /* aform doesn't use p1 */
3325 p1_lo = addr;
3326 if (ALIGNED_SYMBOL_REF_P (addr))
3327 p1_lo = const0_rtx;
3328 else if (GET_CODE (addr) == CONST)
3329 {
3330 if (GET_CODE (XEXP (addr, 0)) == PLUS
3331 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3332 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3333 {
3334 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3335 if ((v & -16) != 0)
3336 addr = gen_rtx_CONST (Pmode,
3337 gen_rtx_PLUS (Pmode,
3338 XEXP (XEXP (addr, 0), 0),
3339 GEN_INT (v & -16)));
3340 else
3341 addr = XEXP (XEXP (addr, 0), 0);
3342 p1_lo = GEN_INT (v & 15);
3343 }
3344 }
3345 else if (GET_CODE (addr) == CONST_INT)
3346 {
3347 p1_lo = GEN_INT (INTVAL (addr) & 15);
3348 addr = GEN_INT (INTVAL (addr) & -16);
3349 }
3350 }
3351
e04cf423 3352 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3353
644459d0 3354 scalar = store_with_one_insn_p (ops[0]);
3355 if (!scalar)
3356 {
3357 /* We could copy the flags from the ops[0] MEM to mem here,
 3358	 but we don't, because we want this load to be optimized away if
3359 possible, and copying the flags will prevent that in certain
3360 cases, e.g. consider the volatile flag. */
3361
e04cf423 3362 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3363 set_mem_alias_set (lmem, 0);
3364 emit_insn (gen_movti (reg, lmem));
644459d0 3365
3366 if (!p0 || reg_align (p0) >= 128)
3367 p0 = stack_pointer_rtx;
3368 if (!p1_lo)
3369 p1_lo = const0_rtx;
3370
3371 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3372 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3373 }
3374 else if (reload_completed)
3375 {
3376 if (GET_CODE (ops[1]) == REG)
3377 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3378 else if (GET_CODE (ops[1]) == SUBREG)
3379 emit_move_insn (reg,
3380 gen_rtx_REG (GET_MODE (reg),
3381 REGNO (SUBREG_REG (ops[1]))));
3382 else
3383 abort ();
3384 }
3385 else
3386 {
3387 if (GET_CODE (ops[1]) == REG)
3388 emit_insn (gen_spu_convert (reg, ops[1]));
3389 else if (GET_CODE (ops[1]) == SUBREG)
3390 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3391 else
3392 abort ();
3393 }
3394
3395 if (GET_MODE_SIZE (mode) < 4 && scalar)
3396 emit_insn (gen_shlqby_ti
3397 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3398
644459d0 3399 smem = change_address (ops[0], TImode, addr);
3400 /* We can't use the previous alias set because the memory has changed
3401 size and can potentially overlap objects of other types. */
3402 set_mem_alias_set (smem, 0);
3403
e04cf423 3404 emit_insn (gen_movti (smem, reg));
644459d0 3405}
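/* Hedged sketch of the unaligned-store path above: when the store cannot be
   done with a single instruction, the enclosing quadword is loaded, cpat
   builds a control pattern from the offset and access size, shufb merges
   the new value into the loaded quadword at that position, and the merged
   quadword is written back with a TImode store.  */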
3406
3407/* Return TRUE if X is MEM which is a struct member reference
3408 and the member can safely be loaded and stored with a single
3409 instruction because it is padded. */
3410static int
3411mem_is_padded_component_ref (rtx x)
3412{
3413 tree t = MEM_EXPR (x);
3414 tree r;
3415 if (!t || TREE_CODE (t) != COMPONENT_REF)
3416 return 0;
3417 t = TREE_OPERAND (t, 1);
3418 if (!t || TREE_CODE (t) != FIELD_DECL
3419 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3420 return 0;
3421 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3422 r = DECL_FIELD_CONTEXT (t);
3423 if (!r || TREE_CODE (r) != RECORD_TYPE)
3424 return 0;
3425 /* Make sure they are the same mode */
3426 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3427 return 0;
 3428	 /* If there are no following fields, then the field alignment ensures
fa7637bd 3429	 the structure is padded to that alignment, which means this field is
3430 padded too. */
644459d0 3431 if (TREE_CHAIN (t) == 0)
3432 return 1;
3433 /* If the following field is also aligned then this field will be
3434 padded. */
3435 t = TREE_CHAIN (t);
3436 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3437 return 1;
3438 return 0;
3439}
3440
3441int
3442spu_valid_move (rtx * ops)
3443{
3444 enum machine_mode mode = GET_MODE (ops[0]);
3445 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3446 return 0;
3447
3448 /* init_expr_once tries to recog against load and store insns to set
3449 the direct_load[] and direct_store[] arrays. We always want to
3450 consider those loads and stores valid. init_expr_once is called in
3451 the context of a dummy function which does not have a decl. */
3452 if (cfun->decl == 0)
3453 return 1;
3454
 3455	 /* Don't allow loads/stores which would require more than 1 insn.
3456 During and after reload we assume loads and stores only take 1
3457 insn. */
3458 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3459 {
3460 if (GET_CODE (ops[0]) == MEM
3461 && (GET_MODE_SIZE (mode) < 4
3462 || !(store_with_one_insn_p (ops[0])
3463 || mem_is_padded_component_ref (ops[0]))))
3464 return 0;
3465 if (GET_CODE (ops[1]) == MEM
3466 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3467 return 0;
3468 }
3469 return 1;
3470}
3471
3472/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3473 can be generated using the fsmbi instruction. */
3474int
3475fsmbi_const_p (rtx x)
3476{
3477 enum machine_mode mode;
3478 unsigned char arr[16];
3479 int i;
3480
3481 /* We can always choose DImode for CONST_INT because the high bits of
3482 an SImode will always be all 1s, i.e., valid for fsmbi. */
3483 mode = GET_CODE (x) == CONST_INT ? DImode : GET_MODE (x);
3484 constant_to_array (mode, x, arr);
3485
3486 for (i = 0; i < 16; i++)
3487 if (arr[i] != 0 && arr[i] != 0xff)
3488 return 0;
3489 return 1;
3490}
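/* Example (illustrative, assuming the usual left-to-right bit-to-byte
   mapping of fsmbi): a V8HI constant with every element 0x00ff has a byte
   pattern of alternating 0x00 and 0xff, so it is accepted and a single
   fsmbi with mask 0x5555 can materialize it; any constant containing a byte
   other than 0x00 or 0xff is rejected.  */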
3491
3492/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3493 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3494 than 16 bytes, the value is repeated across the rest of the array. */
3495void
3496constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3497{
3498 HOST_WIDE_INT val;
3499 int i, j, first;
3500
3501 memset (arr, 0, 16);
3502 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3503 if (GET_CODE (x) == CONST_INT
3504 || (GET_CODE (x) == CONST_DOUBLE
3505 && (mode == SFmode || mode == DFmode)))
3506 {
3507 gcc_assert (mode != VOIDmode && mode != BLKmode);
3508
3509 if (GET_CODE (x) == CONST_DOUBLE)
3510 val = const_double_to_hwint (x);
3511 else
3512 val = INTVAL (x);
3513 first = GET_MODE_SIZE (mode) - 1;
3514 for (i = first; i >= 0; i--)
3515 {
3516 arr[i] = val & 0xff;
3517 val >>= 8;
3518 }
3519 /* Splat the constant across the whole array. */
3520 for (j = 0, i = first + 1; i < 16; i++)
3521 {
3522 arr[i] = arr[j];
3523 j = (j == first) ? 0 : j + 1;
3524 }
3525 }
3526 else if (GET_CODE (x) == CONST_DOUBLE)
3527 {
3528 val = CONST_DOUBLE_LOW (x);
3529 for (i = 15; i >= 8; i--)
3530 {
3531 arr[i] = val & 0xff;
3532 val >>= 8;
3533 }
3534 val = CONST_DOUBLE_HIGH (x);
3535 for (i = 7; i >= 0; i--)
3536 {
3537 arr[i] = val & 0xff;
3538 val >>= 8;
3539 }
3540 }
3541 else if (GET_CODE (x) == CONST_VECTOR)
3542 {
3543 int units;
3544 rtx elt;
3545 mode = GET_MODE_INNER (mode);
3546 units = CONST_VECTOR_NUNITS (x);
3547 for (i = 0; i < units; i++)
3548 {
3549 elt = CONST_VECTOR_ELT (x, i);
3550 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3551 {
3552 if (GET_CODE (elt) == CONST_DOUBLE)
3553 val = const_double_to_hwint (elt);
3554 else
3555 val = INTVAL (elt);
3556 first = GET_MODE_SIZE (mode) - 1;
3557 if (first + i * GET_MODE_SIZE (mode) > 16)
3558 abort ();
3559 for (j = first; j >= 0; j--)
3560 {
3561 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3562 val >>= 8;
3563 }
3564 }
3565 }
3566 }
3567 else
3568 gcc_unreachable();
3569}
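/* Example (illustrative): constant_to_array (SImode, GEN_INT (0x01020304),
   arr) fills arr with the bytes 01 02 03 04 repeated four times, while a
   CONST_VECTOR keeps each element in its own lane without splatting.  */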
3570
3571/* Convert a 16 byte array to a constant of mode MODE. When MODE is
3572 smaller than 16 bytes, use the bytes that would represent that value
3573 in a register, e.g., for QImode return the value of arr[3]. */
3574rtx
3575array_to_constant (enum machine_mode mode, unsigned char arr[16])
3576{
3577 enum machine_mode inner_mode;
3578 rtvec v;
3579 int units, size, i, j, k;
3580 HOST_WIDE_INT val;
3581
3582 if (GET_MODE_CLASS (mode) == MODE_INT
3583 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3584 {
3585 j = GET_MODE_SIZE (mode);
3586 i = j < 4 ? 4 - j : 0;
3587 for (val = 0; i < j; i++)
3588 val = (val << 8) | arr[i];
3589 val = trunc_int_for_mode (val, mode);
3590 return GEN_INT (val);
3591 }
3592
3593 if (mode == TImode)
3594 {
3595 HOST_WIDE_INT high;
3596 for (i = high = 0; i < 8; i++)
3597 high = (high << 8) | arr[i];
3598 for (i = 8, val = 0; i < 16; i++)
3599 val = (val << 8) | arr[i];
3600 return immed_double_const (val, high, TImode);
3601 }
3602 if (mode == SFmode)
3603 {
3604 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3605 val = trunc_int_for_mode (val, SImode);
171b6d22 3606 return hwint_to_const_double (SFmode, val);
644459d0 3607 }
3608 if (mode == DFmode)
3609 {
3610 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3611 val <<= 32;
3612 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 3613 return hwint_to_const_double (DFmode, val);
644459d0 3614 }
3615
3616 if (!VECTOR_MODE_P (mode))
3617 abort ();
3618
3619 units = GET_MODE_NUNITS (mode);
3620 size = GET_MODE_UNIT_SIZE (mode);
3621 inner_mode = GET_MODE_INNER (mode);
3622 v = rtvec_alloc (units);
3623
3624 for (k = i = 0; i < units; ++i)
3625 {
3626 val = 0;
3627 for (j = 0; j < size; j++, k++)
3628 val = (val << 8) | arr[k];
3629
3630 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3631 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3632 else
3633 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3634 }
3635 if (k > 16)
3636 abort ();
3637
3638 return gen_rtx_CONST_VECTOR (mode, v);
3639}
3640
3641static void
3642reloc_diagnostic (rtx x)
3643{
3644 tree loc_decl, decl = 0;
3645 const char *msg;
3646 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3647 return;
3648
3649 if (GET_CODE (x) == SYMBOL_REF)
3650 decl = SYMBOL_REF_DECL (x);
3651 else if (GET_CODE (x) == CONST
3652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3653 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3654
3655 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3656 if (decl && !DECL_P (decl))
3657 decl = 0;
3658
3659 /* We use last_assemble_variable_decl to get line information. It's
3660 not always going to be right and might not even be close, but will
3661 be right for the more common cases. */
3662 if (!last_assemble_variable_decl)
3663 loc_decl = decl;
3664 else
3665 loc_decl = last_assemble_variable_decl;
3666
3667 /* The decl could be a string constant. */
3668 if (decl && DECL_P (decl))
3669 msg = "%Jcreating run-time relocation for %qD";
3670 else
3671 msg = "creating run-time relocation";
3672
99369027 3673 if (TARGET_WARN_RELOC)
644459d0 3674 warning (0, msg, loc_decl, decl);
99369027 3675 else
3676 error (msg, loc_decl, decl);
644459d0 3677}
3678
3679/* Hook into assemble_integer so we can generate an error for run-time
3680 relocations. The SPU ABI disallows them. */
3681static bool
3682spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3683{
3684 /* By default run-time relocations aren't supported, but we allow them
 3685	 in case users support them in their own run-time loader. And we provide
3686 a warning for those users that don't. */
3687 if ((GET_CODE (x) == SYMBOL_REF)
3688 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3689 reloc_diagnostic (x);
3690
3691 return default_assemble_integer (x, size, aligned_p);
3692}
3693
3694static void
3695spu_asm_globalize_label (FILE * file, const char *name)
3696{
3697 fputs ("\t.global\t", file);
3698 assemble_name (file, name);
3699 fputs ("\n", file);
3700}
3701
3702static bool
3703spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3704{
3705 enum machine_mode mode = GET_MODE (x);
3706 int cost = COSTS_N_INSNS (2);
3707
3708 /* Folding to a CONST_VECTOR will use extra space but there might
3709 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 3710 only if it allows us to fold away multiple insns. Changing the cost
644459d0 3711 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3712 because this cost will only be compared against a single insn.
3713 if (code == CONST_VECTOR)
3714 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3715 */
3716
3717 /* Use defaults for float operations. Not accurate but good enough. */
3718 if (mode == DFmode)
3719 {
3720 *total = COSTS_N_INSNS (13);
3721 return true;
3722 }
3723 if (mode == SFmode)
3724 {
3725 *total = COSTS_N_INSNS (6);
3726 return true;
3727 }
3728 switch (code)
3729 {
3730 case CONST_INT:
3731 if (satisfies_constraint_K (x))
3732 *total = 0;
3733 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
3734 *total = COSTS_N_INSNS (1);
3735 else
3736 *total = COSTS_N_INSNS (3);
3737 return true;
3738
3739 case CONST:
3740 *total = COSTS_N_INSNS (3);
3741 return true;
3742
3743 case LABEL_REF:
3744 case SYMBOL_REF:
3745 *total = COSTS_N_INSNS (0);
3746 return true;
3747
3748 case CONST_DOUBLE:
3749 *total = COSTS_N_INSNS (5);
3750 return true;
3751
3752 case FLOAT_EXTEND:
3753 case FLOAT_TRUNCATE:
3754 case FLOAT:
3755 case UNSIGNED_FLOAT:
3756 case FIX:
3757 case UNSIGNED_FIX:
3758 *total = COSTS_N_INSNS (7);
3759 return true;
3760
3761 case PLUS:
3762 if (mode == TImode)
3763 {
3764 *total = COSTS_N_INSNS (9);
3765 return true;
3766 }
3767 break;
3768
3769 case MULT:
3770 cost =
3771 GET_CODE (XEXP (x, 0)) ==
3772 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
3773 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
3774 {
3775 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3776 {
3777 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3778 cost = COSTS_N_INSNS (14);
3779 if ((val & 0xffff) == 0)
3780 cost = COSTS_N_INSNS (9);
3781 else if (val > 0 && val < 0x10000)
3782 cost = COSTS_N_INSNS (11);
3783 }
3784 }
3785 *total = cost;
3786 return true;
3787 case DIV:
3788 case UDIV:
3789 case MOD:
3790 case UMOD:
3791 *total = COSTS_N_INSNS (20);
3792 return true;
3793 case ROTATE:
3794 case ROTATERT:
3795 case ASHIFT:
3796 case ASHIFTRT:
3797 case LSHIFTRT:
3798 *total = COSTS_N_INSNS (4);
3799 return true;
3800 case UNSPEC:
3801 if (XINT (x, 1) == UNSPEC_CONVERT)
3802 *total = COSTS_N_INSNS (0);
3803 else
3804 *total = COSTS_N_INSNS (4);
3805 return true;
3806 }
3807 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
3808 if (GET_MODE_CLASS (mode) == MODE_INT
3809 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
3810 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
3811 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
3812 *total = cost;
3813 return true;
3814}
3815
3816enum machine_mode
3817spu_eh_return_filter_mode (void)
3818{
 3819	 /* We would like this to be SImode, but sjlj exceptions seem to work
3820 only with word_mode. */
3821 return TImode;
3822}
3823
3824/* Decide whether we can make a sibling call to a function. DECL is the
3825 declaration of the function being targeted by the call and EXP is the
3826 CALL_EXPR representing the call. */
3827static bool
3828spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3829{
3830 return decl && !TARGET_LARGE_MEM;
3831}
3832
3833/* We need to correctly update the back chain pointer and the Available
3834 Stack Size (which is in the second slot of the sp register.) */
3835void
3836spu_allocate_stack (rtx op0, rtx op1)
3837{
3838 HOST_WIDE_INT v;
3839 rtx chain = gen_reg_rtx (V4SImode);
3840 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
3841 rtx sp = gen_reg_rtx (V4SImode);
3842 rtx splatted = gen_reg_rtx (V4SImode);
3843 rtx pat = gen_reg_rtx (TImode);
3844
3845 /* copy the back chain so we can save it back again. */
3846 emit_move_insn (chain, stack_bot);
3847
3848 op1 = force_reg (SImode, op1);
3849
3850 v = 0x1020300010203ll;
3851 emit_move_insn (pat, immed_double_const (v, v, TImode));
3852 emit_insn (gen_shufb (splatted, op1, op1, pat));
3853
3854 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
3855 emit_insn (gen_subv4si3 (sp, sp, splatted));
3856
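  /* With -fstack-check, halt (heq) when the new Available Stack Size in
     word 1 has gone negative, i.e. when the allocation ran off the end
     of the stack.  */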
3857 if (flag_stack_check)
3858 {
3859       rtx avail = gen_reg_rtx (SImode);
3860       rtx result = gen_reg_rtx (SImode);
3861       emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
3862       emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
3863       emit_insn (gen_spu_heq (result, GEN_INT (0)));
3864 }
3865
3866 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
3867
3868 emit_move_insn (stack_bot, chain);
3869
3870 emit_move_insn (op0, virtual_stack_dynamic_rtx);
3871}
3872
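/* Restore the stack pointer from the save area OP1, whose first word
   holds the back chain and whose second word holds the saved $sp.  The
   deltas from the current stack pointer are splatted and added to the
   whole $sp register so the Available Stack Size slot stays consistent,
   and the back chain is stored at the new stack bottom.  */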
3873void
3874spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
3875{
3876 static unsigned char arr[16] =
3877 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
3878 rtx temp = gen_reg_rtx (SImode);
3879 rtx temp2 = gen_reg_rtx (SImode);
3880 rtx temp3 = gen_reg_rtx (V4SImode);
3881 rtx temp4 = gen_reg_rtx (V4SImode);
3882 rtx pat = gen_reg_rtx (TImode);
3883 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
3884
3885 /* Restore the backchain from the first word, sp from the second. */
3886 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
3887 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
3888
3889 emit_move_insn (pat, array_to_constant (TImode, arr));
3890
3891 /* Compute Available Stack Size for sp */
3892 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
3893 emit_insn (gen_shufb (temp3, temp, temp, pat));
3894
3895 /* Compute Available Stack Size for back chain */
3896 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
3897 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
3898 emit_insn (gen_addv4si3 (temp4, sp, temp4));
3899
3900 emit_insn (gen_addv4si3 (sp, sp, temp3));
3901 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
3902}
3903
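/* Register library routines for the DImode operations that have no
   inline expansion on SPU, plus the unsigned-integer-to-double
   conversions.  */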
3904static void
3905spu_init_libfuncs (void)
3906{
3907 set_optab_libfunc (smul_optab, DImode, "__muldi3");
3908 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
3909 set_optab_libfunc (smod_optab, DImode, "__moddi3");
3910 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
3911 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
3912 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
3913 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
3914 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
3915 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
3916 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
3917 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
3918
3919 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
3920 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
3921}
3922
3923/* Make a subreg, stripping any existing subreg. We could possibly just
3924 call simplify_subreg, but in this case we know what we want. */
3925rtx
3926spu_gen_subreg (enum machine_mode mode, rtx x)
3927{
3928 if (GET_CODE (x) == SUBREG)
3929 x = SUBREG_REG (x);
3930 if (GET_MODE (x) == mode)
3931 return x;
3932 return gen_rtx_SUBREG (mode, x, 0);
3933}
3934
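/* Return TRUE if a value of TYPE must be returned in memory.  Only
   BLKmode values qualify, and then only when their size is not a
   compile-time constant or they would need more than
   MAX_REGISTER_RETURN registers.  */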
3935static bool
3936spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
3937{
3938 return (TYPE_MODE (type) == BLKmode
3939 && ((type) == 0
3940 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3941 || int_size_in_bytes (type) >
3942 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
3943}
3944\f
3945/* Create the built-in types and functions */
3946
3947struct spu_builtin_description spu_builtins[] = {
3948#define DEF_BUILTIN(fcode, icode, name, type, params) \
3949 {fcode, icode, name, type, params, NULL_TREE},
3950#include "spu-builtins.def"
3951#undef DEF_BUILTIN
3952};
3953
3954static void
3955spu_init_builtins (void)
3956{
3957 struct spu_builtin_description *d;
3958 unsigned int i;
3959
3960 V16QI_type_node = build_vector_type (intQI_type_node, 16);
3961 V8HI_type_node = build_vector_type (intHI_type_node, 8);
3962 V4SI_type_node = build_vector_type (intSI_type_node, 4);
3963 V2DI_type_node = build_vector_type (intDI_type_node, 2);
3964 V4SF_type_node = build_vector_type (float_type_node, 4);
3965 V2DF_type_node = build_vector_type (double_type_node, 2);
3966
3967 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
3968 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
3969 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
3970 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
3971
3972 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
3973
3974 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
3975 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
3976 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
3977 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
3978 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
3979 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
3980 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
3981 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
3982 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
3983 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
3984 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
3985 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
3986
3987 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
3988 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
3989 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
3990 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
3991 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
3992 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
3993 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
3994 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
3995
3996 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
3997 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
3998
3999 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4000
4001 spu_builtin_types[SPU_BTI_PTR] =
4002 build_pointer_type (build_qualified_type
4003 (void_type_node,
4004 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4005
4006 /* For each builtin we build a new prototype. The tree code will make
4007 sure nodes are shared. */
4008 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4009 {
4010 tree p;
4011       char name[64];	/* add_builtin_function will make a copy.  */
4012 int parm;
4013
4014 if (d->name == 0)
4015 continue;
4016
4017 /* find last parm */
4018 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4019 {
4020 }
4021
4022 p = void_list_node;
4023 while (parm > 1)
4024 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4025
4026 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4027
4028 sprintf (name, "__builtin_%s", d->name);
4029 d->fndecl =
4030 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4031 NULL, NULL_TREE);
4032 }
4033}
4034
4035int
4036spu_safe_dma (HOST_WIDE_INT channel)
4037{
4038 return (channel >= 21 && channel <= 27);
4039}
4040
4041void
4042spu_builtin_splats (rtx ops[])
4043{
4044 enum machine_mode mode = GET_MODE (ops[0]);
4045 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4046 {
4047 unsigned char arr[16];
4048 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4049 emit_move_insn (ops[0], array_to_constant (mode, arr));
4050 }
4051 else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4052 {
4053 rtvec v = rtvec_alloc (4);
4054 RTVEC_ELT (v, 0) = ops[1];
4055 RTVEC_ELT (v, 1) = ops[1];
4056 RTVEC_ELT (v, 2) = ops[1];
4057 RTVEC_ELT (v, 3) = ops[1];
4058 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4059 }
4060 else
4061 {
4062 rtx reg = gen_reg_rtx (TImode);
4063 rtx shuf;
4064 if (GET_CODE (ops[1]) != REG
4065 && GET_CODE (ops[1]) != SUBREG)
4066 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
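      /* The shufb control constant chosen below replicates the preferred
	 scalar slot of ops[1] across the whole vector: bytes 0-7 for
	 doubleword elements, bytes 0-3 for words, bytes 2-3 for halfwords
	 and byte 3 for bytes.  */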
4067 switch (mode)
4068 {
4069 case V2DImode:
4070 case V2DFmode:
4071 shuf =
4072 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4073 TImode);
4074 break;
4075 case V4SImode:
4076 case V4SFmode:
4077 shuf =
4078 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4079 TImode);
4080 break;
4081 case V8HImode:
4082 shuf =
4083 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4084 TImode);
4085 break;
4086 case V16QImode:
4087 shuf =
4088 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4089 TImode);
4090 break;
4091 default:
4092 abort ();
4093 }
4094 emit_move_insn (reg, shuf);
4095 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4096 }
4097}
4098
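/* Expand spu_extract: copy element ops[2] of the vector ops[1] into the
   scalar ops[0].  */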
4099void
4100spu_builtin_extract (rtx ops[])
4101{
4102 enum machine_mode mode;
4103 rtx rot, from, tmp;
4104
4105 mode = GET_MODE (ops[1]);
4106
4107 if (GET_CODE (ops[2]) == CONST_INT)
4108 {
4109 switch (mode)
4110 {
4111 case V16QImode:
4112 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4113 break;
4114 case V8HImode:
4115 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4116 break;
4117 case V4SFmode:
4118 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4119 break;
4120 case V4SImode:
4121 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4122 break;
4123 case V2DImode:
4124 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4125 break;
4126 case V2DFmode:
4127 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4128 break;
4129 default:
4130 abort ();
4131 }
4132 return;
4133 }
4134
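  /* For a variable index, compute the byte rotation that brings the
     selected element into the preferred scalar slot (byte 3 for bytes,
     bytes 2-3 for halfwords, the first word or doubleword otherwise),
     rotate the whole quadword, and let spu_convert extract the value.  */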
4135 from = spu_gen_subreg (TImode, ops[1]);
4136 rot = gen_reg_rtx (TImode);
4137 tmp = gen_reg_rtx (SImode);
4138
4139 switch (mode)
4140 {
4141 case V16QImode:
4142 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4143 break;
4144 case V8HImode:
4145 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4146 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4147 break;
4148 case V4SFmode:
4149 case V4SImode:
4150 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4151 break;
4152 case V2DImode:
4153 case V2DFmode:
4154 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4155 break;
4156 default:
4157 abort ();
4158 }
4159 emit_insn (gen_rotqby_ti (rot, from, tmp));
4160
4161 emit_insn (gen_spu_convert (ops[0], rot));
4162}
4163
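/* Expand spu_insert: build ops[0] by replacing element ops[3] of the
   vector ops[2] with the scalar ops[1].  The cpat mask makes shufb take
   the scalar's bytes at that element's byte offset and the original
   vector's bytes everywhere else; the stack pointer serves only as a
   conveniently 16-byte-aligned base address for cpat.  */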
4164void
4165spu_builtin_insert (rtx ops[])
4166{
4167 enum machine_mode mode = GET_MODE (ops[0]);
4168 enum machine_mode imode = GET_MODE_INNER (mode);
4169 rtx mask = gen_reg_rtx (TImode);
4170 rtx offset;
4171
4172 if (GET_CODE (ops[3]) == CONST_INT)
4173 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4174 else
4175 {
4176 offset = gen_reg_rtx (SImode);
4177 emit_insn (gen_mulsi3
4178 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4179 }
4180 emit_insn (gen_cpat
4181 (mask, stack_pointer_rtx, offset,
4182 GEN_INT (GET_MODE_SIZE (imode))));
4183 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4184}
4185
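/* Expand spu_promote: rotate the quadword holding the scalar ops[1] so
   that the scalar lands in element ops[2] of the result ops[0].  No
   effort is made to give the remaining elements defined values.  */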
4186void
4187spu_builtin_promote (rtx ops[])
4188{
4189 enum machine_mode mode, imode;
4190 rtx rot, from, offset;
4191 HOST_WIDE_INT pos;
4192
4193 mode = GET_MODE (ops[0]);
4194 imode = GET_MODE_INNER (mode);
4195
4196 from = gen_reg_rtx (TImode);
4197 rot = spu_gen_subreg (TImode, ops[0]);
4198
4199 emit_insn (gen_spu_convert (from, ops[1]));
4200
4201 if (GET_CODE (ops[2]) == CONST_INT)
4202 {
4203 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4204 if (GET_MODE_SIZE (imode) < 4)
4205 pos += 4 - GET_MODE_SIZE (imode);
4206 offset = GEN_INT (pos & 15);
4207 }
4208 else
4209 {
4210 offset = gen_reg_rtx (SImode);
4211 switch (mode)
4212 {
4213 case V16QImode:
4214 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4215 break;
4216 case V8HImode:
4217 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4218 emit_insn (gen_addsi3 (offset, offset, offset));
4219 break;
4220 case V4SFmode:
4221 case V4SImode:
4222 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4223 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4224 break;
4225 case V2DImode:
4226 case V2DFmode:
4227 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4228 break;
4229 default:
4230 abort ();
4231 }
4232 }
4233 emit_insn (gen_rotqby_ti (rot, from, offset));
4234}
4235
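/* Initialize the trampoline at TRAMP: store a short code sequence that
   loads the static chain register with CXT and jumps to FNADDR.  As
   best read from the insna arrays below, the -mlarge-mem version builds
   both 32-bit values with ilhu/iohl pairs (register 79 holds the branch
   target) and ends with an indirect branch, while the small-memory
   version is a single ila of CXT followed by an absolute branch.  The
   final sync makes the stored instructions visible to instruction
   fetch.  */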
4236void
4237spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4238{
4239 rtx shuf = gen_reg_rtx (V4SImode);
4240 rtx insn = gen_reg_rtx (V4SImode);
4241 rtx shufc;
4242 rtx insnc;
4243 rtx mem;
4244
4245 fnaddr = force_reg (SImode, fnaddr);
4246 cxt = force_reg (SImode, cxt);
4247
4248 if (TARGET_LARGE_MEM)
4249 {
4250 rtx rotl = gen_reg_rtx (V4SImode);
4251 rtx mask = gen_reg_rtx (V4SImode);
4252 rtx bi = gen_reg_rtx (SImode);
4253 unsigned char shufa[16] = {
4254 2, 3, 0, 1, 18, 19, 16, 17,
4255 0, 1, 2, 3, 16, 17, 18, 19
4256 };
4257 unsigned char insna[16] = {
4258 0x41, 0, 0, 79,
4259 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4260 0x60, 0x80, 0, 79,
4261 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4262 };
4263
4264 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4265 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4266
4267 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4268 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4269 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4270 emit_insn (gen_selb (insn, insnc, rotl, mask));
4271
4272 mem = memory_address (Pmode, tramp);
4273 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4274
4275 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4276 mem = memory_address (Pmode, plus_constant (tramp, 16));
4277 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4278 }
4279 else
4280 {
4281 rtx scxt = gen_reg_rtx (SImode);
4282 rtx sfnaddr = gen_reg_rtx (SImode);
4283 unsigned char insna[16] = {
4284 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4285 0x30, 0, 0, 0,
4286 0, 0, 0, 0,
4287 0, 0, 0, 0
4288 };
4289
4290 shufc = gen_reg_rtx (TImode);
4291 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4292
4293 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4294 fits 18 bits and the last 4 are zeros. This will be true if
4295	 the stack pointer is initialized to 0x3fff0 at program start;
4296	 otherwise the ila instruction will be garbage.  */
4297
4298 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4299 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4300 emit_insn (gen_cpat
4301 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4302 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4303 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4304
4305 mem = memory_address (Pmode, tramp);
4306 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4307
4308 }
4309 emit_insn (gen_sync ());
4310}
4311
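/* Expand a sign extension from the mode of ops[1] to the wider mode of
   ops[0].  The shuffle pattern places the value's bytes in the low-order
   positions of the result and fills every remaining byte from a register
   holding the sign: a sign-extended copy for QImode, otherwise an
   arithmetic shift right by 31.  */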
4312void
4313spu_expand_sign_extend (rtx ops[])
4314{
4315 unsigned char arr[16];
4316 rtx pat = gen_reg_rtx (TImode);
4317 rtx sign, c;
4318 int i, last;
4319 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4320 if (GET_MODE (ops[1]) == QImode)
4321 {
4322 sign = gen_reg_rtx (HImode);
4323 emit_insn (gen_extendqihi2 (sign, ops[1]));
4324 for (i = 0; i < 16; i++)
4325 arr[i] = 0x12;
4326 arr[last] = 0x13;
4327 }
4328 else
4329 {
4330 for (i = 0; i < 16; i++)
4331 arr[i] = 0x10;
4332 switch (GET_MODE (ops[1]))
4333 {
4334 case HImode:
4335 sign = gen_reg_rtx (SImode);
4336 emit_insn (gen_extendhisi2 (sign, ops[1]));
4337 arr[last] = 0x03;
4338 arr[last - 1] = 0x02;
4339 break;
4340 case SImode:
4341 sign = gen_reg_rtx (SImode);
4342 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4343 for (i = 0; i < 4; i++)
4344 arr[last - i] = 3 - i;
4345 break;
4346 case DImode:
4347 sign = gen_reg_rtx (SImode);
4348 c = gen_reg_rtx (SImode);
4349 emit_insn (gen_spu_convert (c, ops[1]));
4350 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4351 for (i = 0; i < 8; i++)
4352 arr[last - i] = 7 - i;
4353 break;
4354 default:
4355 abort ();
4356 }
4357 }
4358 emit_move_insn (pat, array_to_constant (TImode, arr));
4359 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4360}
4361
4362 /* Expand vector initialization.  If there are any constant parts,
4363    load constant parts first.  Then load any non-constant parts.  */
4364void
4365spu_expand_vector_init (rtx target, rtx vals)
4366{
4367 enum machine_mode mode = GET_MODE (target);
4368 int n_elts = GET_MODE_NUNITS (mode);
4369 int n_var = 0;
4370 bool all_same = true;
790c536c 4371 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 4372 int i;
4373
4374 first = XVECEXP (vals, 0, 0);
4375 for (i = 0; i < n_elts; ++i)
4376 {
4377 x = XVECEXP (vals, 0, i);
4378 if (!CONSTANT_P (x))
4379 ++n_var;
4380 else
4381 {
4382 if (first_constant == NULL_RTX)
4383 first_constant = x;
4384 }
4385 if (i > 0 && !rtx_equal_p (x, first))
4386 all_same = false;
4387 }
4388
4389 /* if all elements are the same, use splats to repeat elements */
4390 if (all_same)
4391 {
4392 if (!CONSTANT_P (first)
4393 && !register_operand (first, GET_MODE (x)))
4394 first = force_reg (GET_MODE (first), first);
4395 emit_insn (gen_spu_splats (target, first));
4396 return;
4397 }
4398
4399 /* load constant parts */
4400 if (n_var != n_elts)
4401 {
4402 if (n_var == 0)
4403 {
4404 emit_move_insn (target,
4405 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4406 }
4407 else
4408 {
4409 rtx constant_parts_rtx = copy_rtx (vals);
4410
4411 gcc_assert (first_constant != NULL_RTX);
4412	  /* Fill empty slots with the first constant; this increases
4413	     our chance of using splats in the recursive call below.  */
4414 for (i = 0; i < n_elts; ++i)
4415 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4416 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4417
4418 spu_expand_vector_init (target, constant_parts_rtx);
4419 }
4420 }
4421
4422 /* load variable parts */
4423 if (n_var != 0)
4424 {
4425 rtx insert_operands[4];
4426
4427 insert_operands[0] = target;
4428 insert_operands[2] = target;
4429 for (i = 0; i < n_elts; ++i)
4430 {
4431 x = XVECEXP (vals, 0, i);
4432 if (!CONSTANT_P (x))
4433 {
4434 if (!register_operand (x, GET_MODE (x)))
4435 x = force_reg (GET_MODE (x), x);
4436 insert_operands[1] = x;
4437 insert_operands[3] = GEN_INT (i);
4438 spu_builtin_insert (insert_operands);
4439 }
4440 }
4441 }
4442}
6352eedf 4443
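/* Put OP into a register of mode MODE.  Integer constants are converted
   with convert_to_mode, a same-size register operand is reinterpreted
   with a subreg when possible, and everything else goes through
   spu_convert.  */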
4444static rtx
4445spu_force_reg (enum machine_mode mode, rtx op)
4446{
4447 rtx x, r;
4448 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
4449 {
4450 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
4451 || GET_MODE (op) == BLKmode)
4452 return force_reg (mode, convert_to_mode (mode, op, 0));
4453 abort ();
4454 }
4455
4456 r = force_reg (GET_MODE (op), op);
4457 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
4458 {
4459 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
4460 if (x)
4461 return x;
4462 }
4463
4464 x = gen_reg_rtx (mode);
4465 emit_insn (gen_spu_convert (x, r));
4466 return x;
4467}
4468
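/* Check that OP is a valid operand for parameter class P of builtin D:
   immediate operands must be integer literals within the class's range,
   and a warning is given when low-order bits that the instruction will
   ignore are not zero.  */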
4469static void
4470spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
4471{
4472 HOST_WIDE_INT v = 0;
4473 int lsbits;
4474 /* Check the range of immediate operands. */
4475 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
4476 {
4477 int range = p - SPU_BTI_7;
4478 if (!CONSTANT_P (op)
4479 || (GET_CODE (op) == CONST_INT
4480 && (INTVAL (op) < spu_builtin_range[range].low
4481 || INTVAL (op) > spu_builtin_range[range].high)))
4482 error ("%s expects an integer literal in the range [%d, %d].",
4483 d->name,
4484 spu_builtin_range[range].low, spu_builtin_range[range].high);
4485
4486 if (GET_CODE (op) == CONST
4487 && (GET_CODE (XEXP (op, 0)) == PLUS
4488 || GET_CODE (XEXP (op, 0)) == MINUS))
4489 {
4490 v = INTVAL (XEXP (XEXP (op, 0), 1));
4491 op = XEXP (XEXP (op, 0), 0);
4492 }
4493 else if (GET_CODE (op) == CONST_INT)
4494 v = INTVAL (op);
4495
4496 switch (p)
4497 {
4498 case SPU_BTI_S10_4:
4499 lsbits = 4;
4500 break;
4501 case SPU_BTI_U16_2:
4502	  /* This is only used in lqa and stqa.  Even though the insns
4503	     encode 16 bits of the address (all but the 2 least
4504	     significant), only 14 bits are used because the address is
4505	     masked to be 16-byte aligned.  */
4506 lsbits = 4;
4507 break;
4508 case SPU_BTI_S16_2:
4509 /* This is used for lqr and stqr. */
4510 lsbits = 2;
4511 break;
4512 default:
4513 lsbits = 0;
4514 }
4515
4516 if (GET_CODE (op) == LABEL_REF
4517 || (GET_CODE (op) == SYMBOL_REF
4518 && SYMBOL_REF_FUNCTION_P (op))
4519 || (INTVAL (op) & ((1 << lsbits) - 1)) != 0)
4520 warning (0, "%d least significant bits of %s are ignored.", lsbits,
4521 d->name);
4522 }
4523}
4524
4525
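/* Expand the arguments in ARGLIST for builtin D into OPS, storing
   TARGET in ops[0] first when the builtin returns a value.  */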
4526static void
4527expand_builtin_args (struct spu_builtin_description *d, tree arglist,
4528 rtx target, rtx ops[])
4529{
4530 enum insn_code icode = d->icode;
4531 int i = 0;
4532
4533 /* Expand the arguments into rtl. */
4534
4535 if (d->parm[0] != SPU_BTI_VOID)
4536 ops[i++] = target;
4537
4538 for (; i < insn_data[icode].n_operands; i++)
4539 {
4540 tree arg = TREE_VALUE (arglist);
4541 if (arg == 0)
4542 abort ();
4543 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
4544 arglist = TREE_CHAIN (arglist);
4545 }
4546}
4547
4548static rtx
4549spu_expand_builtin_1 (struct spu_builtin_description *d,
4550 tree arglist, rtx target)
4551{
4552 rtx pat;
4553 rtx ops[8];
4554 enum insn_code icode = d->icode;
4555 enum machine_mode mode, tmode;
4556 int i, p;
4557 tree return_type;
4558
4559 /* Set up ops[] with values from arglist. */
4560 expand_builtin_args (d, arglist, target, ops);
4561
4562 /* Handle the target operand which must be operand 0. */
4563 i = 0;
4564 if (d->parm[0] != SPU_BTI_VOID)
4565 {
4566
4567       /* We prefer the mode specified for the match_operand; otherwise
4568          use the mode from the builtin function prototype.  */
4569 tmode = insn_data[d->icode].operand[0].mode;
4570 if (tmode == VOIDmode)
4571 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
4572
4573       /* Try to use TARGET, because not using it can lead to extra copies,
4574          and when all of the registers are in use extra copies lead
4575          to extra spills.  */
4576 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
4577 ops[0] = target;
4578 else
4579 target = ops[0] = gen_reg_rtx (tmode);
4580
4581 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
4582 abort ();
4583
4584 i++;
4585 }
4586
4587   /* Ignore align_hint, but still expand its args in case they have
4588      side effects.  */
4589 if (icode == CODE_FOR_spu_align_hint)
4590 return 0;
4591
4592 /* Handle the rest of the operands. */
4593 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
4594 {
4595 if (insn_data[d->icode].operand[i].mode != VOIDmode)
4596 mode = insn_data[d->icode].operand[i].mode;
4597 else
4598 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
4599
4600 /* mode can be VOIDmode here for labels */
4601
4602 /* For specific intrinsics with an immediate operand, e.g.,
4603 si_ai(), we sometimes need to convert the scalar argument to a
4604 vector argument by splatting the scalar. */
4605 if (VECTOR_MODE_P (mode)
4606 && (GET_CODE (ops[i]) == CONST_INT
4607 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
4608 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
4609 {
4610 if (GET_CODE (ops[i]) == CONST_INT)
4611 ops[i] = spu_const (mode, INTVAL (ops[i]));
4612 else
4613 {
4614 rtx reg = gen_reg_rtx (mode);
4615 enum machine_mode imode = GET_MODE_INNER (mode);
4616 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
4617 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
4618 if (imode != GET_MODE (ops[i]))
4619 ops[i] = convert_to_mode (imode, ops[i],
4620 TYPE_UNSIGNED (spu_builtin_types
4621 [d->parm[i]]));
4622 emit_insn (gen_spu_splats (reg, ops[i]));
4623 ops[i] = reg;
4624 }
4625 }
4626
4627 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
4628 ops[i] = spu_force_reg (mode, ops[i]);
4629
4630 spu_check_builtin_parm (d, ops[i], d->parm[p]);
4631 }
4632
4633 switch (insn_data[icode].n_operands)
4634 {
4635 case 0:
4636 pat = GEN_FCN (icode) (0);
4637 break;
4638 case 1:
4639 pat = GEN_FCN (icode) (ops[0]);
4640 break;
4641 case 2:
4642 pat = GEN_FCN (icode) (ops[0], ops[1]);
4643 break;
4644 case 3:
4645 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
4646 break;
4647 case 4:
4648 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
4649 break;
4650 case 5:
4651 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
4652 break;
4653 case 6:
4654 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
4655 break;
4656 default:
4657 abort ();
4658 }
4659
4660 if (!pat)
4661 abort ();
4662
4663 if (d->type == B_CALL || d->type == B_BISLED)
4664 emit_call_insn (pat);
4665 else if (d->type == B_JUMP)
4666 {
4667 emit_jump_insn (pat);
4668 emit_barrier ();
4669 }
4670 else
4671 emit_insn (pat);
4672
4673 return_type = spu_builtin_types[d->parm[0]];
4674 if (d->parm[0] != SPU_BTI_VOID
4675 && GET_MODE (target) != TYPE_MODE (return_type))
4676 {
4677       /* target is the return value.  It should always have the mode of
4678          the builtin function prototype.  */
4679 target = spu_force_reg (TYPE_MODE (return_type), target);
4680 }
4681
4682 return target;
4683}
4684
4685rtx
4686spu_expand_builtin (tree exp,
4687 rtx target,
4688 rtx subtarget ATTRIBUTE_UNUSED,
4689 enum machine_mode mode ATTRIBUTE_UNUSED,
4690 int ignore ATTRIBUTE_UNUSED)
4691{
4692 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
4693 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
4694 tree arglist = TREE_OPERAND (exp, 1);
4695 struct spu_builtin_description *d;
4696
4697 if (fcode < NUM_SPU_BUILTINS)
4698 {
4699 d = &spu_builtins[fcode];
4700
4701 return spu_expand_builtin_1 (d, arglist, target);
4702 }
4703 abort ();
4704}
4705