644459d0 1/* Copyright (C) 2006 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
16 02110-1301, USA. */
17
18#include "config.h"
19#include "system.h"
20#include "coretypes.h"
21#include "tm.h"
22#include "rtl.h"
23#include "regs.h"
24#include "hard-reg-set.h"
25#include "real.h"
26#include "insn-config.h"
27#include "conditions.h"
28#include "insn-attr.h"
29#include "flags.h"
30#include "recog.h"
31#include "obstack.h"
32#include "tree.h"
33#include "expr.h"
34#include "optabs.h"
35#include "except.h"
36#include "function.h"
37#include "output.h"
38#include "basic-block.h"
39#include "integrate.h"
40#include "toplev.h"
41#include "ggc.h"
42#include "hashtab.h"
43#include "tm_p.h"
44#include "target.h"
45#include "target-def.h"
46#include "langhooks.h"
47#include "reload.h"
48#include "cfglayout.h"
49#include "sched-int.h"
50#include "params.h"
51#include "assert.h"
52#include "c-common.h"
53#include "machmode.h"
54#include "tree-gimple.h"
55#include "tm-constrs.h"
56#include "spu-builtins.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
59struct spu_builtin_range
60{
61 int low, high;
62};
63
64static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77};
78
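/* For illustration only: reading straight from the table above,
   SPU_BTI_U7 accepts immediates in 0..0x7f and SPU_BTI_S10 accepts
   -0x200..0x1ff.  A check against this table is a simple bounds test,
   sketched here under the assumption that the index has already been
   mapped to this table:

     ok = val >= spu_builtin_range[i].low
          && val <= spu_builtin_range[i].high;

   Any additional scaling or alignment rules implied by the *_2 and *_4
   variants are assumed to be enforced where builtin arguments are
   validated.  */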
644459d0 79\f
80/* Target specific attribute specifications. */
81char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83/* Prototypes and external defs. */
84static void spu_init_builtins (void);
85static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88static rtx get_pic_reg (void);
89static int need_to_save_reg (int regno, int saving);
90static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94static void emit_nop_for_insn (rtx insn);
95static bool insn_clobbers_hbr (rtx insn);
96static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98static rtx get_branch_target (rtx branch);
99static void insert_branch_hints (void);
100static void insert_nops (void);
101static void spu_machine_dependent_reorg (void);
102static int spu_sched_issue_rate (void);
103static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
104 int can_issue_more);
105static int get_pipe (rtx insn);
106static int spu_sched_adjust_priority (rtx insn, int pri);
107static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
108static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
109 int flags,
110 unsigned char *no_add_attrs);
111static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
112 int flags,
113 unsigned char *no_add_attrs);
114static int spu_naked_function_p (tree func);
115static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
116 tree type, unsigned char named);
117static tree spu_build_builtin_va_list (void);
118static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
119 tree * post_p);
120static int regno_aligned_for_load (int regno);
121static int store_with_one_insn_p (rtx mem);
122static int reg_align (rtx reg);
123static int mem_is_padded_component_ref (rtx x);
124static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
125static void spu_asm_globalize_label (FILE * file, const char *name);
126static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
127 int *total);
128static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
129static void spu_init_libfuncs (void);
130static bool spu_return_in_memory (tree type, tree fntype);
131
132extern const char *reg_names[];
133rtx spu_compare_op0, spu_compare_op1;
134
135enum spu_immediate {
136 SPU_NONE,
137 SPU_IL,
138 SPU_ILA,
139 SPU_ILH,
140 SPU_ILHU,
141 SPU_ORI,
142 SPU_ORHI,
143 SPU_ORBI,
99369027 144 SPU_IOHL
644459d0 145};
146
147static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
148static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
149
150/* Built in types. */
151tree spu_builtin_types[SPU_BTI_MAX];
152\f
153/* TARGET overrides. */
154
155#undef TARGET_INIT_BUILTINS
156#define TARGET_INIT_BUILTINS spu_init_builtins
157
644459d0 158#undef TARGET_EXPAND_BUILTIN
159#define TARGET_EXPAND_BUILTIN spu_expand_builtin
160
161#undef TARGET_EH_RETURN_FILTER_MODE
162#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
163
164/* The .8byte directive doesn't seem to work well for a 32 bit
165 architecture. */
166#undef TARGET_ASM_UNALIGNED_DI_OP
167#define TARGET_ASM_UNALIGNED_DI_OP NULL
168
169#undef TARGET_RTX_COSTS
170#define TARGET_RTX_COSTS spu_rtx_costs
171
172#undef TARGET_ADDRESS_COST
173#define TARGET_ADDRESS_COST hook_int_rtx_0
174
175#undef TARGET_SCHED_ISSUE_RATE
176#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
177
178#undef TARGET_SCHED_VARIABLE_ISSUE
179#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
180
181#undef TARGET_SCHED_ADJUST_PRIORITY
182#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
183
184#undef TARGET_SCHED_ADJUST_COST
185#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
186
187const struct attribute_spec spu_attribute_table[];
188#undef TARGET_ATTRIBUTE_TABLE
189#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
190
191#undef TARGET_ASM_INTEGER
192#define TARGET_ASM_INTEGER spu_assemble_integer
193
194#undef TARGET_SCALAR_MODE_SUPPORTED_P
195#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
196
197#undef TARGET_VECTOR_MODE_SUPPORTED_P
198#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
199
200#undef TARGET_FUNCTION_OK_FOR_SIBCALL
201#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
202
203#undef TARGET_ASM_GLOBALIZE_LABEL
204#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
205
206#undef TARGET_PASS_BY_REFERENCE
207#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
208
209#undef TARGET_MUST_PASS_IN_STACK
210#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
211
212#undef TARGET_BUILD_BUILTIN_VA_LIST
213#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
214
215#undef TARGET_SETUP_INCOMING_VARARGS
216#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
217
218#undef TARGET_MACHINE_DEPENDENT_REORG
219#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
220
221#undef TARGET_GIMPLIFY_VA_ARG_EXPR
222#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
223
224#undef TARGET_DEFAULT_TARGET_FLAGS
225#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
226
227#undef TARGET_INIT_LIBFUNCS
228#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
229
230#undef TARGET_RETURN_IN_MEMORY
231#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
232
233struct gcc_target targetm = TARGET_INITIALIZER;
234
644459d0 235/* Sometimes certain combinations of command options do not make sense
236 on a particular target machine. You can define a macro
237 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
238 executed once just after all the command options have been parsed. */
239void
240spu_override_options (void)
241{
242
644459d0 243 /* Override some of the default param values. With so many registers,
244 larger values are better for these params. */
245 if (MAX_UNROLLED_INSNS == 100)
246 MAX_UNROLLED_INSNS = 250;
247 if (MAX_PENDING_LIST_LENGTH == 32)
248 MAX_PENDING_LIST_LENGTH = 128;
249
250 flag_omit_frame_pointer = 1;
251
252 if (align_functions < 8)
253 align_functions = 8;
254}
255\f
256/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
257 struct attribute_spec.handler. */
258
259/* Table of machine attributes. */
260const struct attribute_spec spu_attribute_table[] =
261{
262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
263 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
264 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
265 { NULL, 0, 0, false, false, false, NULL }
266};
267
268/* True if MODE is valid for the target. By "valid", we mean able to
269 be manipulated in non-trivial ways. In particular, this means all
270 the arithmetic is supported. */
271static bool
272spu_scalar_mode_supported_p (enum machine_mode mode)
273{
274 switch (mode)
275 {
276 case QImode:
277 case HImode:
278 case SImode:
279 case SFmode:
280 case DImode:
281 case TImode:
282 case DFmode:
283 return true;
284
285 default:
286 return false;
287 }
288}
289
290/* Similarly for vector modes. "Supported" here is less strict. At
291 least some operations are supported; need to check optabs or builtins
292 for further details. */
293static bool
294spu_vector_mode_supported_p (enum machine_mode mode)
295{
296 switch (mode)
297 {
298 case V16QImode:
299 case V8HImode:
300 case V4SImode:
301 case V2DImode:
302 case V4SFmode:
303 case V2DFmode:
304 return true;
305
306 default:
307 return false;
308 }
309}
310
311/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
312 least significant bytes of the outer mode. This function returns
 313 TRUE for the SUBREGs where this is correct. */
314int
315valid_subreg (rtx op)
316{
317 enum machine_mode om = GET_MODE (op);
318 enum machine_mode im = GET_MODE (SUBREG_REG (op));
319 return om != VOIDmode && im != VOIDmode
320 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
321 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
322}
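/* For example, (subreg:HI (reg:SI ...)) and (subreg:SF (reg:SI ...)) are
   considered valid here (equal size, or both no larger than a word),
   while (subreg:DI (reg:SI ...)) is not, because the wider DI view would
   not keep the SI value in the least significant bytes.  */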
323
 324/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
 325 and adjust the start offset. */
326static rtx
327adjust_operand (rtx op, HOST_WIDE_INT * start)
328{
329 enum machine_mode mode;
330 int op_size;
331 /* Strip any SUBREG */
332 if (GET_CODE (op) == SUBREG)
333 {
334 if (start)
335 *start -=
336 GET_MODE_BITSIZE (GET_MODE (op)) -
337 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
338 op = SUBREG_REG (op);
339 }
 340 /* If it is smaller than SI, widen it to SI; the SUBREG is created below. */
341 op_size = GET_MODE_BITSIZE (GET_MODE (op));
342 if (op_size < 32)
343 {
344 if (start)
345 *start += 32 - op_size;
346 op_size = 32;
347 }
348 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
349 mode = mode_for_size (op_size, MODE_INT, 0);
350 if (mode != GET_MODE (op))
351 op = gen_rtx_SUBREG (mode, op, 0);
352 return op;
353}
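/* Worked example: given (subreg:TI (reg:SI r) 0) with *START == 100, the
   SUBREG is stripped and *START becomes 100 - (128 - 32) == 4; the result
   is (reg:SI r), and no new SUBREG is needed because SImode is already an
   integer mode of at least 32 bits.  */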
354
355void
356spu_expand_extv (rtx ops[], int unsignedp)
357{
358 HOST_WIDE_INT width = INTVAL (ops[2]);
359 HOST_WIDE_INT start = INTVAL (ops[3]);
360 HOST_WIDE_INT src_size, dst_size;
361 enum machine_mode src_mode, dst_mode;
362 rtx dst = ops[0], src = ops[1];
363 rtx s;
364
365 dst = adjust_operand (ops[0], 0);
366 dst_mode = GET_MODE (dst);
367 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
368
644459d0 369 src = adjust_operand (src, &start);
370 src_mode = GET_MODE (src);
371 src_size = GET_MODE_BITSIZE (GET_MODE (src));
372
373 if (start > 0)
374 {
375 s = gen_reg_rtx (src_mode);
376 switch (src_mode)
377 {
378 case SImode:
379 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
380 break;
381 case DImode:
382 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
383 break;
384 case TImode:
385 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
386 break;
387 default:
388 abort ();
389 }
390 src = s;
391 }
392
393 if (width < src_size)
394 {
395 rtx pat;
396 int icode;
397 switch (src_mode)
398 {
399 case SImode:
400 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
401 break;
402 case DImode:
403 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
404 break;
405 case TImode:
406 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
407 break;
408 default:
409 abort ();
410 }
411 s = gen_reg_rtx (src_mode);
412 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
413 emit_insn (pat);
414 src = s;
415 }
416
417 convert_move (dst, src, unsignedp);
418}
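/* For example, an unsigned extract of WIDTH == 16 bits at START == 8 from
   an SImode source is emitted as a left shift by 8 followed by a logical
   right shift by 32 - 16 == 16, and the result is then converted to the
   destination mode.  */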
419
420void
421spu_expand_insv (rtx ops[])
422{
423 HOST_WIDE_INT width = INTVAL (ops[1]);
424 HOST_WIDE_INT start = INTVAL (ops[2]);
425 HOST_WIDE_INT maskbits;
426 enum machine_mode dst_mode, src_mode;
427 rtx dst = ops[0], src = ops[3];
428 int dst_size, src_size;
429 rtx mask;
430 rtx shift_reg;
431 int shift;
432
433
434 if (GET_CODE (ops[0]) == MEM)
435 dst = gen_reg_rtx (TImode);
436 else
437 dst = adjust_operand (dst, &start);
438 dst_mode = GET_MODE (dst);
439 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
440
441 if (CONSTANT_P (src))
442 {
443 enum machine_mode m =
444 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
445 src = force_reg (m, convert_to_mode (m, src, 0));
446 }
447 src = adjust_operand (src, 0);
448 src_mode = GET_MODE (src);
449 src_size = GET_MODE_BITSIZE (GET_MODE (src));
450
451 mask = gen_reg_rtx (dst_mode);
452 shift_reg = gen_reg_rtx (dst_mode);
453 shift = dst_size - start - width;
454
455 /* It's not safe to use subreg here because the compiler assumes
456 that the SUBREG_REG is right justified in the SUBREG. */
457 convert_move (shift_reg, src, 1);
458
459 if (shift > 0)
460 {
461 switch (dst_mode)
462 {
463 case SImode:
464 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
465 break;
466 case DImode:
467 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
468 break;
469 case TImode:
470 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
471 break;
472 default:
473 abort ();
474 }
475 }
476 else if (shift < 0)
477 abort ();
478
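  /* Worked example of the mask computed below: with dst_size == 32,
     start == 8 and width == 8, maskbits is (-1ll << 16) + (1ll << 24),
     which truncates to 0x00ff0000, i.e. exactly the eight bits being
     replaced (START is counted from the most significant end).  */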
479 switch (dst_size)
480 {
481 case 32:
482 maskbits = (-1ll << (32 - width - start));
483 if (start)
484 maskbits += (1ll << (32 - start));
485 emit_move_insn (mask, GEN_INT (maskbits));
486 break;
487 case 64:
488 maskbits = (-1ll << (64 - width - start));
489 if (start)
490 maskbits += (1ll << (64 - start));
491 emit_move_insn (mask, GEN_INT (maskbits));
492 break;
493 case 128:
494 {
495 unsigned char arr[16];
496 int i = start / 8;
497 memset (arr, 0, sizeof (arr));
498 arr[i] = 0xff >> (start & 7);
499 for (i++; i <= (start + width - 1) / 8; i++)
500 arr[i] = 0xff;
501 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
502 emit_move_insn (mask, array_to_constant (TImode, arr));
503 }
504 break;
505 default:
506 abort ();
507 }
508 if (GET_CODE (ops[0]) == MEM)
509 {
510 rtx aligned = gen_reg_rtx (SImode);
511 rtx low = gen_reg_rtx (SImode);
512 rtx addr = gen_reg_rtx (SImode);
513 rtx rotl = gen_reg_rtx (SImode);
514 rtx mask0 = gen_reg_rtx (TImode);
515 rtx mem;
516
517 emit_move_insn (addr, XEXP (ops[0], 0));
518 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
519 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
520 emit_insn (gen_negsi2 (rotl, low));
521 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
522 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
523 mem = change_address (ops[0], TImode, aligned);
524 set_mem_alias_set (mem, 0);
525 emit_move_insn (dst, mem);
526 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
527 emit_move_insn (mem, dst);
528 if (start + width > MEM_ALIGN (ops[0]))
529 {
530 rtx shl = gen_reg_rtx (SImode);
531 rtx mask1 = gen_reg_rtx (TImode);
532 rtx dst1 = gen_reg_rtx (TImode);
533 rtx mem1;
534 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
535 emit_insn (gen_shlqby_ti (mask1, mask, shl));
536 mem1 = adjust_address (mem, TImode, 16);
537 set_mem_alias_set (mem1, 0);
538 emit_move_insn (dst1, mem1);
539 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
540 emit_move_insn (mem1, dst1);
541 }
542 }
543 else
544 emit_insn (gen_selb (dst, dst, shift_reg, mask));
545}
546
547
548int
549spu_expand_block_move (rtx ops[])
550{
551 HOST_WIDE_INT bytes, align, offset;
552 rtx src, dst, sreg, dreg, target;
553 int i;
554 if (GET_CODE (ops[2]) != CONST_INT
555 || GET_CODE (ops[3]) != CONST_INT
556 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
557 return 0;
558
559 bytes = INTVAL (ops[2]);
560 align = INTVAL (ops[3]);
561
562 if (bytes <= 0)
563 return 1;
564
565 dst = ops[0];
566 src = ops[1];
567
568 if (align == 16)
569 {
570 for (offset = 0; offset + 16 <= bytes; offset += 16)
571 {
572 dst = adjust_address (ops[0], V16QImode, offset);
573 src = adjust_address (ops[1], V16QImode, offset);
574 emit_move_insn (dst, src);
575 }
576 if (offset < bytes)
577 {
578 rtx mask;
579 unsigned char arr[16] = { 0 };
580 for (i = 0; i < bytes - offset; i++)
581 arr[i] = 0xff;
582 dst = adjust_address (ops[0], V16QImode, offset);
583 src = adjust_address (ops[1], V16QImode, offset);
584 mask = gen_reg_rtx (V16QImode);
585 sreg = gen_reg_rtx (V16QImode);
586 dreg = gen_reg_rtx (V16QImode);
587 target = gen_reg_rtx (V16QImode);
588 emit_move_insn (mask, array_to_constant (V16QImode, arr));
589 emit_move_insn (dreg, dst);
590 emit_move_insn (sreg, src);
591 emit_insn (gen_selb (target, dreg, sreg, mask));
592 emit_move_insn (dst, target);
593 }
594 return 1;
595 }
596 return 0;
597}
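/* For example, a 20-byte copy with 16-byte alignment becomes one full
   qword move for bytes 0..15 plus a read-modify-write of the following
   qword, where the remaining 4 bytes are merged in with selb under a
   mask whose first four bytes are 0xff.  Copies that are unaligned or
   larger than MOVE_RATIO * 8 bytes return 0 and are left to the generic
   code.  */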
598
599enum spu_comp_code
600{ SPU_EQ, SPU_GT, SPU_GTU };
601
602
603int spu_comp_icode[8][3] = {
604 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
605 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
606 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
607 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
608 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
609 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
610 {0, 0, 0},
611 {CODE_FOR_ceq_vec, 0, 0},
612};
613
 614/* Emit a compare for CODE and then, depending on IS_SET, a branch, a
 615 set, or a select based on the result. GCC can figure this out too if
 616 we don't provide all variations of compares, but because GCC always
 617 wants to use WORD_MODE we can generate better code in most cases if we
 618 do it ourselves. */
619void
620spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
621{
622 int reverse_compare = 0;
623 int reverse_test = 0;
624 rtx compare_result;
625 rtx comp_rtx;
626 rtx target = operands[0];
627 enum machine_mode comp_mode;
628 enum machine_mode op_mode;
629 enum spu_comp_code scode;
630 int index;
631
632 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
633 and so on, to keep the constant in operand 1. */
634 if (GET_CODE (spu_compare_op1) == CONST_INT)
635 {
636 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
637 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
638 switch (code)
639 {
640 case GE:
641 spu_compare_op1 = GEN_INT (val);
642 code = GT;
643 break;
644 case LT:
645 spu_compare_op1 = GEN_INT (val);
646 code = LE;
647 break;
648 case GEU:
649 spu_compare_op1 = GEN_INT (val);
650 code = GTU;
651 break;
652 case LTU:
653 spu_compare_op1 = GEN_INT (val);
654 code = LEU;
655 break;
656 default:
657 break;
658 }
659 }
660
661 switch (code)
662 {
663 case GE:
664 reverse_compare = 1;
665 reverse_test = 1;
666 scode = SPU_GT;
667 break;
668 case LE:
669 reverse_compare = 0;
670 reverse_test = 1;
671 scode = SPU_GT;
672 break;
673 case LT:
674 reverse_compare = 1;
675 reverse_test = 0;
676 scode = SPU_GT;
677 break;
678 case GEU:
679 reverse_compare = 1;
680 reverse_test = 1;
681 scode = SPU_GTU;
682 break;
683 case LEU:
684 reverse_compare = 0;
685 reverse_test = 1;
686 scode = SPU_GTU;
687 break;
688 case LTU:
689 reverse_compare = 1;
690 reverse_test = 0;
691 scode = SPU_GTU;
692 break;
693 case NE:
694 reverse_compare = 0;
695 reverse_test = 1;
696 scode = SPU_EQ;
697 break;
698
699 case EQ:
700 scode = SPU_EQ;
701 break;
702 case GT:
703 scode = SPU_GT;
704 break;
705 case GTU:
706 scode = SPU_GTU;
707 break;
708 default:
709 scode = SPU_EQ;
710 break;
711 }
712
713 comp_mode = SImode;
714 op_mode = GET_MODE (spu_compare_op0);
715
716 switch (op_mode)
717 {
718 case QImode:
719 index = 0;
720 comp_mode = QImode;
721 break;
722 case HImode:
723 index = 1;
724 comp_mode = HImode;
725 break;
726 case SImode:
727 index = 2;
728 break;
729 case DImode:
730 index = 3;
731 break;
732 case TImode:
733 index = 4;
734 break;
735 case SFmode:
736 index = 5;
737 break;
738 case DFmode:
739 index = 6;
740 break;
741 case V16QImode:
742 case V8HImode:
743 case V4SImode:
744 case V2DImode:
745 case V4SFmode:
746 case V2DFmode:
747 index = 7;
748 break;
749 default:
750 abort ();
751 }
752
753 if (GET_MODE (spu_compare_op1) == DFmode)
754 {
755 rtx reg = gen_reg_rtx (DFmode);
756 if (!flag_unsafe_math_optimizations
757 || (scode != SPU_GT && scode != SPU_EQ))
758 abort ();
759 if (reverse_compare)
760 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
761 else
762 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
763 reverse_compare = 0;
764 spu_compare_op0 = reg;
765 spu_compare_op1 = CONST0_RTX (DFmode);
766 }
767
768 if (is_set == 0 && spu_compare_op1 == const0_rtx
769 && (GET_MODE (spu_compare_op0) == SImode
770 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
771 {
772 /* Don't need to set a register with the result when we are
773 comparing against zero and branching. */
774 reverse_test = !reverse_test;
775 compare_result = spu_compare_op0;
776 }
777 else
778 {
779 compare_result = gen_reg_rtx (comp_mode);
780
781 if (reverse_compare)
782 {
783 rtx t = spu_compare_op1;
784 spu_compare_op1 = spu_compare_op0;
785 spu_compare_op0 = t;
786 }
787
788 if (spu_comp_icode[index][scode] == 0)
789 abort ();
790
791 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
792 (spu_compare_op0, op_mode))
793 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
794 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
795 (spu_compare_op1, op_mode))
796 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
797 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
798 spu_compare_op0,
799 spu_compare_op1);
800 if (comp_rtx == 0)
801 abort ();
802 emit_insn (comp_rtx);
803
804 }
805
806 if (is_set == 0)
807 {
808 rtx bcomp;
809 rtx loc_ref;
810
811 /* We don't have branch on QI compare insns, so we convert the
812 QI compare result to a HI result. */
813 if (comp_mode == QImode)
814 {
815 rtx old_res = compare_result;
816 compare_result = gen_reg_rtx (HImode);
817 comp_mode = HImode;
818 emit_insn (gen_extendqihi2 (compare_result, old_res));
819 }
820
821 if (reverse_test)
822 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
823 else
824 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
825
826 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
827 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
828 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
829 loc_ref, pc_rtx)));
830 }
831 else if (is_set == 2)
832 {
833 int compare_size = GET_MODE_BITSIZE (comp_mode);
834 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
835 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
836 rtx select_mask;
837 rtx op_t = operands[2];
838 rtx op_f = operands[3];
839
840 /* The result of the comparison can be SI, HI or QI mode. Create a
841 mask based on that result. */
842 if (target_size > compare_size)
843 {
844 select_mask = gen_reg_rtx (mode);
845 emit_insn (gen_extend_compare (select_mask, compare_result));
846 }
847 else if (target_size < compare_size)
848 select_mask =
849 gen_rtx_SUBREG (mode, compare_result,
850 (compare_size - target_size) / BITS_PER_UNIT);
851 else if (comp_mode != mode)
852 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
853 else
854 select_mask = compare_result;
855
856 if (GET_MODE (target) != GET_MODE (op_t)
857 || GET_MODE (target) != GET_MODE (op_f))
858 abort ();
859
860 if (reverse_test)
861 emit_insn (gen_selb (target, op_t, op_f, select_mask));
862 else
863 emit_insn (gen_selb (target, op_f, op_t, select_mask));
864 }
865 else
866 {
867 if (reverse_test)
868 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
869 gen_rtx_NOT (comp_mode, compare_result)));
870 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
871 emit_insn (gen_extendhisi2 (target, compare_result));
872 else if (GET_MODE (target) == SImode
873 && GET_MODE (compare_result) == QImode)
874 emit_insn (gen_extend_compare (target, compare_result));
875 else
876 emit_move_insn (target, compare_result);
877 }
878}
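/* For instance, a SImode LE with is_set == 1 is emitted as a cgt of the
   original operands followed by a complement of the result (the
   reverse_test path), while an EQ-against-zero branch (is_set == 0) skips
   the explicit compare entirely and branches on the operand itself.  */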
879
880HOST_WIDE_INT
881const_double_to_hwint (rtx x)
882{
883 HOST_WIDE_INT val;
884 REAL_VALUE_TYPE rv;
885 if (GET_MODE (x) == SFmode)
886 {
887 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
888 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
889 }
890 else if (GET_MODE (x) == DFmode)
891 {
892 long l[2];
893 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
894 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
895 val = l[0];
896 val = (val << 32) | (l[1] & 0xffffffff);
897 }
898 else
899 abort ();
900 return val;
901}
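/* For illustration: for the SFmode constant 1.0 this returns the IEEE
   single-precision pattern 0x3f800000, and for the DFmode constant 1.0 it
   returns 0x3ff0000000000000.  hwint_to_const_double below performs the
   inverse conversion.  */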
902
903rtx
904hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
905{
906 long tv[2];
907 REAL_VALUE_TYPE rv;
908 gcc_assert (mode == SFmode || mode == DFmode);
909
910 if (mode == SFmode)
911 tv[0] = (v << 32) >> 32;
912 else if (mode == DFmode)
913 {
914 tv[1] = (v << 32) >> 32;
915 tv[0] = v >> 32;
916 }
917 real_from_target (&rv, tv, mode);
918 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
919}
920
921void
922print_operand_address (FILE * file, register rtx addr)
923{
924 rtx reg;
925 rtx offset;
926
e04cf423 927 if (GET_CODE (addr) == AND
928 && GET_CODE (XEXP (addr, 1)) == CONST_INT
929 && INTVAL (XEXP (addr, 1)) == -16)
930 addr = XEXP (addr, 0);
931
644459d0 932 switch (GET_CODE (addr))
933 {
934 case REG:
935 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
936 break;
937
938 case PLUS:
939 reg = XEXP (addr, 0);
940 offset = XEXP (addr, 1);
941 if (GET_CODE (offset) == REG)
942 {
943 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
944 reg_names[REGNO (offset)]);
945 }
946 else if (GET_CODE (offset) == CONST_INT)
947 {
948 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
949 INTVAL (offset), reg_names[REGNO (reg)]);
950 }
951 else
952 abort ();
953 break;
954
955 case CONST:
956 case LABEL_REF:
957 case SYMBOL_REF:
958 case CONST_INT:
959 output_addr_const (file, addr);
960 break;
961
962 default:
963 debug_rtx (addr);
964 abort ();
965 }
966}
967
968void
969print_operand (FILE * file, rtx x, int code)
970{
971 enum machine_mode mode = GET_MODE (x);
972 HOST_WIDE_INT val;
973 unsigned char arr[16];
974 int xcode = GET_CODE (x);
975 if (GET_MODE (x) == VOIDmode)
976 switch (code)
977 {
978 case 'H': /* 128 bits, signed */
979 case 'L': /* 128 bits, signed */
980 case 'm': /* 128 bits, signed */
981 case 'T': /* 128 bits, signed */
982 case 't': /* 128 bits, signed */
983 mode = TImode;
984 break;
985 case 'G': /* 64 bits, signed */
986 case 'K': /* 64 bits, signed */
987 case 'k': /* 64 bits, signed */
988 case 'D': /* 64 bits, signed */
989 case 'd': /* 64 bits, signed */
990 mode = DImode;
991 break;
992 case 'F': /* 32 bits, signed */
993 case 'J': /* 32 bits, signed */
994 case 'j': /* 32 bits, signed */
995 case 's': /* 32 bits, signed */
996 case 'S': /* 32 bits, signed */
997 mode = SImode;
998 break;
999 }
1000 switch (code)
1001 {
1002
1003 case 'j': /* 32 bits, signed */
1004 case 'k': /* 64 bits, signed */
1005 case 'm': /* 128 bits, signed */
1006 if (xcode == CONST_INT
1007 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1008 {
1009 gcc_assert (logical_immediate_p (x, mode));
1010 constant_to_array (mode, x, arr);
1011 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1012 val = trunc_int_for_mode (val, SImode);
1013 switch (which_logical_immediate (val))
1014 {
1015 case SPU_ORI:
1016 break;
1017 case SPU_ORHI:
1018 fprintf (file, "h");
1019 break;
1020 case SPU_ORBI:
1021 fprintf (file, "b");
1022 break;
1023 default:
1024 gcc_unreachable();
1025 }
1026 }
1027 else
1028 gcc_unreachable();
1029 return;
1030
1031 case 'J': /* 32 bits, signed */
1032 case 'K': /* 64 bits, signed */
1033 case 'L': /* 128 bits, signed */
1034 if (xcode == CONST_INT
1035 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1036 {
1037 gcc_assert (logical_immediate_p (x, mode)
1038 || iohl_immediate_p (x, mode));
1039 constant_to_array (mode, x, arr);
1040 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1041 val = trunc_int_for_mode (val, SImode);
1042 switch (which_logical_immediate (val))
1043 {
1044 case SPU_ORI:
1045 case SPU_IOHL:
1046 break;
1047 case SPU_ORHI:
1048 val = trunc_int_for_mode (val, HImode);
1049 break;
1050 case SPU_ORBI:
1051 val = trunc_int_for_mode (val, QImode);
1052 break;
1053 default:
1054 gcc_unreachable();
1055 }
1056 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1057 }
1058 else
1059 gcc_unreachable();
1060 return;
1061
1062 case 't': /* 128 bits, signed */
1063 case 'd': /* 64 bits, signed */
1064 case 's': /* 32 bits, signed */
1065 if (xcode == CONST_INT
1066 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1067 {
1068 gcc_assert (immediate_load_p (x, mode));
1069 constant_to_array (mode, x, arr);
1070 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1071 val = trunc_int_for_mode (val, SImode);
1072 switch (which_immediate_load (val))
1073 {
1074 case SPU_IL:
1075 break;
1076 case SPU_ILA:
1077 fprintf (file, "a");
1078 break;
1079 case SPU_ILH:
1080 fprintf (file, "h");
1081 break;
1082 case SPU_ILHU:
1083 fprintf (file, "hu");
1084 break;
1085 default:
1086 gcc_unreachable();
1087 }
1088 }
1089 else if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1090 fprintf (file, "a");
1091 else
1092 gcc_unreachable ();
1093 return;
1094
1095 case 'T': /* 128 bits, signed */
1096 case 'D': /* 64 bits, signed */
1097 case 'S': /* 32 bits, signed */
1098 if (xcode == CONST_INT
1099 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1100 {
1101 gcc_assert (immediate_load_p (x, mode));
1102 constant_to_array (mode, x, arr);
1103 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1104 val = trunc_int_for_mode (val, SImode);
1105 switch (which_immediate_load (val))
1106 {
1107 case SPU_IL:
1108 case SPU_ILA:
1109 break;
1110 case SPU_ILH:
1111 case SPU_ILHU:
1112 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1113 break;
1114 default:
1115 gcc_unreachable();
1116 }
1117 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1118 }
1119 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1120 output_addr_const (file, x);
1121 else
1122 gcc_unreachable ();
1123 return;
1124
1125 case 'F':
1126 case 'G':
1127 case 'H':
1128 if (xcode == CONST_INT
1129 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1130 { /* immediate operand for fsmbi */
1131 int i;
1132 HOST_WIDE_INT val = 0;
1133 unsigned char arr[16];
1134 constant_to_array (mode, x, arr);
1135 for (i = 0; i < 16; i++)
1136 {
1137 val <<= 1;
1138 val |= arr[i] & 1;
1139 }
1140 print_operand (file, GEN_INT (val), 0);
1141 }
1142 else
1143 gcc_unreachable();
1144 return;
1145
1146 case 'C':
1147 if (xcode == CONST_INT)
1148 {
 1149 /* Only the 4 least significant bits are relevant for generating
 1150 control word instructions. */
1151 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1152 return;
1153 }
1154 break;
1155
1156 case 'M': /* print code for c*d */
1157 if (GET_CODE (x) == CONST_INT)
1158 switch (INTVAL (x))
1159 {
1160 case 1:
1161 fprintf (file, "b");
1162 break;
1163 case 2:
1164 fprintf (file, "h");
1165 break;
1166 case 4:
1167 fprintf (file, "w");
1168 break;
1169 case 8:
1170 fprintf (file, "d");
1171 break;
1172 default:
1173 gcc_unreachable();
1174 }
1175 else
1176 gcc_unreachable();
1177 return;
1178
1179 case 'N': /* Negate the operand */
1180 if (xcode == CONST_INT)
1181 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1182 else if (xcode == CONST_VECTOR)
1183 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1184 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1185 return;
1186
1187 case 'I': /* enable/disable interrupts */
1188 if (xcode == CONST_INT)
1189 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1190 return;
1191
1192 case 'b': /* branch modifiers */
1193 if (xcode == REG)
1194 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1195 else if (COMPARISON_P (x))
1196 fprintf (file, "%s", xcode == NE ? "n" : "");
1197 return;
1198
1199 case 'i': /* indirect call */
1200 if (xcode == MEM)
1201 {
1202 if (GET_CODE (XEXP (x, 0)) == REG)
1203 /* Used in indirect function calls. */
1204 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1205 else
1206 output_address (XEXP (x, 0));
1207 }
1208 return;
1209
1210 case 'p': /* load/store */
1211 if (xcode == MEM)
1212 {
1213 x = XEXP (x, 0);
1214 xcode = GET_CODE (x);
1215 }
e04cf423 1216 if (xcode == AND)
1217 {
1218 x = XEXP (x, 0);
1219 xcode = GET_CODE (x);
1220 }
644459d0 1221 if (xcode == REG)
1222 fprintf (file, "d");
1223 else if (xcode == CONST_INT)
1224 fprintf (file, "a");
1225 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1226 fprintf (file, "r");
1227 else if (xcode == PLUS || xcode == LO_SUM)
1228 {
1229 if (GET_CODE (XEXP (x, 1)) == REG)
1230 fprintf (file, "x");
1231 else
1232 fprintf (file, "d");
1233 }
1234 return;
1235
1236 case 0:
1237 if (xcode == REG)
1238 fprintf (file, "%s", reg_names[REGNO (x)]);
1239 else if (xcode == MEM)
1240 output_address (XEXP (x, 0));
1241 else if (xcode == CONST_VECTOR)
1242 output_addr_const (file, CONST_VECTOR_ELT (x, 0));
1243 else
1244 output_addr_const (file, x);
1245 return;
1246
1247 default:
1248 output_operand_lossage ("invalid %%xn code");
1249 }
1250 gcc_unreachable ();
1251}
1252
1253extern char call_used_regs[];
1254extern char regs_ever_live[];
1255
1256/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1257 caller saved register. For leaf functions it is more efficient to
1258 use a volatile register because we won't need to save and restore the
1259 pic register. This routine is only valid after register allocation
1260 is completed, so we can pick an unused register. */
1261static rtx
1262get_pic_reg (void)
1263{
1264 rtx pic_reg = pic_offset_table_rtx;
1265 if (!reload_completed && !reload_in_progress)
1266 abort ();
1267 return pic_reg;
1268}
1269
1270/* SAVING is TRUE when we are generating the actual load and store
1271 instructions for REGNO. When determining the size of the stack
 1272 needed for saving registers we must allocate enough space for the
 1273 worst case, because we don't always have the information early enough
 1274 to avoid allocating it. But we can at least eliminate the actual loads
1275 and stores during the prologue/epilogue. */
1276static int
1277need_to_save_reg (int regno, int saving)
1278{
1279 if (regs_ever_live[regno] && !call_used_regs[regno])
1280 return 1;
1281 if (flag_pic
1282 && regno == PIC_OFFSET_TABLE_REGNUM
1283 && (!saving || current_function_uses_pic_offset_table)
1284 && (!saving
1285 || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
1286 return 1;
1287 return 0;
1288}
1289
1290/* This function is only correct starting with local register
1291 allocation */
1292int
1293spu_saved_regs_size (void)
1294{
1295 int reg_save_size = 0;
1296 int regno;
1297
1298 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1299 if (need_to_save_reg (regno, 0))
1300 reg_save_size += 0x10;
1301 return reg_save_size;
1302}
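/* Each saved register takes a full 16-byte quadword slot, so a function
   that must save three call-saved registers reserves 0x30 bytes here.  */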
1303
1304static rtx
1305frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1306{
1307 rtx reg = gen_rtx_REG (V4SImode, regno);
1308 rtx mem =
1309 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1310 return emit_insn (gen_movv4si (mem, reg));
1311}
1312
1313static rtx
1314frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1315{
1316 rtx reg = gen_rtx_REG (V4SImode, regno);
1317 rtx mem =
1318 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1319 return emit_insn (gen_movv4si (reg, mem));
1320}
1321
1322/* This happens after reload, so we need to expand it. */
1323static rtx
1324frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1325{
1326 rtx insn;
1327 if (satisfies_constraint_K (GEN_INT (imm)))
1328 {
1329 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1330 }
1331 else
1332 {
1333 insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1334 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1335 REG_NOTES (insn));
1336 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1337 if (REGNO (src) == REGNO (scratch))
1338 abort ();
1339 }
1340 if (REGNO (dst) == REGNO (scratch))
1341 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1342 REG_NOTES (insn));
1343 return insn;
1344}
1345
1346/* Return nonzero if this function is known to have a null epilogue. */
1347
1348int
1349direct_return (void)
1350{
1351 if (reload_completed)
1352 {
1353 if (cfun->static_chain_decl == 0
1354 && (spu_saved_regs_size ()
1355 + get_frame_size ()
1356 + current_function_outgoing_args_size
1357 + current_function_pretend_args_size == 0)
1358 && current_function_is_leaf)
1359 return 1;
1360 }
1361 return 0;
1362}
1363
1364/*
1365 The stack frame looks like this:
1366 +-------------+
1367 | incoming |
1368 AP | args |
1369 +-------------+
1370 | $lr save |
1371 +-------------+
1372 prev SP | back chain |
1373 +-------------+
1374 | var args |
1375 | reg save | current_function_pretend_args_size bytes
1376 +-------------+
1377 | ... |
1378 | saved regs | spu_saved_regs_size() bytes
1379 +-------------+
1380 | ... |
1381 FP | vars | get_frame_size() bytes
1382 +-------------+
1383 | ... |
1384 | outgoing |
1385 | args | current_function_outgoing_args_size bytes
1386 +-------------+
1387 | $lr of next |
1388 | frame |
1389 +-------------+
1390 SP | back chain |
1391 +-------------+
1392
1393*/
1394void
1395spu_expand_prologue (void)
1396{
1397 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1398 HOST_WIDE_INT total_size;
1399 HOST_WIDE_INT saved_regs_size;
1400 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1401 rtx scratch_reg_0, scratch_reg_1;
1402 rtx insn, real;
1403
1404 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1405 the "toplevel" insn chain. */
1406 emit_note (NOTE_INSN_DELETED);
1407
1408 if (flag_pic && optimize == 0)
1409 current_function_uses_pic_offset_table = 1;
1410
1411 if (spu_naked_function_p (current_function_decl))
1412 return;
1413
1414 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1415 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1416
1417 saved_regs_size = spu_saved_regs_size ();
1418 total_size = size + saved_regs_size
1419 + current_function_outgoing_args_size
1420 + current_function_pretend_args_size;
1421
1422 if (!current_function_is_leaf
1423 || current_function_calls_alloca || total_size > 0)
1424 total_size += STACK_POINTER_OFFSET;
1425
1426 /* Save this first because code after this might use the link
1427 register as a scratch register. */
1428 if (!current_function_is_leaf)
1429 {
1430 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1431 RTX_FRAME_RELATED_P (insn) = 1;
1432 }
1433
1434 if (total_size > 0)
1435 {
1436 offset = -current_function_pretend_args_size;
1437 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1438 if (need_to_save_reg (regno, 1))
1439 {
1440 offset -= 16;
1441 insn = frame_emit_store (regno, sp_reg, offset);
1442 RTX_FRAME_RELATED_P (insn) = 1;
1443 }
1444 }
1445
1446 if (flag_pic && current_function_uses_pic_offset_table)
1447 {
1448 rtx pic_reg = get_pic_reg ();
1449 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1450 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1451 REG_NOTES (insn));
1452 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1453 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1454 REG_NOTES (insn));
1455 }
1456
1457 if (total_size > 0)
1458 {
1459 if (flag_stack_check)
1460 {
 1461 /* We compare against total_size-1 because
1462 ($sp >= total_size) <=> ($sp > total_size-1) */
1463 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1464 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1465 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1466 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1467 {
1468 emit_move_insn (scratch_v4si, size_v4si);
1469 size_v4si = scratch_v4si;
1470 }
1471 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1472 emit_insn (gen_vec_extractv4si
1473 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1474 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1475 }
1476
1477 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1478 the value of the previous $sp because we save it as the back
1479 chain. */
1480 if (total_size <= 2000)
1481 {
1482 /* In this case we save the back chain first. */
1483 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1484 RTX_FRAME_RELATED_P (insn) = 1;
1485 insn =
1486 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1487 }
1488 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1489 {
1490 insn = emit_move_insn (scratch_reg_0, sp_reg);
1491 RTX_FRAME_RELATED_P (insn) = 1;
1492 insn =
1493 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1494 }
1495 else
1496 {
1497 insn = emit_move_insn (scratch_reg_0, sp_reg);
1498 RTX_FRAME_RELATED_P (insn) = 1;
1499 insn =
1500 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1501 }
1502 RTX_FRAME_RELATED_P (insn) = 1;
1503 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1504 REG_NOTES (insn) =
1505 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1506
1507 if (total_size > 2000)
1508 {
1509 /* Save the back chain ptr */
1510 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1511 RTX_FRAME_RELATED_P (insn) = 1;
1512 }
1513
1514 if (frame_pointer_needed)
1515 {
1516 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1517 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1518 + current_function_outgoing_args_size;
1519 /* Set the new frame_pointer */
1520 frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1521 }
1522 }
1523
1524 emit_note (NOTE_INSN_DELETED);
1525}
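/* For example, with a total frame of 176 bytes the old $sp is stored as
   the back chain at sp - 176 first and $sp is then decremented with a
   single add; for frames larger than 2000 bytes the old $sp is copied to
   a scratch register before the adjustment and stored at the new sp
   afterwards.  */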
1526
1527void
1528spu_expand_epilogue (bool sibcall_p)
1529{
1530 int size = get_frame_size (), offset, regno;
1531 HOST_WIDE_INT saved_regs_size, total_size;
1532 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1533 rtx jump, scratch_reg_0;
1534
1535 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1536 the "toplevel" insn chain. */
1537 emit_note (NOTE_INSN_DELETED);
1538
1539 if (spu_naked_function_p (current_function_decl))
1540 return;
1541
1542 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1543
1544 saved_regs_size = spu_saved_regs_size ();
1545 total_size = size + saved_regs_size
1546 + current_function_outgoing_args_size
1547 + current_function_pretend_args_size;
1548
1549 if (!current_function_is_leaf
1550 || current_function_calls_alloca || total_size > 0)
1551 total_size += STACK_POINTER_OFFSET;
1552
1553 if (total_size > 0)
1554 {
1555 if (current_function_calls_alloca)
1556 /* Load it from the back chain because our save_stack_block and
1557 restore_stack_block do nothing. */
1558 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1559 else
1560 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1561
1562
1563 if (saved_regs_size > 0)
1564 {
1565 offset = -current_function_pretend_args_size;
1566 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1567 if (need_to_save_reg (regno, 1))
1568 {
1569 offset -= 0x10;
1570 frame_emit_load (regno, sp_reg, offset);
1571 }
1572 }
1573 }
1574
1575 if (!current_function_is_leaf)
1576 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1577
1578 if (!sibcall_p)
1579 {
1580 emit_insn (gen_rtx_USE
1581 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1582 jump = emit_jump_insn (gen__return ());
1583 emit_barrier_after (jump);
1584 }
1585
1586 emit_note (NOTE_INSN_DELETED);
1587}
1588
1589rtx
1590spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1591{
1592 if (count != 0)
1593 return 0;
1594 /* This is inefficient because it ends up copying to a save-register
1595 which then gets saved even though $lr has already been saved. But
1596 it does generate better code for leaf functions and we don't need
1597 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1598 used for __builtin_return_address anyway, so maybe we don't care if
1599 it's inefficient. */
1600 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1601}
1602\f
1603
1604/* Given VAL, generate a constant appropriate for MODE.
1605 If MODE is a vector mode, every element will be VAL.
1606 For TImode, VAL will be zero extended to 128 bits. */
1607rtx
1608spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1609{
1610 rtx inner;
1611 rtvec v;
1612 int units, i;
1613
1614 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1615 || GET_MODE_CLASS (mode) == MODE_FLOAT
1616 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1617 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1618
1619 if (GET_MODE_CLASS (mode) == MODE_INT)
1620 return immed_double_const (val, 0, mode);
1621
1622 /* val is the bit representation of the float */
1623 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1624 return hwint_to_const_double (mode, val);
1625
1626 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1627 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1628 else
1629 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1630
1631 units = GET_MODE_NUNITS (mode);
1632
1633 v = rtvec_alloc (units);
1634
1635 for (i = 0; i < units; ++i)
1636 RTVEC_ELT (v, i) = inner;
1637
1638 return gen_rtx_CONST_VECTOR (mode, v);
1639}
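/* Examples: spu_const (V4SImode, 1) yields a CONST_VECTOR whose four
   elements are all 1, and spu_const (SFmode, 0x3f800000) yields the
   CONST_DOUBLE for 1.0f, since VAL is interpreted as the bit pattern for
   floating-point modes.  */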
1640\f
1641/* branch hint stuff */
1642
1643/* The hardware requires 8 insns between a hint and the branch it
 1644 affects. This variable describes how many rtl instructions the
1645 compiler needs to see before inserting a hint. (FIXME: We should
1646 accept less and insert nops to enforce it because hinting is always
1647 profitable for performance, but we do need to be careful of code
1648 size.) */
1649int spu_hint_dist = (8 * 4);
1650
1651/* An array of these is used to propagate hints to predecessor blocks. */
1652struct spu_bb_info
1653{
 1654 rtx prop_jump; /* propagated from another block */
 1655 basic_block bb; /* the original block. */
1656};
1657
1658/* The special $hbr register is used to prevent the insn scheduler from
1659 moving hbr insns across instructions which invalidate them. It
1660 should only be used in a clobber, and this function searches for
1661 insns which clobber it. */
1662static bool
1663insn_clobbers_hbr (rtx insn)
1664{
1665 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1666 {
1667 rtx parallel = PATTERN (insn);
1668 rtx clobber;
1669 int j;
1670 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1671 {
1672 clobber = XVECEXP (parallel, 0, j);
1673 if (GET_CODE (clobber) == CLOBBER
1674 && GET_CODE (XEXP (clobber, 0)) == REG
1675 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
1676 return 1;
1677 }
1678 }
1679 return 0;
1680}
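/* For example, inline asm with an appropriate clobber, or the inlined
   divmodsi4 sequence referred to in insert_branch_hints below, shows up
   here as a PARALLEL containing (clobber (reg HBR_REGNUM)).  */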
1681
1682static void
1683spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1684{
1685 rtx branch_label;
1686 rtx hint, insn, prev, next;
1687
1688 if (before == 0 || branch == 0 || target == 0)
1689 return;
1690
1691 if (distance > 600)
1692 return;
1693
1694
1695 branch_label = gen_label_rtx ();
1696 LABEL_NUSES (branch_label)++;
1697 LABEL_PRESERVE_P (branch_label) = 1;
1698 insn = emit_label_before (branch_label, branch);
1699 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1700
1701 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1702 the current insn is pipe0, dual issue with it. */
1703 prev = prev_active_insn (before);
1704 if (prev && get_pipe (prev) == 0)
1705 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1706 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1707 {
1708 next = next_active_insn (before);
1709 hint = emit_insn_after (gen_hbr (branch_label, target), before);
1710 if (next)
1711 PUT_MODE (next, TImode);
1712 }
1713 else
1714 {
1715 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1716 PUT_MODE (hint, TImode);
1717 }
1718 recog_memoized (hint);
1719}
1720
1721/* Returns 0 if we don't want a hint for this branch. Otherwise return
1722 the rtx for the branch target. */
1723static rtx
1724get_branch_target (rtx branch)
1725{
1726 if (GET_CODE (branch) == JUMP_INSN)
1727 {
1728 rtx set, src;
1729
1730 /* Return statements */
1731 if (GET_CODE (PATTERN (branch)) == RETURN)
1732 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1733
1734 /* jump table */
1735 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1736 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1737 return 0;
1738
1739 set = single_set (branch);
1740 src = SET_SRC (set);
1741 if (GET_CODE (SET_DEST (set)) != PC)
1742 abort ();
1743
1744 if (GET_CODE (src) == IF_THEN_ELSE)
1745 {
1746 rtx lab = 0;
1747 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1748 if (note)
1749 {
1750 /* If the more probable case is not a fall through, then
1751 try a branch hint. */
1752 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1753 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1754 && GET_CODE (XEXP (src, 1)) != PC)
1755 lab = XEXP (src, 1);
1756 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1757 && GET_CODE (XEXP (src, 2)) != PC)
1758 lab = XEXP (src, 2);
1759 }
1760 if (lab)
1761 {
1762 if (GET_CODE (lab) == RETURN)
1763 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1764 return lab;
1765 }
1766 return 0;
1767 }
1768
1769 return src;
1770 }
1771 else if (GET_CODE (branch) == CALL_INSN)
1772 {
1773 rtx call;
1774 /* All of our call patterns are in a PARALLEL and the CALL is
1775 the first pattern in the PARALLEL. */
1776 if (GET_CODE (PATTERN (branch)) != PARALLEL)
1777 abort ();
1778 call = XVECEXP (PATTERN (branch), 0, 0);
1779 if (GET_CODE (call) == SET)
1780 call = SET_SRC (call);
1781 if (GET_CODE (call) != CALL)
1782 abort ();
1783 return XEXP (XEXP (call, 0), 0);
1784 }
1785 return 0;
1786}
1787
1788static void
1789insert_branch_hints (void)
1790{
1791 struct spu_bb_info *spu_bb_info;
1792 rtx branch, insn, next;
1793 rtx branch_target = 0;
1794 int branch_addr = 0, insn_addr, head_addr;
1795 basic_block bb;
1796 unsigned int j;
1797
1798 spu_bb_info =
1799 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
1800 sizeof (struct spu_bb_info));
1801
1802 /* We need exact insn addresses and lengths. */
1803 shorten_branches (get_insns ());
1804
1805 FOR_EACH_BB_REVERSE (bb)
1806 {
1807 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
1808 branch = 0;
1809 if (spu_bb_info[bb->index].prop_jump)
1810 {
1811 branch = spu_bb_info[bb->index].prop_jump;
1812 branch_target = get_branch_target (branch);
1813 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
1814 }
1815 /* Search from end of a block to beginning. In this loop, find
 1816 jumps which need a branch hint and emit the hint only when:
1817 - it's an indirect branch and we're at the insn which sets
1818 the register
1819 - we're at an insn that will invalidate the hint. e.g., a
1820 call, another hint insn, inline asm that clobbers $hbr, and
1821 some inlined operations (divmodsi4). Don't consider jumps
1822 because they are only at the end of a block and are
1823 considered when we are deciding whether to propagate
1824 - we're getting too far away from the branch. The hbr insns
1825 only have a signed 10 bit offset
1826 We go back as far as possible so the branch will be considered
1827 for propagation when we get to the beginning of the block. */
1828 next = 0;
1829 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
1830 {
1831 if (INSN_P (insn))
1832 {
1833 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
1834 if (branch && next
1835 && ((GET_CODE (branch_target) == REG
1836 && set_of (branch_target, insn) != NULL_RTX)
1837 || insn_clobbers_hbr (insn)
1838 || branch_addr - insn_addr > 600))
1839 {
1840 int next_addr = INSN_ADDRESSES (INSN_UID (next));
1841 if (insn != BB_END (bb)
1842 && branch_addr - next_addr >= spu_hint_dist)
1843 {
1844 if (dump_file)
1845 fprintf (dump_file,
1846 "hint for %i in block %i before %i\n",
1847 INSN_UID (branch), bb->index, INSN_UID (next));
1848 spu_emit_branch_hint (next, branch, branch_target,
1849 branch_addr - next_addr);
1850 }
1851 branch = 0;
1852 }
1853
1854 /* JUMP_P will only be true at the end of a block. When
1855 branch is already set it means we've previously decided
1856 to propagate a hint for that branch into this block. */
1857 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
1858 {
1859 branch = 0;
1860 if ((branch_target = get_branch_target (insn)))
1861 {
1862 branch = insn;
1863 branch_addr = insn_addr;
1864 }
1865 }
1866
1867 /* When a branch hint is emitted it will be inserted
1868 before "next". Make sure next is the beginning of a
1869 cycle to minimize impact on the scheduled insns. */
1870 if (GET_MODE (insn) == TImode)
1871 next = insn;
1872 }
1873 if (insn == BB_HEAD (bb))
1874 break;
1875 }
1876
1877 if (branch)
1878 {
1879 /* If we haven't emitted a hint for this branch yet, it might
1880 be profitable to emit it in one of the predecessor blocks,
1881 especially for loops. */
1882 rtx bbend;
1883 basic_block prev = 0, prop = 0, prev2 = 0;
1884 int loop_exit = 0, simple_loop = 0;
1885 int next_addr = 0;
1886 if (next)
1887 next_addr = INSN_ADDRESSES (INSN_UID (next));
1888
1889 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
1890 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
1891 prev = EDGE_PRED (bb, j)->src;
1892 else
1893 prev2 = EDGE_PRED (bb, j)->src;
1894
1895 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
1896 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
1897 loop_exit = 1;
1898 else if (EDGE_SUCC (bb, j)->dest == bb)
1899 simple_loop = 1;
1900
1901 /* If this branch is a loop exit then propagate to previous
1902 fallthru block. This catches the cases when it is a simple
1903 loop or when there is an initial branch into the loop. */
1904 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
1905 prop = prev;
1906
 1907 /* If there is only one adjacent predecessor, propagate to it, but don't
 1908 propagate outside this loop. This loop_depth test isn't perfect, but
1909 I'm not sure the loop_father member is valid at this point. */
1910 else if (prev && single_pred_p (bb)
1911 && prev->loop_depth == bb->loop_depth)
1912 prop = prev;
1913
1914 /* If this is the JOIN block of a simple IF-THEN then
 1915 propagate the hint to the HEADER block. */
1916 else if (prev && prev2
1917 && EDGE_COUNT (bb->preds) == 2
1918 && EDGE_COUNT (prev->preds) == 1
1919 && EDGE_PRED (prev, 0)->src == prev2
1920 && prev2->loop_depth == bb->loop_depth
1921 && GET_CODE (branch_target) != REG)
1922 prop = prev;
1923
1924 /* Don't propagate when:
1925 - this is a simple loop and the hint would be too far
1926 - this is not a simple loop and there are 16 insns in
1927 this block already
1928 - the predecessor block ends in a branch that will be
1929 hinted
1930 - the predecessor block ends in an insn that invalidates
1931 the hint */
1932 if (prop
1933 && prop->index >= 0
1934 && (bbend = BB_END (prop))
1935 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
1936 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
1937 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
1938 {
1939 if (dump_file)
1940 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
1941 "for %i (loop_exit %i simple_loop %i dist %i)\n",
1942 bb->index, prop->index, bb->loop_depth,
1943 INSN_UID (branch), loop_exit, simple_loop,
1944 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
1945
1946 spu_bb_info[prop->index].prop_jump = branch;
1947 spu_bb_info[prop->index].bb = bb;
1948 }
1949 else if (next && branch_addr - next_addr >= spu_hint_dist)
1950 {
1951 if (dump_file)
1952 fprintf (dump_file, "hint for %i in block %i before %i\n",
1953 INSN_UID (branch), bb->index, INSN_UID (next));
1954 spu_emit_branch_hint (next, branch, branch_target,
1955 branch_addr - next_addr);
1956 }
1957 branch = 0;
1958 }
1959 }
1960 free (spu_bb_info);
1961}
1962\f
1963/* Emit a nop for INSN such that the two will dual issue. This assumes
1964 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1965 We check for TImode to handle a MULTI1 insn which has dual issued its
1966 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
1967 ADDR_VEC insns. */
1968static void
1969emit_nop_for_insn (rtx insn)
1970{
1971 int p;
1972 rtx new_insn;
1973 p = get_pipe (insn);
1974 if (p == 1 && GET_MODE (insn) == TImode)
1975 {
1976 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
1977 PUT_MODE (new_insn, TImode);
1978 PUT_MODE (insn, VOIDmode);
1979 }
1980 else
1981 new_insn = emit_insn_after (gen_lnop (), insn);
1982}
1983
1984/* Insert nops in basic blocks to meet dual issue alignment
1985 requirements. */
1986static void
1987insert_nops (void)
1988{
1989 rtx insn, next_insn, prev_insn;
1990 int length;
1991 int addr;
1992
1993 /* This sets up INSN_ADDRESSES. */
1994 shorten_branches (get_insns ());
1995
1996 /* Keep track of length added by nops. */
1997 length = 0;
1998
1999 prev_insn = 0;
2000 for (insn = get_insns (); insn; insn = next_insn)
2001 {
2002 next_insn = next_active_insn (insn);
2003 addr = INSN_ADDRESSES (INSN_UID (insn));
2004 if (GET_MODE (insn) == TImode
2005 && next_insn
2006 && GET_MODE (next_insn) != TImode
2007 && ((addr + length) & 7) != 0)
2008 {
2009 /* prev_insn will always be set because the first insn is
2010 always 8-byte aligned. */
2011 emit_nop_for_insn (prev_insn);
2012 length += 4;
2013 }
2014 prev_insn = insn;
2015 }
2016}
2017
2018static void
2019spu_machine_dependent_reorg (void)
2020{
2021 if (optimize > 0)
2022 {
2023 if (TARGET_BRANCH_HINTS)
2024 insert_branch_hints ();
2025 insert_nops ();
2026 }
2027}
2028\f
2029
2030/* Insn scheduling routines, primarily for dual issue. */
2031static int
2032spu_sched_issue_rate (void)
2033{
2034 return 2;
2035}
2036
2037static int
2038spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2039 int verbose ATTRIBUTE_UNUSED, rtx insn,
2040 int can_issue_more)
2041{
2042 if (GET_CODE (PATTERN (insn)) != USE
2043 && GET_CODE (PATTERN (insn)) != CLOBBER
2044 && get_pipe (insn) != -2)
2045 can_issue_more--;
2046 return can_issue_more;
2047}
2048
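/* Return which pipeline INSN issues to: 0 for pipe0, 1 for pipe1, -1 for
   insns with no fixed pipe (MULTI0, inline asm, ADDR_VEC), and -2 for the
   UNSPEC_CONVERT pseudo insns, which do not consume an issue slot.  */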
2049static int
2050get_pipe (rtx insn)
2051{
2052 enum attr_type t;
2053 /* Handle inline asm */
2054 if (INSN_CODE (insn) == -1)
2055 return -1;
2056 t = get_attr_type (insn);
2057 switch (t)
2058 {
2059 case TYPE_CONVERT:
2060 return -2;
2061 case TYPE_MULTI0:
2062 return -1;
2063
2064 case TYPE_FX2:
2065 case TYPE_FX3:
2066 case TYPE_SPR:
2067 case TYPE_NOP:
2068 case TYPE_FXB:
2069 case TYPE_FPD:
2070 case TYPE_FP6:
2071 case TYPE_FP7:
2072 case TYPE_IPREFETCH:
2073 return 0;
2074
2075 case TYPE_LNOP:
2076 case TYPE_SHUF:
2077 case TYPE_LOAD:
2078 case TYPE_STORE:
2079 case TYPE_BR:
2080 case TYPE_MULTI1:
2081 case TYPE_HBR:
2082 return 1;
2083 default:
2084 abort ();
2085 }
2086}
2087
2088static int
2089spu_sched_adjust_priority (rtx insn, int pri)
2090{
2091 int p = get_pipe (insn);
 2092	  /* Schedule UNSPEC_CONVERTs early so they have less effect on
 2093	     scheduling. */
2094 if (GET_CODE (PATTERN (insn)) == USE
2095 || GET_CODE (PATTERN (insn)) == CLOBBER
2096 || p == -2)
2097 return pri + 100;
2098 /* Schedule pipe0 insns early for greedier dual issue. */
2099 if (p != 1)
2100 return pri + 50;
2101 return pri;
2102}
2103
2104/* INSN is dependent on DEP_INSN. */
2105static int
2106spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2107 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2108{
2109 if (GET_CODE (insn) == CALL_INSN)
2110 return cost - 2;
2111 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2112 scheduler makes every insn in a block anti-dependent on the final
2113 jump_insn. We adjust here so higher cost insns will get scheduled
2114 earlier. */
2115 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2116 return INSN_COST (dep_insn) - 3;
2117 return cost;
2118}
2119\f
2120/* Create a CONST_DOUBLE from a string. */
2121struct rtx_def *
2122spu_float_const (const char *string, enum machine_mode mode)
2123{
2124 REAL_VALUE_TYPE value;
2125 value = REAL_VALUE_ATOF (string, mode);
2126 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2127}
2128
2129/* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2130 CONST_INT fits constraint 'K', i.e., is small. */
2131int
2132legitimate_const (rtx x, int aligned)
2133{
2134 /* We can never know if the resulting address fits in 18 bits and can be
2135 loaded with ila. Instead we should use the HI and LO relocations to
2136 load a 32 bit address. */
2137 rtx sym, cst;
2138
2139 gcc_assert (GET_CODE (x) == CONST);
2140
2141 if (GET_CODE (XEXP (x, 0)) != PLUS)
2142 return 0;
2143 sym = XEXP (XEXP (x, 0), 0);
2144 cst = XEXP (XEXP (x, 0), 1);
2145 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2146 return 0;
2147 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2148 return 0;
2149 return satisfies_constraint_K (cst);
2150}
2151
2152int
2153spu_constant_address_p (rtx x)
2154{
2155 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2156 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2157 || GET_CODE (x) == HIGH);
2158}
2159
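/* Return which single-instruction immediate load form (SPU_IL, SPU_ILA,
   SPU_ILH or SPU_ILHU) can materialize VAL, or SPU_NONE if none can.
   VAL must already be truncated to SImode.  */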
2160static enum spu_immediate
2161which_immediate_load (HOST_WIDE_INT val)
2162{
2163 gcc_assert (val == trunc_int_for_mode (val, SImode));
2164
2165 if (val >= -0x8000 && val <= 0x7fff)
2166 return SPU_IL;
2167 if (val >= 0 && val <= 0x3ffff)
2168 return SPU_ILA;
2169 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2170 return SPU_ILH;
2171 if ((val & 0xffff) == 0)
2172 return SPU_ILHU;
2173
2174 return SPU_NONE;
2175}
2176
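/* Return TRUE when the constant OP can be loaded into a register with a
   single immediate-load instruction.  */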
2177int
2178immediate_load_p (rtx op, enum machine_mode mode)
2179{
2180 HOST_WIDE_INT val;
2181 unsigned char arr[16];
2182 int i, j;
2183 if (GET_MODE (op) != VOIDmode)
2184 mode = GET_MODE (op);
2185
2186 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2187 || GET_CODE (op) == CONST_VECTOR);
2188
2189 /* V4SI with all identical symbols is valid. */
2190 if (mode == V4SImode
2191 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == SYMBOL_REF)
2192 return !TARGET_LARGE_MEM && !flag_pic
2193 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2194 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2195 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3);
2196
2197 constant_to_array (mode, op, arr);
2198
2199 /* Check that bytes are repeated. */
2200 for (i = 4; i < 16; i += 4)
2201 for (j = 0; j < 4; j++)
2202 if (arr[j] != arr[i + j])
2203 return 0;
2204
2205 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2206 val = trunc_int_for_mode (val, SImode);
2207
2208 return which_immediate_load (val) != SPU_NONE;
2209}
2210
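/* Return which immediate OR form (SPU_ORI, SPU_ORHI, SPU_ORBI or
   SPU_IOHL) can produce VAL, or SPU_NONE if none can.  VAL must already
   be truncated to SImode.  */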
2211static enum spu_immediate
2212which_logical_immediate (HOST_WIDE_INT val)
2213{
2214 gcc_assert (val == trunc_int_for_mode (val, SImode));
2215
2216 if (val >= -0x200 && val <= 0x1ff)
2217 return SPU_ORI;
2218 if (val >= 0 && val <= 0xffff)
2219 return SPU_IOHL;
2220 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2221 {
2222 val = trunc_int_for_mode (val, HImode);
2223 if (val >= -0x200 && val <= 0x1ff)
2224 return SPU_ORHI;
2225 if ((val & 0xff) == ((val >> 8) & 0xff))
2226 {
2227 val = trunc_int_for_mode (val, QImode);
2228 if (val >= -0x200 && val <= 0x1ff)
2229 return SPU_ORBI;
2230 }
2231 }
2232 return SPU_NONE;
2233}
2234
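/* Return TRUE when the constant OP can be generated with a single
   immediate OR instruction (iohl is excluded; see iohl_immediate_p).  */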
2235int
2236logical_immediate_p (rtx op, enum machine_mode mode)
2237{
2238 HOST_WIDE_INT val;
2239 unsigned char arr[16];
2240 int i, j;
2241
2242 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2243 || GET_CODE (op) == CONST_VECTOR);
2244
2245 if (GET_MODE (op) != VOIDmode)
2246 mode = GET_MODE (op);
2247
2248 constant_to_array (mode, op, arr);
2249
2250 /* Check that bytes are repeated. */
2251 for (i = 4; i < 16; i += 4)
2252 for (j = 0; j < 4; j++)
2253 if (arr[j] != arr[i + j])
2254 return 0;
2255
2256 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2257 val = trunc_int_for_mode (val, SImode);
2258
2259 i = which_logical_immediate (val);
2260 return i != SPU_NONE && i != SPU_IOHL;
2261}
2262
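/* Return TRUE when the constant OP, repeated as a 32-bit value, fits the
   unsigned 16-bit immediate accepted by iohl.  */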
2263int
2264iohl_immediate_p (rtx op, enum machine_mode mode)
2265{
2266 HOST_WIDE_INT val;
2267 unsigned char arr[16];
2268 int i, j;
2269
2270 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2271 || GET_CODE (op) == CONST_VECTOR);
2272
2273 if (GET_MODE (op) != VOIDmode)
2274 mode = GET_MODE (op);
2275
2276 constant_to_array (mode, op, arr);
2277
2278 /* Check that bytes are repeated. */
2279 for (i = 4; i < 16; i += 4)
2280 for (j = 0; j < 4; j++)
2281 if (arr[j] != arr[i + j])
2282 return 0;
2283
2284 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2285 val = trunc_int_for_mode (val, SImode);
2286
2287 return val >= 0 && val <= 0xffff;
2288}
2289
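/* Return TRUE when the constant OP consists of a single element value,
   repeated across the quadword, that lies in the range [LOW, HIGH].  */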
2290int
2291arith_immediate_p (rtx op, enum machine_mode mode,
2292 HOST_WIDE_INT low, HOST_WIDE_INT high)
2293{
2294 HOST_WIDE_INT val;
2295 unsigned char arr[16];
2296 int bytes, i, j;
2297
2298 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2299 || GET_CODE (op) == CONST_VECTOR);
2300
2301 if (GET_MODE (op) != VOIDmode)
2302 mode = GET_MODE (op);
2303
2304 constant_to_array (mode, op, arr);
2305
2306 if (VECTOR_MODE_P (mode))
2307 mode = GET_MODE_INNER (mode);
2308
2309 bytes = GET_MODE_SIZE (mode);
2310 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2311
2312 /* Check that bytes are repeated. */
2313 for (i = bytes; i < 16; i += bytes)
2314 for (j = 0; j < bytes; j++)
2315 if (arr[j] != arr[i + j])
2316 return 0;
2317
2318 val = arr[0];
2319 for (j = 1; j < bytes; j++)
2320 val = (val << 8) | arr[j];
2321
2322 val = trunc_int_for_mode (val, mode);
2323
2324 return val >= low && val <= high;
2325}
2326
2327/* We accept:
2328 - any 32 bit constant (SImode, SFmode)
2329 - any constant that can be generated with fsmbi (any mode)
2330 - a 64 bit constant where the high and low bits are identical
2331 (DImode, DFmode)
2332 - a 128 bit constant where the four 32 bit words match. */
2333int
2334spu_legitimate_constant_p (rtx x)
2335{
2336 unsigned char arr[16];
2337 int i, j;
2338
2339 if (GET_CODE (x) == HIGH
2340 || GET_CODE (x) == CONST
2341 || GET_CODE (x) == SYMBOL_REF
2342 || GET_CODE (x) == LABEL_REF)
2343 return 1;
2344
2345 if (fsmbi_const_p (x))
2346 return 1;
2347
2348 if (GET_CODE (x) == CONST_INT)
2349 return (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0x7fffffffll)
2350 || ((INTVAL (x) >> 32) & 0xffffffffll) == (INTVAL (x) & 0xffffffffll);
2351
2352 if (GET_MODE (x) == SFmode)
2353 return 1;
2354
2355 if (GET_MODE (x) == DFmode)
2356 {
2357 HOST_WIDE_INT val = const_double_to_hwint (x);
2358 return ((val >> 32) & 0xffffffffll) == (val & 0xffffffffll);
2359 }
2360
2361 /* V4SI with all identical symbols is valid. */
2362 if (GET_MODE (x) == V4SImode
2363 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2364 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2365 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2366 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2367 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2368 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2369 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2370
2371 if (VECTOR_MODE_P (GET_MODE (x)))
2372 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2373 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2374 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2375 return 0;
2376
2377 constant_to_array (SImode, x, arr);
2378
2379 /* Check that bytes are repeated. */
2380 for (i = 4; i < 16; i += 4)
2381 for (j = 0; j < 4; j++)
2382 if (arr[j] != arr[i + j])
2383 return 0;
2384
2385 return 1;
2386}
2387
2388/* Valid addresses are:
2389 - symbol_ref, label_ref, const
2390 - reg
2391 - reg + const, where either reg or const is 16 byte aligned
2392 - reg + reg, alignment doesn't matter
2393 The alignment matters in the reg+const case because lqd and stqd
2394 ignore the 4 least significant bits of the const. (TODO: It might be
2395 preferable to allow any alignment and fix it up when splitting.) */
2396int
2397spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2398 rtx x, int reg_ok_strict)
2399{
2400 if (mode == TImode && GET_CODE (x) == AND
2401 && GET_CODE (XEXP (x, 1)) == CONST_INT
2402 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2403 x = XEXP (x, 0);
2404 switch (GET_CODE (x))
2405 {
2406 case SYMBOL_REF:
2407 case LABEL_REF:
2408 return !TARGET_LARGE_MEM;
2409
2410 case CONST:
2411 return !TARGET_LARGE_MEM && legitimate_const (x, 1);
2412
2413 case CONST_INT:
2414 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2415
2416 case SUBREG:
2417 x = XEXP (x, 0);
2418 gcc_assert (GET_CODE (x) == REG);
2419
2420 case REG:
2421 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2422
2423 case PLUS:
2424 case LO_SUM:
2425 {
2426 rtx op0 = XEXP (x, 0);
2427 rtx op1 = XEXP (x, 1);
2428 if (GET_CODE (op0) == SUBREG)
2429 op0 = XEXP (op0, 0);
2430 if (GET_CODE (op1) == SUBREG)
2431 op1 = XEXP (op1, 0);
2432 /* We can't just accept any aligned register because CSE can
2433 change it to a register that is not marked aligned and then
2434 recog will fail. So we only accept frame registers because
2435 they will only be changed to other frame registers. */
2436 if (GET_CODE (op0) == REG
2437 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2438 && GET_CODE (op1) == CONST_INT
2439 && INTVAL (op1) >= -0x2000
2440 && INTVAL (op1) <= 0x1fff
2441 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2442 return 1;
2443 if (GET_CODE (op0) == REG
2444 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2445 && GET_CODE (op1) == REG
2446 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2447 return 1;
2448 }
2449 break;
2450
2451 default:
2452 break;
2453 }
2454 return 0;
2455}
2456
2457/* When the address is reg + const_int, force the const_int into a
 2458   register. */
2459rtx
2460spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2461 enum machine_mode mode)
2462{
2463 rtx op0, op1;
2464 /* Make sure both operands are registers. */
2465 if (GET_CODE (x) == PLUS)
2466 {
2467 op0 = XEXP (x, 0);
2468 op1 = XEXP (x, 1);
2469 if (ALIGNED_SYMBOL_REF_P (op0))
2470 {
2471 op0 = force_reg (Pmode, op0);
2472 mark_reg_pointer (op0, 128);
2473 }
2474 else if (GET_CODE (op0) != REG)
2475 op0 = force_reg (Pmode, op0);
2476 if (ALIGNED_SYMBOL_REF_P (op1))
2477 {
2478 op1 = force_reg (Pmode, op1);
2479 mark_reg_pointer (op1, 128);
2480 }
2481 else if (GET_CODE (op1) != REG)
2482 op1 = force_reg (Pmode, op1);
2483 x = gen_rtx_PLUS (Pmode, op0, op1);
2484 if (spu_legitimate_address (mode, x, 0))
2485 return x;
2486 }
2487 return NULL_RTX;
2488}
2489
2490/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2491 struct attribute_spec.handler. */
2492static tree
2493spu_handle_fndecl_attribute (tree * node,
2494 tree name,
2495 tree args ATTRIBUTE_UNUSED,
2496 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2497{
2498 if (TREE_CODE (*node) != FUNCTION_DECL)
2499 {
2500 warning (0, "`%s' attribute only applies to functions",
2501 IDENTIFIER_POINTER (name));
2502 *no_add_attrs = true;
2503 }
2504
2505 return NULL_TREE;
2506}
2507
2508/* Handle the "vector" attribute. */
2509static tree
2510spu_handle_vector_attribute (tree * node, tree name,
2511 tree args ATTRIBUTE_UNUSED,
2512 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2513{
2514 tree type = *node, result = NULL_TREE;
2515 enum machine_mode mode;
2516 int unsigned_p;
2517
2518 while (POINTER_TYPE_P (type)
2519 || TREE_CODE (type) == FUNCTION_TYPE
2520 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2521 type = TREE_TYPE (type);
2522
2523 mode = TYPE_MODE (type);
2524
2525 unsigned_p = TYPE_UNSIGNED (type);
2526 switch (mode)
2527 {
2528 case DImode:
2529 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2530 break;
2531 case SImode:
2532 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2533 break;
2534 case HImode:
2535 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2536 break;
2537 case QImode:
2538 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2539 break;
2540 case SFmode:
2541 result = V4SF_type_node;
2542 break;
2543 case DFmode:
2544 result = V2DF_type_node;
2545 break;
2546 default:
2547 break;
2548 }
2549
2550 /* Propagate qualifiers attached to the element type
2551 onto the vector type. */
2552 if (result && result != type && TYPE_QUALS (type))
2553 result = build_qualified_type (result, TYPE_QUALS (type));
2554
2555 *no_add_attrs = true; /* No need to hang on to the attribute. */
2556
2557 if (!result)
2558 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2559 else
2560 *node = reconstruct_complex_type (*node, result);
2561
2562 return NULL_TREE;
2563}
2564
2565/* Return non-zero if FUNC is a naked function. */
2566static int
2567spu_naked_function_p (tree func)
2568{
2569 tree a;
2570
2571 if (TREE_CODE (func) != FUNCTION_DECL)
2572 abort ();
2573
2574 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2575 return a != NULL_TREE;
2576}
2577
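/* Compute the initial difference between registers FROM and TO for
   register elimination (the INITIAL_ELIMINATION_OFFSET computation).  */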
2578int
2579spu_initial_elimination_offset (int from, int to)
2580{
2581 int saved_regs_size = spu_saved_regs_size ();
2582 int sp_offset = 0;
2583 if (!current_function_is_leaf || current_function_outgoing_args_size
2584 || get_frame_size () || saved_regs_size)
2585 sp_offset = STACK_POINTER_OFFSET;
2586 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2587 return (sp_offset + current_function_outgoing_args_size);
2588 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2589 return 0;
2590 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2591 return sp_offset + current_function_outgoing_args_size
2592 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2593 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2594 return get_frame_size () + saved_regs_size + sp_offset;
2595 return 0;
2596}
2597
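/* Return an rtx describing where a function result of type TYPE is
   returned.  Small aggregates are returned left justified, spread across
   consecutive return registers; everything else comes back in a single
   register of the type's mode.  */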
2598rtx
2599spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2600{
2601 enum machine_mode mode = TYPE_MODE (type);
2602 int byte_size = ((mode == BLKmode)
2603 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2604
2605 /* Make sure small structs are left justified in a register. */
2606 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2607 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2608 {
2609 enum machine_mode smode;
2610 rtvec v;
2611 int i;
2612 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2613 int n = byte_size / UNITS_PER_WORD;
2614 v = rtvec_alloc (nregs);
2615 for (i = 0; i < n; i++)
2616 {
2617 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2618 gen_rtx_REG (TImode,
2619 FIRST_RETURN_REGNUM
2620 + i),
2621 GEN_INT (UNITS_PER_WORD * i));
2622 byte_size -= UNITS_PER_WORD;
2623 }
2624
2625 if (n < nregs)
2626 {
2627 if (byte_size < 4)
2628 byte_size = 4;
2629 smode =
2630 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2631 RTVEC_ELT (v, n) =
2632 gen_rtx_EXPR_LIST (VOIDmode,
2633 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2634 GEN_INT (UNITS_PER_WORD * n));
2635 }
2636 return gen_rtx_PARALLEL (mode, v);
2637 }
2638 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2639}
2640
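/* Return the register (or the PARALLEL wrapping it) in which to pass an
   argument of MODE and TYPE, or 0 when the argument must go on the
   stack.  CUM counts the argument registers already used.  */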
2641rtx
2642spu_function_arg (CUMULATIVE_ARGS cum,
2643 enum machine_mode mode,
2644 tree type, int named ATTRIBUTE_UNUSED)
2645{
2646 int byte_size;
2647
2648 if (cum >= MAX_REGISTER_ARGS)
2649 return 0;
2650
2651 byte_size = ((mode == BLKmode)
2652 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2653
2654 /* The ABI does not allow parameters to be passed partially in
 2655     a register and partially on the stack. */
2656 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2657 return 0;
2658
2659 /* Make sure small structs are left justified in a register. */
2660 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2661 && byte_size < UNITS_PER_WORD && byte_size > 0)
2662 {
2663 enum machine_mode smode;
2664 rtx gr_reg;
2665 if (byte_size < 4)
2666 byte_size = 4;
2667 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2668 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2669 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2670 const0_rtx);
2671 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2672 }
2673 else
2674 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2675}
2676
2677/* Variable sized types are passed by reference. */
2678static bool
2679spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2680 enum machine_mode mode ATTRIBUTE_UNUSED,
2681 tree type, bool named ATTRIBUTE_UNUSED)
2682{
2683 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2684}
2685\f
2686
2687/* Var args. */
2688
2689/* Create and return the va_list datatype.
2690
2691 On SPU, va_list is an array type equivalent to
2692
2693 typedef struct __va_list_tag
2694 {
2695 void *__args __attribute__((__aligned(16)));
2696 void *__skip __attribute__((__aligned(16)));
2697
2698 } va_list[1];
2699
 2700   where __args points to the arg that will be returned by the next
2701 va_arg(), and __skip points to the previous stack frame such that
2702 when __args == __skip we should advance __args by 32 bytes. */
2703static tree
2704spu_build_builtin_va_list (void)
2705{
2706 tree f_args, f_skip, record, type_decl;
2707 bool owp;
2708
2709 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2710
2711 type_decl =
2712 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2713
2714 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2715 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
2716
2717 DECL_FIELD_CONTEXT (f_args) = record;
2718 DECL_ALIGN (f_args) = 128;
2719 DECL_USER_ALIGN (f_args) = 1;
2720
2721 DECL_FIELD_CONTEXT (f_skip) = record;
2722 DECL_ALIGN (f_skip) = 128;
2723 DECL_USER_ALIGN (f_skip) = 1;
2724
2725 TREE_CHAIN (record) = type_decl;
2726 TYPE_NAME (record) = type_decl;
2727 TYPE_FIELDS (record) = f_args;
2728 TREE_CHAIN (f_args) = f_skip;
2729
 2730  /* We know this is being padded and we want it that way.  It is an internal
2731 type so hide the warnings from the user. */
2732 owp = warn_padded;
2733 warn_padded = false;
2734
2735 layout_type (record);
2736
2737 warn_padded = owp;
2738
2739 /* The correct type is an array type of one element. */
2740 return build_array_type (record, build_index_type (size_zero_node));
2741}
2742
2743/* Implement va_start by filling the va_list structure VALIST.
2744 NEXTARG points to the first anonymous stack argument.
2745
2746 The following global variables are used to initialize
2747 the va_list structure:
2748
 2749   current_function_args_info:
2750 the CUMULATIVE_ARGS for this function
2751
2752 current_function_arg_offset_rtx:
2753 holds the offset of the first anonymous stack argument
2754 (relative to the virtual arg pointer). */
2755
2756void
2757spu_va_start (tree valist, rtx nextarg)
2758{
2759 tree f_args, f_skip;
2760 tree args, skip, t;
2761
2762 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2763 f_skip = TREE_CHAIN (f_args);
2764
2765 valist = build_va_arg_indirect_ref (valist);
2766 args =
2767 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2768 skip =
2769 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2770
2771 /* Find the __args area. */
2772 t = make_tree (TREE_TYPE (args), nextarg);
2773 if (current_function_pretend_args_size > 0)
2774 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
2775 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
2776 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
2777 TREE_SIDE_EFFECTS (t) = 1;
2778 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2779
2780 /* Find the __skip area. */
2781 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2782 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
2783 build_int_cst (integer_type_node,
2784 (current_function_pretend_args_size
2785 - STACK_POINTER_OFFSET)));
2786 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
2787 TREE_SIDE_EFFECTS (t) = 1;
2788 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2789}
2790
2791/* Gimplify va_arg by updating the va_list structure
2792 VALIST as required to retrieve an argument of type
2793 TYPE, and returning that argument.
2794
2795 ret = va_arg(VALIST, TYPE);
2796
2797 generates code equivalent to:
2798
2799 paddedsize = (sizeof(TYPE) + 15) & -16;
2800 if (VALIST.__args + paddedsize > VALIST.__skip
2801 && VALIST.__args <= VALIST.__skip)
2802 addr = VALIST.__skip + 32;
2803 else
2804 addr = VALIST.__args;
2805 VALIST.__args = addr + paddedsize;
2806 ret = *(TYPE *)addr;
2807 */
2808static tree
2809spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
2810 tree * post_p ATTRIBUTE_UNUSED)
2811{
2812 tree f_args, f_skip;
2813 tree args, skip;
2814 HOST_WIDE_INT size, rsize;
2815 tree paddedsize, addr, tmp;
2816 bool pass_by_reference_p;
2817
2818 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2819 f_skip = TREE_CHAIN (f_args);
2820
2821 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2822 args =
2823 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2824 skip =
2825 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2826
2827 addr = create_tmp_var (ptr_type_node, "va_arg");
2828 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
2829
2830 /* if an object is dynamically sized, a pointer to it is passed
2831 instead of the object itself. */
2832 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
2833 false);
2834 if (pass_by_reference_p)
2835 type = build_pointer_type (type);
2836 size = int_size_in_bytes (type);
2837 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
2838
2839 /* build conditional expression to calculate addr. The expression
2840 will be gimplified later. */
2841 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
2842 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
2843 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
2844 build2 (GT_EXPR, boolean_type_node, tmp, skip),
2845 build2 (LE_EXPR, boolean_type_node, args, skip));
2846
2847 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2848 build2 (PLUS_EXPR, ptr_type_node, skip,
2849 fold_convert (ptr_type_node, size_int (32))), args);
2850
2851 tmp = build2 (MODIFY_EXPR, ptr_type_node, addr, tmp);
2852 gimplify_and_add (tmp, pre_p);
2853
2854 /* update VALIST.__args */
2855 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
2856 tmp = build2 (MODIFY_EXPR, TREE_TYPE (args), args, tmp);
2857 gimplify_and_add (tmp, pre_p);
2858
2859 addr = fold_convert (build_pointer_type (type), addr);
2860
2861 if (pass_by_reference_p)
2862 addr = build_va_arg_indirect_ref (addr);
2863
2864 return build_va_arg_indirect_ref (addr);
2865}
2866
2867/* Save parameter registers starting with the register that corresponds
 2868   to the first unnamed parameter.  If the first unnamed parameter is
2869 in the stack then save no registers. Set pretend_args_size to the
2870 amount of space needed to save the registers. */
2871void
2872spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
2873 tree type, int *pretend_size, int no_rtl)
2874{
2875 if (!no_rtl)
2876 {
2877 rtx tmp;
2878 int regno;
2879 int offset;
2880 int ncum = *cum;
2881
 2882      /* cum currently points to the last named argument; we want to
2883 start at the next argument. */
2884 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
2885
2886 offset = -STACK_POINTER_OFFSET;
2887 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
2888 {
2889 tmp = gen_frame_mem (V4SImode,
2890 plus_constant (virtual_incoming_args_rtx,
2891 offset));
2892 emit_move_insn (tmp,
2893 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
2894 offset += 16;
2895 }
2896 *pretend_size = offset + STACK_POINTER_OFFSET;
2897 }
2898}
2899\f
2900void
2901spu_conditional_register_usage (void)
2902{
2903 if (flag_pic)
2904 {
2905 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2906 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2907 }
2908 global_regs[INTR_REGNUM] = 1;
2909}
2910
2911/* This is called to decide when we can simplify a load instruction. We
2912 must only return true for registers which we know will always be
 2913   aligned, taking into account that CSE might replace this reg with
2914 another one that has not been marked aligned.
2915 So this is really only true for frame, stack and virtual registers,
 2916   which we know are always aligned and should not be adversely affected
2917 by CSE. */
2918static int
2919regno_aligned_for_load (int regno)
2920{
2921 return regno == FRAME_POINTER_REGNUM
2922 || regno == HARD_FRAME_POINTER_REGNUM
2923 || regno == STACK_POINTER_REGNUM
2924 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
2925}
2926
2927/* Return TRUE when mem is known to be 16-byte aligned. */
2928int
2929aligned_mem_p (rtx mem)
2930{
2931 if (MEM_ALIGN (mem) >= 128)
2932 return 1;
2933 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
2934 return 1;
2935 if (GET_CODE (XEXP (mem, 0)) == PLUS)
2936 {
2937 rtx p0 = XEXP (XEXP (mem, 0), 0);
2938 rtx p1 = XEXP (XEXP (mem, 0), 1);
2939 if (regno_aligned_for_load (REGNO (p0)))
2940 {
2941 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
2942 return 1;
2943 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2944 return 1;
2945 }
2946 }
2947 else if (GET_CODE (XEXP (mem, 0)) == REG)
2948 {
2949 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
2950 return 1;
2951 }
2952 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
2953 return 1;
2954 else if (GET_CODE (XEXP (mem, 0)) == CONST)
2955 {
2956 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
2957 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
2958 if (GET_CODE (p0) == SYMBOL_REF
2959 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2960 return 1;
2961 }
2962 return 0;
2963}
2964
2965/* Return TRUE if we are certain the mem refers to a complete object
2966 which is both 16-byte aligned and padded to a 16-byte boundary. This
2967 would make it safe to store with a single instruction.
2968 We guarantee the alignment and padding for static objects by aligning
 2969   all of them to 16 bytes.  (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
2970 FIXME: We currently cannot guarantee this for objects on the stack
2971 because assign_parm_setup_stack calls assign_stack_local with the
2972 alignment of the parameter mode and in that case the alignment never
2973 gets adjusted by LOCAL_ALIGNMENT. */
2974static int
2975store_with_one_insn_p (rtx mem)
2976{
2977 rtx addr = XEXP (mem, 0);
2978 if (GET_MODE (mem) == BLKmode)
2979 return 0;
2980 /* Only static objects. */
2981 if (GET_CODE (addr) == SYMBOL_REF)
2982 {
2983 /* We use the associated declaration to make sure the access is
 2984         referring to the whole object.
 2985         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
2986 if it is necessary. Will there be cases where one exists, and
2987 the other does not? Will there be cases where both exist, but
2988 have different types? */
2989 tree decl = MEM_EXPR (mem);
2990 if (decl
2991 && TREE_CODE (decl) == VAR_DECL
2992 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
2993 return 1;
2994 decl = SYMBOL_REF_DECL (addr);
2995 if (decl
2996 && TREE_CODE (decl) == VAR_DECL
2997 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
2998 return 1;
2999 }
3000 return 0;
3001}
3002
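/* Expand a move of mode MODE between ops[0] and ops[1].  Return nonzero
   when the whole sequence has been emitted here, zero when the caller
   should fall through to the ordinary move pattern.  */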
3003int
3004spu_expand_mov (rtx * ops, enum machine_mode mode)
3005{
3006 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3007 abort ();
3008
3009 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3010 {
3011 rtx from = SUBREG_REG (ops[1]);
3012 enum machine_mode imode = GET_MODE (from);
3013
3014 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3015 && GET_MODE_CLASS (imode) == MODE_INT
3016 && subreg_lowpart_p (ops[1]));
3017
3018 if (GET_MODE_SIZE (imode) < 4)
3019 {
3020 from = gen_rtx_SUBREG (SImode, from, 0);
3021 imode = SImode;
3022 }
3023
3024 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3025 {
3026 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3027 emit_insn (GEN_FCN (icode) (ops[0], from));
3028 }
3029 else
3030 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3031 return 1;
3032 }
3033
3034 /* At least one of the operands needs to be a register. */
3035 if ((reload_in_progress | reload_completed) == 0
3036 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3037 {
3038 rtx temp = force_reg (mode, ops[1]);
3039 emit_move_insn (ops[0], temp);
3040 return 1;
3041 }
3042 if (reload_in_progress || reload_completed)
3043 {
3044 enum machine_mode mode = GET_MODE (ops[0]);
3045 if (GET_CODE (ops[1]) == CONST_INT
3046 && (mode == DImode || mode == TImode)
3047 && ((INTVAL (ops[1]) >> 32) & 0xffffffffll) !=
3048 (INTVAL (ops[1]) & 0xffffffffll))
3049 {
3050 rtx mem = force_const_mem (mode, ops[1]);
3051 if (TARGET_LARGE_MEM)
3052 {
3053 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
3054 emit_move_insn (addr, XEXP (mem, 0));
3055 mem = replace_equiv_address (mem, addr);
3056 }
3057 emit_move_insn (ops[0], mem);
3058 return 1;
3059 }
3060 else if ((GET_CODE (ops[1]) == CONST_INT
3061 || GET_CODE (ops[1]) == CONST_DOUBLE
3062 || GET_CODE (ops[1]) == CONST_VECTOR)
3063 && !immediate_load_p (ops[1], mode)
3064 && !fsmbi_const_p (ops[1]))
3065 {
3066 unsigned char arrlo[16];
3067 unsigned char arrhi[16];
3068 rtx to = ops[0], hi, lo;
3069 int i;
3070 constant_to_array (mode, ops[1], arrhi);
3071 for (i = 0; i < 16; i += 4)
3072 {
3073 arrlo[i + 2] = arrhi[i + 2];
3074 arrlo[i + 3] = arrhi[i + 3];
3075 arrlo[i + 0] = arrlo[i + 1] = 0;
3076 arrhi[i + 2] = arrhi[i + 3] = 0;
3077 }
3078 if (mode == SFmode)
3079 {
3080 to = spu_gen_subreg (SImode, ops[0]);
3081 mode = SImode;
3082 }
3083 else if (mode == V4SFmode)
3084 {
3085 to = spu_gen_subreg (V4SImode, ops[0]);
3086 mode = V4SImode;
3087 }
3088 hi = array_to_constant (mode, arrhi);
3089 lo = array_to_constant (mode, arrlo);
3090 emit_move_insn (to, hi);
3091 emit_insn (gen_rtx_SET (VOIDmode, to, gen_rtx_IOR (mode, to, lo)));
3092 return 1;
3093 }
3094 if ((GET_CODE (ops[1]) == CONST
3095 && !legitimate_const (ops[1], 0))
3096 || (TARGET_LARGE_MEM
3097 && (GET_CODE (ops[1]) == CONST
3098 || GET_CODE (ops[1]) == SYMBOL_REF
3099 || GET_CODE (ops[1]) == LABEL_REF)))
3100 {
3101 emit_insn (gen_high (ops[0], ops[1]));
3102 emit_insn (gen_low (ops[0], ops[0], ops[1]));
3103 if (flag_pic)
3104 {
3105 rtx pic_reg = get_pic_reg ();
3106 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
3107 current_function_uses_pic_offset_table = 1;
3108 }
3109 return 1;
3110 }
3111 if (flag_pic
3112 && (GET_CODE (ops[1]) == SYMBOL_REF
3113 || GET_CODE (ops[1]) == LABEL_REF
3114 || GET_CODE (ops[1]) == CONST))
3115 {
3116 rtx pic_reg = get_pic_reg ();
3117 emit_insn (gen_pic (ops[0], ops[1]));
3118 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
3119 current_function_uses_pic_offset_table = 1;
3120 return 1;
3121 }
3122 return 0;
3123 }
3124 else
3125 {
3126 if (GET_CODE (ops[0]) == MEM)
3127 {
3128 if (!spu_valid_move (ops))
3129 {
3130 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3131 gen_reg_rtx (TImode)));
3132 return 1;
3133 }
3134 }
3135 else if (GET_CODE (ops[1]) == MEM)
3136 {
3137 if (!spu_valid_move (ops))
3138 {
3139 emit_insn (gen_load
3140 (ops[0], ops[1], gen_reg_rtx (TImode),
3141 gen_reg_rtx (SImode)));
3142 return 1;
3143 }
3144 }
3145 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3146 extend them. */
3147 if (GET_CODE (ops[1]) == CONST_INT)
3148 {
3149 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3150 if (val != INTVAL (ops[1]))
3151 {
3152 emit_move_insn (ops[0], GEN_INT (val));
3153 return 1;
3154 }
3155 }
3156 }
3157 return 0;
3158}
3159
3160static int
3161reg_align (rtx reg)
3162{
3163 /* For now, only frame registers are known to be aligned at all times.
3164 We can't trust REGNO_POINTER_ALIGN because optimization will move
3165 registers around, potentially changing an "aligned" register in an
3166 address to an unaligned register, which would result in an invalid
3167 address. */
3168 int regno = REGNO (reg);
3169 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3170}
3171
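/* Split a load whose address is not known to be 16-byte aligned (or
   whose mode is smaller than a quadword): load the enclosing quadword
   into the TImode scratch ops[2] and rotate the desired bytes into
   place, using ops[3] as a scratch for a computed rotate amount.  */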
3172void
3173spu_split_load (rtx * ops)
3174{
3175 enum machine_mode mode = GET_MODE (ops[0]);
3176 rtx addr, load, rot, mem, p0, p1;
3177 int rot_amt;
3178
3179 addr = XEXP (ops[1], 0);
3180
3181 rot = 0;
3182 rot_amt = 0;
3183 if (GET_CODE (addr) == PLUS)
3184 {
3185 /* 8 cases:
3186 aligned reg + aligned reg => lqx
3187 aligned reg + unaligned reg => lqx, rotqby
3188 aligned reg + aligned const => lqd
3189 aligned reg + unaligned const => lqd, rotqbyi
3190 unaligned reg + aligned reg => lqx, rotqby
3191 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3192 unaligned reg + aligned const => lqd, rotqby
 3193         unaligned reg + unaligned const => not allowed by legitimate address
3194 */
3195 p0 = XEXP (addr, 0);
3196 p1 = XEXP (addr, 1);
3197 if (reg_align (p0) < 128)
3198 {
3199 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3200 {
3201 emit_insn (gen_addsi3 (ops[3], p0, p1));
3202 rot = ops[3];
3203 }
3204 else
3205 rot = p0;
3206 }
3207 else
3208 {
3209 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3210 {
3211 rot_amt = INTVAL (p1) & 15;
3212 p1 = GEN_INT (INTVAL (p1) & -16);
3213 addr = gen_rtx_PLUS (SImode, p0, p1);
3214 }
3215 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3216 rot = p1;
3217 }
3218 }
3219 else if (GET_CODE (addr) == REG)
3220 {
3221 if (reg_align (addr) < 128)
3222 rot = addr;
3223 }
3224 else if (GET_CODE (addr) == CONST)
3225 {
3226 if (GET_CODE (XEXP (addr, 0)) == PLUS
3227 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3228 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3229 {
3230 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3231 if (rot_amt & -16)
3232 addr = gen_rtx_CONST (Pmode,
3233 gen_rtx_PLUS (Pmode,
3234 XEXP (XEXP (addr, 0), 0),
3235 GEN_INT (rot_amt & -16)));
3236 else
3237 addr = XEXP (XEXP (addr, 0), 0);
3238 }
3239 else
3240 rot = addr;
3241 }
3242 else if (GET_CODE (addr) == CONST_INT)
3243 {
3244 rot_amt = INTVAL (addr);
3245 addr = GEN_INT (rot_amt & -16);
3246 }
3247 else if (!ALIGNED_SYMBOL_REF_P (addr))
3248 rot = addr;
3249
3250 if (GET_MODE_SIZE (mode) < 4)
3251 rot_amt += GET_MODE_SIZE (mode) - 4;
3252
3253 rot_amt &= 15;
3254
3255 if (rot && rot_amt)
3256 {
3257 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3258 rot = ops[3];
3259 rot_amt = 0;
3260 }
3261
3262 load = ops[2];
3263
3264 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3265 mem = change_address (ops[1], TImode, addr);
3266
e04cf423 3267 emit_insn (gen_movti (load, mem));
644459d0 3268
3269 if (rot)
3270 emit_insn (gen_rotqby_ti (load, load, rot));
3271 else if (rot_amt)
3272 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3273
3274 if (reload_completed)
3275 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3276 else
3277 emit_insn (gen_spu_convert (ops[0], load));
3278}
3279
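/* Split a store through an address that is not known to be aligned and
   padded: unless the store can be done with one instruction, load the
   enclosing quadword, merge in the new value with a cpat/shufb pattern,
   and store the whole quadword back.  ops[2] and ops[3] are TImode
   scratch registers.  */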
3280void
3281spu_split_store (rtx * ops)
3282{
3283 enum machine_mode mode = GET_MODE (ops[0]);
3284 rtx pat = ops[2];
3285 rtx reg = ops[3];
3286 rtx addr, p0, p1, p1_lo, smem;
3287 int aform;
3288 int scalar;
3289
3290 addr = XEXP (ops[0], 0);
3291
3292 if (GET_CODE (addr) == PLUS)
3293 {
3294 /* 8 cases:
3295 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3296 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3297 aligned reg + aligned const => lqd, c?d, shuf, stqx
3298 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3299 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3300 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3301 unaligned reg + aligned const => lqd, c?d, shuf, stqx
 3302         unaligned reg + unaligned const => not allowed by legitimate address
3303 */
3304 aform = 0;
3305 p0 = XEXP (addr, 0);
3306 p1 = p1_lo = XEXP (addr, 1);
3307 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3308 {
3309 p1_lo = GEN_INT (INTVAL (p1) & 15);
3310 p1 = GEN_INT (INTVAL (p1) & -16);
3311 addr = gen_rtx_PLUS (SImode, p0, p1);
3312 }
3313 }
3314 else if (GET_CODE (addr) == REG)
3315 {
3316 aform = 0;
3317 p0 = addr;
3318 p1 = p1_lo = const0_rtx;
3319 }
3320 else
3321 {
3322 aform = 1;
3323 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3324 p1 = 0; /* aform doesn't use p1 */
3325 p1_lo = addr;
3326 if (ALIGNED_SYMBOL_REF_P (addr))
3327 p1_lo = const0_rtx;
3328 else if (GET_CODE (addr) == CONST)
3329 {
3330 if (GET_CODE (XEXP (addr, 0)) == PLUS
3331 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3332 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3333 {
3334 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3335 if ((v & -16) != 0)
3336 addr = gen_rtx_CONST (Pmode,
3337 gen_rtx_PLUS (Pmode,
3338 XEXP (XEXP (addr, 0), 0),
3339 GEN_INT (v & -16)));
3340 else
3341 addr = XEXP (XEXP (addr, 0), 0);
3342 p1_lo = GEN_INT (v & 15);
3343 }
3344 }
3345 else if (GET_CODE (addr) == CONST_INT)
3346 {
3347 p1_lo = GEN_INT (INTVAL (addr) & 15);
3348 addr = GEN_INT (INTVAL (addr) & -16);
3349 }
3350 }
3351
e04cf423 3352 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3353
644459d0 3354 scalar = store_with_one_insn_p (ops[0]);
3355 if (!scalar)
3356 {
 3357      /* We could copy the flags from the ops[0] MEM to mem here, but
 3358         we don't because we want this load to be optimized away if
3359 possible, and copying the flags will prevent that in certain
3360 cases, e.g. consider the volatile flag. */
3361
e04cf423 3362 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3363 set_mem_alias_set (lmem, 0);
3364 emit_insn (gen_movti (reg, lmem));
644459d0 3365
3366 if (!p0 || reg_align (p0) >= 128)
3367 p0 = stack_pointer_rtx;
3368 if (!p1_lo)
3369 p1_lo = const0_rtx;
3370
3371 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3372 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3373 }
3374 else if (reload_completed)
3375 {
3376 if (GET_CODE (ops[1]) == REG)
3377 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3378 else if (GET_CODE (ops[1]) == SUBREG)
3379 emit_move_insn (reg,
3380 gen_rtx_REG (GET_MODE (reg),
3381 REGNO (SUBREG_REG (ops[1]))));
3382 else
3383 abort ();
3384 }
3385 else
3386 {
3387 if (GET_CODE (ops[1]) == REG)
3388 emit_insn (gen_spu_convert (reg, ops[1]));
3389 else if (GET_CODE (ops[1]) == SUBREG)
3390 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3391 else
3392 abort ();
3393 }
3394
3395 if (GET_MODE_SIZE (mode) < 4 && scalar)
3396 emit_insn (gen_shlqby_ti
3397 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3398
644459d0 3399 smem = change_address (ops[0], TImode, addr);
3400 /* We can't use the previous alias set because the memory has changed
3401 size and can potentially overlap objects of other types. */
3402 set_mem_alias_set (smem, 0);
3403
e04cf423 3404 emit_insn (gen_movti (smem, reg));
644459d0 3405}
3406
3407/* Return TRUE if X is MEM which is a struct member reference
3408 and the member can safely be loaded and stored with a single
3409 instruction because it is padded. */
3410static int
3411mem_is_padded_component_ref (rtx x)
3412{
3413 tree t = MEM_EXPR (x);
3414 tree r;
3415 if (!t || TREE_CODE (t) != COMPONENT_REF)
3416 return 0;
3417 t = TREE_OPERAND (t, 1);
3418 if (!t || TREE_CODE (t) != FIELD_DECL
3419 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3420 return 0;
3421 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3422 r = DECL_FIELD_CONTEXT (t);
3423 if (!r || TREE_CODE (r) != RECORD_TYPE)
3424 return 0;
3425 /* Make sure they are the same mode */
3426 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3427 return 0;
3428 /* If there are no following fields then the field alignment assures
 3429     the structure is padded to the alignment, which means this field is
3430 padded too. */
3431 if (TREE_CHAIN (t) == 0)
3432 return 1;
3433 /* If the following field is also aligned then this field will be
3434 padded. */
3435 t = TREE_CHAIN (t);
3436 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3437 return 1;
3438 return 0;
3439}
3440
3441int
3442spu_valid_move (rtx * ops)
3443{
3444 enum machine_mode mode = GET_MODE (ops[0]);
3445 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3446 return 0;
3447
3448 /* init_expr_once tries to recog against load and store insns to set
3449 the direct_load[] and direct_store[] arrays. We always want to
3450 consider those loads and stores valid. init_expr_once is called in
3451 the context of a dummy function which does not have a decl. */
3452 if (cfun->decl == 0)
3453 return 1;
3454
 3455  /* Don't allow loads/stores which would require more than 1 insn.
3456 During and after reload we assume loads and stores only take 1
3457 insn. */
3458 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3459 {
3460 if (GET_CODE (ops[0]) == MEM
3461 && (GET_MODE_SIZE (mode) < 4
3462 || !(store_with_one_insn_p (ops[0])
3463 || mem_is_padded_component_ref (ops[0]))))
3464 return 0;
3465 if (GET_CODE (ops[1]) == MEM
3466 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3467 return 0;
3468 }
3469 return 1;
3470}
3471
3472/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3473 can be generated using the fsmbi instruction. */
3474int
3475fsmbi_const_p (rtx x)
3476{
3477 enum machine_mode mode;
3478 unsigned char arr[16];
3479 int i;
3480
3481 /* We can always choose DImode for CONST_INT because the high bits of
3482 an SImode will always be all 1s, i.e., valid for fsmbi. */
3483 mode = GET_CODE (x) == CONST_INT ? DImode : GET_MODE (x);
3484 constant_to_array (mode, x, arr);
3485
3486 for (i = 0; i < 16; i++)
3487 if (arr[i] != 0 && arr[i] != 0xff)
3488 return 0;
3489 return 1;
3490}
3491
3492/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3493 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3494 than 16 bytes, the value is repeated across the rest of the array. */
3495void
3496constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3497{
3498 HOST_WIDE_INT val;
3499 int i, j, first;
3500
3501 memset (arr, 0, 16);
3502 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3503 if (GET_CODE (x) == CONST_INT
3504 || (GET_CODE (x) == CONST_DOUBLE
3505 && (mode == SFmode || mode == DFmode)))
3506 {
3507 gcc_assert (mode != VOIDmode && mode != BLKmode);
3508
3509 if (GET_CODE (x) == CONST_DOUBLE)
3510 val = const_double_to_hwint (x);
3511 else
3512 val = INTVAL (x);
3513 first = GET_MODE_SIZE (mode) - 1;
3514 for (i = first; i >= 0; i--)
3515 {
3516 arr[i] = val & 0xff;
3517 val >>= 8;
3518 }
3519 /* Splat the constant across the whole array. */
3520 for (j = 0, i = first + 1; i < 16; i++)
3521 {
3522 arr[i] = arr[j];
3523 j = (j == first) ? 0 : j + 1;
3524 }
3525 }
3526 else if (GET_CODE (x) == CONST_DOUBLE)
3527 {
3528 val = CONST_DOUBLE_LOW (x);
3529 for (i = 15; i >= 8; i--)
3530 {
3531 arr[i] = val & 0xff;
3532 val >>= 8;
3533 }
3534 val = CONST_DOUBLE_HIGH (x);
3535 for (i = 7; i >= 0; i--)
3536 {
3537 arr[i] = val & 0xff;
3538 val >>= 8;
3539 }
3540 }
3541 else if (GET_CODE (x) == CONST_VECTOR)
3542 {
3543 int units;
3544 rtx elt;
3545 mode = GET_MODE_INNER (mode);
3546 units = CONST_VECTOR_NUNITS (x);
3547 for (i = 0; i < units; i++)
3548 {
3549 elt = CONST_VECTOR_ELT (x, i);
3550 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3551 {
3552 if (GET_CODE (elt) == CONST_DOUBLE)
3553 val = const_double_to_hwint (elt);
3554 else
3555 val = INTVAL (elt);
3556 first = GET_MODE_SIZE (mode) - 1;
3557 if (first + i * GET_MODE_SIZE (mode) > 16)
3558 abort ();
3559 for (j = first; j >= 0; j--)
3560 {
3561 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3562 val >>= 8;
3563 }
3564 }
3565 }
3566 }
3567 else
3568 gcc_unreachable();
3569}
3570
3571/* Convert a 16 byte array to a constant of mode MODE. When MODE is
3572 smaller than 16 bytes, use the bytes that would represent that value
3573 in a register, e.g., for QImode return the value of arr[3]. */
3574rtx
3575array_to_constant (enum machine_mode mode, unsigned char arr[16])
3576{
3577 enum machine_mode inner_mode;
3578 rtvec v;
3579 int units, size, i, j, k;
3580 HOST_WIDE_INT val;
3581
3582 if (GET_MODE_CLASS (mode) == MODE_INT
3583 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3584 {
3585 j = GET_MODE_SIZE (mode);
3586 i = j < 4 ? 4 - j : 0;
3587 for (val = 0; i < j; i++)
3588 val = (val << 8) | arr[i];
3589 val = trunc_int_for_mode (val, mode);
3590 return GEN_INT (val);
3591 }
3592
3593 if (mode == TImode)
3594 {
3595 HOST_WIDE_INT high;
3596 for (i = high = 0; i < 8; i++)
3597 high = (high << 8) | arr[i];
3598 for (i = 8, val = 0; i < 16; i++)
3599 val = (val << 8) | arr[i];
3600 return immed_double_const (val, high, TImode);
3601 }
3602 if (mode == SFmode)
3603 {
3604 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3605 val = trunc_int_for_mode (val, SImode);
3606 return hwint_to_const_double (val, SFmode);
3607 }
3608 if (mode == DFmode)
3609 {
3610 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3611 val <<= 32;
3612 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3613 return hwint_to_const_double (val, DFmode);
3614 }
3615
3616 if (!VECTOR_MODE_P (mode))
3617 abort ();
3618
3619 units = GET_MODE_NUNITS (mode);
3620 size = GET_MODE_UNIT_SIZE (mode);
3621 inner_mode = GET_MODE_INNER (mode);
3622 v = rtvec_alloc (units);
3623
3624 for (k = i = 0; i < units; ++i)
3625 {
3626 val = 0;
3627 for (j = 0; j < size; j++, k++)
3628 val = (val << 8) | arr[k];
3629
3630 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3631 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3632 else
3633 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3634 }
3635 if (k > 16)
3636 abort ();
3637
3638 return gen_rtx_CONST_VECTOR (mode, v);
3639}
3640
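/* Diagnose the constant X when it would require a run-time relocation,
   which the SPU ABI disallows: a warning when TARGET_WARN_RELOC is set,
   otherwise an error.  */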
3641static void
3642reloc_diagnostic (rtx x)
3643{
3644 tree loc_decl, decl = 0;
3645 const char *msg;
3646 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3647 return;
3648
3649 if (GET_CODE (x) == SYMBOL_REF)
3650 decl = SYMBOL_REF_DECL (x);
3651 else if (GET_CODE (x) == CONST
3652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3653 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3654
3655 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3656 if (decl && !DECL_P (decl))
3657 decl = 0;
3658
3659 /* We use last_assemble_variable_decl to get line information. It's
3660 not always going to be right and might not even be close, but will
3661 be right for the more common cases. */
3662 if (!last_assemble_variable_decl)
3663 loc_decl = decl;
3664 else
3665 loc_decl = last_assemble_variable_decl;
3666
3667 /* The decl could be a string constant. */
3668 if (decl && DECL_P (decl))
3669 msg = "%Jcreating run-time relocation for %qD";
3670 else
3671 msg = "creating run-time relocation";
3672
99369027 3673 if (TARGET_WARN_RELOC)
644459d0 3674 warning (0, msg, loc_decl, decl);
99369027 3675 else
3676 error (msg, loc_decl, decl);
644459d0 3677}
3678
3679/* Hook into assemble_integer so we can generate an error for run-time
3680 relocations. The SPU ABI disallows them. */
3681static bool
3682spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3683{
3684 /* By default run-time relocations aren't supported, but we allow them
 3685     in case users support them in their own run-time loader, and we provide
3686 a warning for those users that don't. */
3687 if ((GET_CODE (x) == SYMBOL_REF)
3688 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3689 reloc_diagnostic (x);
3690
3691 return default_assemble_integer (x, size, aligned_p);
3692}
3693
3694static void
3695spu_asm_globalize_label (FILE * file, const char *name)
3696{
3697 fputs ("\t.global\t", file);
3698 assemble_name (file, name);
3699 fputs ("\n", file);
3700}
3701
3702static bool
3703spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3704{
3705 enum machine_mode mode = GET_MODE (x);
3706 int cost = COSTS_N_INSNS (2);
3707
3708 /* Folding to a CONST_VECTOR will use extra space but there might
3709 be only a small savings in cycles. We'd like to use a CONST_VECTOR
 3710     only if it allows us to fold away multiple insns.  Changing the cost
3711 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3712 because this cost will only be compared against a single insn.
3713 if (code == CONST_VECTOR)
3714 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3715 */
3716
3717 /* Use defaults for float operations. Not accurate but good enough. */
3718 if (mode == DFmode)
3719 {
3720 *total = COSTS_N_INSNS (13);
3721 return true;
3722 }
3723 if (mode == SFmode)
3724 {
3725 *total = COSTS_N_INSNS (6);
3726 return true;
3727 }
3728 switch (code)
3729 {
3730 case CONST_INT:
3731 if (satisfies_constraint_K (x))
3732 *total = 0;
3733 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
3734 *total = COSTS_N_INSNS (1);
3735 else
3736 *total = COSTS_N_INSNS (3);
3737 return true;
3738
3739 case CONST:
3740 *total = COSTS_N_INSNS (3);
3741 return true;
3742
3743 case LABEL_REF:
3744 case SYMBOL_REF:
3745 *total = COSTS_N_INSNS (0);
3746 return true;
3747
3748 case CONST_DOUBLE:
3749 *total = COSTS_N_INSNS (5);
3750 return true;
3751
3752 case FLOAT_EXTEND:
3753 case FLOAT_TRUNCATE:
3754 case FLOAT:
3755 case UNSIGNED_FLOAT:
3756 case FIX:
3757 case UNSIGNED_FIX:
3758 *total = COSTS_N_INSNS (7);
3759 return true;
3760
3761 case PLUS:
3762 if (mode == TImode)
3763 {
3764 *total = COSTS_N_INSNS (9);
3765 return true;
3766 }
3767 break;
3768
3769 case MULT:
3770 cost =
3771 GET_CODE (XEXP (x, 0)) ==
3772 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
3773 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
3774 {
3775 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3776 {
3777 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3778 cost = COSTS_N_INSNS (14);
3779 if ((val & 0xffff) == 0)
3780 cost = COSTS_N_INSNS (9);
3781 else if (val > 0 && val < 0x10000)
3782 cost = COSTS_N_INSNS (11);
3783 }
3784 }
3785 *total = cost;
3786 return true;
3787 case DIV:
3788 case UDIV:
3789 case MOD:
3790 case UMOD:
3791 *total = COSTS_N_INSNS (20);
3792 return true;
3793 case ROTATE:
3794 case ROTATERT:
3795 case ASHIFT:
3796 case ASHIFTRT:
3797 case LSHIFTRT:
3798 *total = COSTS_N_INSNS (4);
3799 return true;
3800 case UNSPEC:
3801 if (XINT (x, 1) == UNSPEC_CONVERT)
3802 *total = COSTS_N_INSNS (0);
3803 else
3804 *total = COSTS_N_INSNS (4);
3805 return true;
3806 }
 3807  /* Scale cost by mode size, except when initializing (cfun->decl == 0).  */
3808 if (GET_MODE_CLASS (mode) == MODE_INT
3809 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
3810 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
3811 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
3812 *total = cost;
3813 return true;
3814}
3815
3816enum machine_mode
3817spu_eh_return_filter_mode (void)
3818{
 3819  /* We would like this to be SImode, but sjlj exceptions seem to work
3820 only with word_mode. */
3821 return TImode;
3822}
3823
3824/* Decide whether we can make a sibling call to a function. DECL is the
3825 declaration of the function being targeted by the call and EXP is the
3826 CALL_EXPR representing the call. */
3827static bool
3828spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3829{
3830 return decl && !TARGET_LARGE_MEM;
3831}
3832
3833/* We need to correctly update the back chain pointer and the Available
 3834   Stack Size (which is in the second slot of the sp register). */
3835void
3836spu_allocate_stack (rtx op0, rtx op1)
3837{
3838 HOST_WIDE_INT v;
3839 rtx chain = gen_reg_rtx (V4SImode);
3840 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
3841 rtx sp = gen_reg_rtx (V4SImode);
3842 rtx splatted = gen_reg_rtx (V4SImode);
3843 rtx pat = gen_reg_rtx (TImode);
3844
3845 /* copy the back chain so we can save it back again. */
3846 emit_move_insn (chain, stack_bot);
3847
3848 op1 = force_reg (SImode, op1);
3849
3850 v = 0x1020300010203ll;
3851 emit_move_insn (pat, immed_double_const (v, v, TImode));
3852 emit_insn (gen_shufb (splatted, op1, op1, pat));
3853
3854 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
3855 emit_insn (gen_subv4si3 (sp, sp, splatted));
3856
3857 if (flag_stack_check)
3858 {
3859       rtx avail = gen_reg_rtx (SImode);
3860       rtx result = gen_reg_rtx (SImode);
3861       emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
3862       emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
3863       emit_insn (gen_spu_heq (result, GEN_INT (0)));
3864 }
3865
3866 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
3867
3868 emit_move_insn (stack_bot, chain);
3869
3870 emit_move_insn (op0, virtual_stack_dynamic_rtx);
3871}
3872
3873void
3874spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
3875{
3876 static unsigned char arr[16] =
3877 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
3878 rtx temp = gen_reg_rtx (SImode);
3879 rtx temp2 = gen_reg_rtx (SImode);
3880 rtx temp3 = gen_reg_rtx (V4SImode);
3881 rtx temp4 = gen_reg_rtx (V4SImode);
3882 rtx pat = gen_reg_rtx (TImode);
3883 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
3884
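   /* OP1 is the save area: word 0 holds the saved back chain, word 1 the
      saved stack pointer.  Each is turned into a delta from the current sp
      and splatted; adding a splatted delta to the sp vector updates both the
      pointer in slot 0 and the Available Stack Size in slot 1 consistently.
      The adjusted back-chain vector is stored at the new stack bottom.  */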
3885 /* Restore the backchain from the first word, sp from the second. */
3886 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
3887 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
3888
3889 emit_move_insn (pat, array_to_constant (TImode, arr));
3890
3891 /* Compute Available Stack Size for sp */
3892 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
3893 emit_insn (gen_shufb (temp3, temp, temp, pat));
3894
3895 /* Compute Available Stack Size for back chain */
3896 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
3897 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
3898 emit_insn (gen_addv4si3 (temp4, sp, temp4));
3899
3900 emit_insn (gen_addv4si3 (sp, sp, temp3));
3901 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
3902}
3903
3904static void
3905spu_init_libfuncs (void)
3906{
3907 set_optab_libfunc (smul_optab, DImode, "__muldi3");
3908 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
3909 set_optab_libfunc (smod_optab, DImode, "__moddi3");
3910 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
3911 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
3912 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
3913 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
3914 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
3915 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
3916 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
3917 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
3918
3919 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
3920 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
3921}
3922
3923/* Make a subreg, stripping any existing subreg. We could possibly just
3924 call simplify_subreg, but in this case we know what we want. */
3925rtx
3926spu_gen_subreg (enum machine_mode mode, rtx x)
3927{
3928 if (GET_CODE (x) == SUBREG)
3929 x = SUBREG_REG (x);
3930 if (GET_MODE (x) == mode)
3931 return x;
3932 return gen_rtx_SUBREG (mode, x, 0);
3933}
3934
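/* Return TRUE if a value of TYPE should be returned in memory: BLKmode
   values whose size is not a known constant or exceeds MAX_REGISTER_RETURN
   words.  */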
3935static bool
3936spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
3937{
3938 return (TYPE_MODE (type) == BLKmode
3939 && ((type) == 0
3940 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3941 || int_size_in_bytes (type) >
3942 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
3943}
3944\f
3945/* Create the built-in types and functions */
3946
3947struct spu_builtin_description spu_builtins[] = {
3948#define DEF_BUILTIN(fcode, icode, name, type, params) \
3949 {fcode, icode, name, type, params, NULL_TREE},
3950#include "spu-builtins.def"
3951#undef DEF_BUILTIN
3952};
3953
3954static void
3955spu_init_builtins (void)
3956{
3957 struct spu_builtin_description *d;
3958 unsigned int i;
3959
3960 V16QI_type_node = build_vector_type (intQI_type_node, 16);
3961 V8HI_type_node = build_vector_type (intHI_type_node, 8);
3962 V4SI_type_node = build_vector_type (intSI_type_node, 4);
3963 V2DI_type_node = build_vector_type (intDI_type_node, 2);
3964 V4SF_type_node = build_vector_type (float_type_node, 4);
3965 V2DF_type_node = build_vector_type (double_type_node, 2);
3966
3967 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
3968 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
3969 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
3970 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
3971
3972 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
3973
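  /* The immediate-range parameter kinds (SPU_BTI_7 ... SPU_BTI_U18) are all
     plain ints here; the actual range checking happens later in
     spu_check_builtin_parm.  */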
3974 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
3975 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
3976 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
3977 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
3978 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
3979 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
3980 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
3981 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
3982 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
3983 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
3984 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
3985 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
3986
3987 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
3988 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
3989 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
3990 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
3991 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
3992 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
3993 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
3994 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
3995
3996 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
3997 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
3998
3999 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4000
4001 spu_builtin_types[SPU_BTI_PTR] =
4002 build_pointer_type (build_qualified_type
4003 (void_type_node,
4004 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4005
4006 /* For each builtin we build a new prototype. The tree code will make
4007 sure nodes are shared. */
4008 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4009 {
4010 tree p;
4011       char name[64];	/* add_builtin_function will make a copy. */
4012 int parm;
4013
4014 if (d->name == 0)
4015 continue;
4016
4017       /* Find the last parm. */
4018 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4019 {
4020 }
4021
4022 p = void_list_node;
4023 while (parm > 1)
4024 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4025
4026 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4027
4028 sprintf (name, "__builtin_%s", d->name);
4029 d->fndecl =
4030 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4031 NULL, NULL_TREE);
4032 }
4033}
4034
4035int
4036spu_safe_dma (HOST_WIDE_INT channel)
4037{
4038 return (channel >= 21 && channel <= 27);
4039}
4040
4041void
4042spu_builtin_splats (rtx ops[])
4043{
4044 enum machine_mode mode = GET_MODE (ops[0]);
4045 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4046 {
4047 unsigned char arr[16];
4048 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4049 emit_move_insn (ops[0], array_to_constant (mode, arr));
4050 }
4051 else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4052 {
4053 rtvec v = rtvec_alloc (4);
4054 RTVEC_ELT (v, 0) = ops[1];
4055 RTVEC_ELT (v, 1) = ops[1];
4056 RTVEC_ELT (v, 2) = ops[1];
4057 RTVEC_ELT (v, 3) = ops[1];
4058 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4059 }
4060 else
4061 {
4062 rtx reg = gen_reg_rtx (TImode);
4063 rtx shuf;
4064 if (GET_CODE (ops[1]) != REG
4065 && GET_CODE (ops[1]) != SUBREG)
4066 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
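      /* Each shuffle pattern replicates the scalar from its preferred slot:
         bytes 0-7 for doubleword elements, bytes 0-3 for words, bytes 2-3
         for halfwords and byte 3 for bytes.  */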
4067 switch (mode)
4068 {
4069 case V2DImode:
4070 case V2DFmode:
4071 shuf =
4072 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4073 TImode);
4074 break;
4075 case V4SImode:
4076 case V4SFmode:
4077 shuf =
4078 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4079 TImode);
4080 break;
4081 case V8HImode:
4082 shuf =
4083 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4084 TImode);
4085 break;
4086 case V16QImode:
4087 shuf =
4088 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4089 TImode);
4090 break;
4091 default:
4092 abort ();
4093 }
4094 emit_move_insn (reg, shuf);
4095 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4096 }
4097}
4098
4099void
4100spu_builtin_extract (rtx ops[])
4101{
4102 enum machine_mode mode;
4103 rtx rot, from, tmp;
4104
4105 mode = GET_MODE (ops[1]);
4106
4107 if (GET_CODE (ops[2]) == CONST_INT)
4108 {
4109 switch (mode)
4110 {
4111 case V16QImode:
4112 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4113 break;
4114 case V8HImode:
4115 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4116 break;
4117 case V4SFmode:
4118 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4119 break;
4120 case V4SImode:
4121 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4122 break;
4123 case V2DImode:
4124 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4125 break;
4126 case V2DFmode:
4127 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4128 break;
4129 default:
4130 abort ();
4131 }
4132 return;
4133 }
4134
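  /* For a variable index, compute a byte rotate count that brings element
     ops[2] into the preferred slot (byte 3 for bytes, bytes 2-3 for
     halfwords, 0-3 for words, 0-7 for doublewords), rotate the whole
     quadword, and convert the preferred slot out.  */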
4135 from = spu_gen_subreg (TImode, ops[1]);
4136 rot = gen_reg_rtx (TImode);
4137 tmp = gen_reg_rtx (SImode);
4138
4139 switch (mode)
4140 {
4141 case V16QImode:
4142 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4143 break;
4144 case V8HImode:
4145 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4146 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4147 break;
4148 case V4SFmode:
4149 case V4SImode:
4150 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4151 break;
4152 case V2DImode:
4153 case V2DFmode:
4154 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4155 break;
4156 default:
4157 abort ();
4158 }
4159 emit_insn (gen_rotqby_ti (rot, from, tmp));
4160
4161 emit_insn (gen_spu_convert (ops[0], rot));
4162}
4163
4164void
4165spu_builtin_insert (rtx ops[])
4166{
4167 enum machine_mode mode = GET_MODE (ops[0]);
4168 enum machine_mode imode = GET_MODE_INNER (mode);
4169 rtx mask = gen_reg_rtx (TImode);
4170 rtx offset;
4171
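  /* The stack pointer is 16-byte aligned, so cpat on sp + offset yields the
     insertion mask for byte position offset; shufb then takes the
     GET_MODE_SIZE (imode) bytes of ops[1] at that position and the remaining
     bytes from ops[2].  */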
4172 if (GET_CODE (ops[3]) == CONST_INT)
4173 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4174 else
4175 {
4176 offset = gen_reg_rtx (SImode);
4177 emit_insn (gen_mulsi3
4178 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4179 }
4180 emit_insn (gen_cpat
4181 (mask, stack_pointer_rtx, offset,
4182 GEN_INT (GET_MODE_SIZE (imode))));
4183 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4184}
4185
4186void
4187spu_builtin_promote (rtx ops[])
4188{
4189 enum machine_mode mode, imode;
4190 rtx rot, from, offset;
4191 HOST_WIDE_INT pos;
4192
4193 mode = GET_MODE (ops[0]);
4194 imode = GET_MODE_INNER (mode);
4195
4196 from = gen_reg_rtx (TImode);
4197 rot = spu_gen_subreg (TImode, ops[0]);
4198
4199 emit_insn (gen_spu_convert (from, ops[1]));
4200
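  /* The scalar sits in the preferred slot of FROM; rotate left by minus the
     byte offset of element ops[2] (adjusted so sub-word scalars land on the
     right bytes), leaving the scalar in element ops[2] of the result.  */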
4201 if (GET_CODE (ops[2]) == CONST_INT)
4202 {
4203 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4204 if (GET_MODE_SIZE (imode) < 4)
4205 pos += 4 - GET_MODE_SIZE (imode);
4206 offset = GEN_INT (pos & 15);
4207 }
4208 else
4209 {
4210 offset = gen_reg_rtx (SImode);
4211 switch (mode)
4212 {
4213 case V16QImode:
4214 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4215 break;
4216 case V8HImode:
4217 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4218 emit_insn (gen_addsi3 (offset, offset, offset));
4219 break;
4220 case V4SFmode:
4221 case V4SImode:
4222 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4223 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4224 break;
4225 case V2DImode:
4226 case V2DFmode:
4227 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4228 break;
4229 default:
4230 abort ();
4231 }
4232 }
4233 emit_insn (gen_rotqby_ti (rot, from, offset));
4234}
4235
4236void
4237spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4238{
4239 rtx shuf = gen_reg_rtx (V4SImode);
4240 rtx insn = gen_reg_rtx (V4SImode);
4241 rtx shufc;
4242 rtx insnc;
4243 rtx mem;
4244
4245 fnaddr = force_reg (SImode, fnaddr);
4246 cxt = force_reg (SImode, cxt);
4247
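  /* The trampoline is built by shuffling FNADDR and CXT into the immediate
     fields of skeleton instruction words (insna) that load the static chain
     register and jump to the function, then writing the result to the
     trampoline and issuing a sync.  */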
4248 if (TARGET_LARGE_MEM)
4249 {
4250 rtx rotl = gen_reg_rtx (V4SImode);
4251 rtx mask = gen_reg_rtx (V4SImode);
4252 rtx bi = gen_reg_rtx (SImode);
4253 unsigned char shufa[16] = {
4254 2, 3, 0, 1, 18, 19, 16, 17,
4255 0, 1, 2, 3, 16, 17, 18, 19
4256 };
4257 unsigned char insna[16] = {
4258 0x41, 0, 0, 79,
4259 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4260 0x60, 0x80, 0, 79,
4261 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4262 };
4263
4264 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4265 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4266
4267 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4268 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4269 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4270 emit_insn (gen_selb (insn, insnc, rotl, mask));
4271
4272 mem = memory_address (Pmode, tramp);
4273 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4274
4275 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4276 mem = memory_address (Pmode, plus_constant (tramp, 16));
4277 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4278 }
4279 else
4280 {
4281 rtx scxt = gen_reg_rtx (SImode);
4282 rtx sfnaddr = gen_reg_rtx (SImode);
4283 unsigned char insna[16] = {
4284 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4285 0x30, 0, 0, 0,
4286 0, 0, 0, 0,
4287 0, 0, 0, 0
4288 };
4289
4290 shufc = gen_reg_rtx (TImode);
4291 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4292
4293       /* By or'ing all of cxt with the ila opcode we are assuming cxt
4294          fits in 18 bits and the last 4 are zeros. This will be true if
4295          the stack pointer is initialized to 0x3fff0 at program start;
4296          otherwise the ila instruction will be garbage. */
4297
4298 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4299 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4300 emit_insn (gen_cpat
4301 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4302 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4303 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4304
4305 mem = memory_address (Pmode, tramp);
4306 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4307
4308 }
4309 emit_insn (gen_sync ());
4310}
4311
4312void
4313spu_expand_sign_extend (rtx ops[])
4314{
4315 unsigned char arr[16];
4316 rtx pat = gen_reg_rtx (TImode);
4317 rtx sign, c;
4318 int i, last;
4319 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
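  /* Build a shufb control: most bytes of each element replicate a byte that
     holds only sign bits, while the low-order bytes select the original
     value, leaving the widened result in the preferred slot.  */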
4320 if (GET_MODE (ops[1]) == QImode)
4321 {
4322 sign = gen_reg_rtx (HImode);
4323 emit_insn (gen_extendqihi2 (sign, ops[1]));
4324 for (i = 0; i < 16; i++)
4325 arr[i] = 0x12;
4326 arr[last] = 0x13;
4327 }
4328 else
4329 {
4330 for (i = 0; i < 16; i++)
4331 arr[i] = 0x10;
4332 switch (GET_MODE (ops[1]))
4333 {
4334 case HImode:
4335 sign = gen_reg_rtx (SImode);
4336 emit_insn (gen_extendhisi2 (sign, ops[1]));
4337 arr[last] = 0x03;
4338 arr[last - 1] = 0x02;
4339 break;
4340 case SImode:
4341 sign = gen_reg_rtx (SImode);
4342 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4343 for (i = 0; i < 4; i++)
4344 arr[last - i] = 3 - i;
4345 break;
4346 case DImode:
4347 sign = gen_reg_rtx (SImode);
4348 c = gen_reg_rtx (SImode);
4349 emit_insn (gen_spu_convert (c, ops[1]));
4350 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4351 for (i = 0; i < 8; i++)
4352 arr[last - i] = 7 - i;
4353 break;
4354 default:
4355 abort ();
4356 }
4357 }
4358 emit_move_insn (pat, array_to_constant (TImode, arr));
4359 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4360}
4361
4362 /* Expand vector initialization. If there are any constant parts,
4363    load them first, then load any non-constant parts. */
4364void
4365spu_expand_vector_init (rtx target, rtx vals)
4366{
4367 enum machine_mode mode = GET_MODE (target);
4368 int n_elts = GET_MODE_NUNITS (mode);
4369 int n_var = 0;
4370 bool all_same = true;
790c536c 4371 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 4372 int i;
4373
4374 first = XVECEXP (vals, 0, 0);
4375 for (i = 0; i < n_elts; ++i)
4376 {
4377 x = XVECEXP (vals, 0, i);
4378 if (!CONSTANT_P (x))
4379 ++n_var;
4380 else
4381 {
4382 if (first_constant == NULL_RTX)
4383 first_constant = x;
4384 }
4385 if (i > 0 && !rtx_equal_p (x, first))
4386 all_same = false;
4387 }
4388
4389   /* If all elements are the same, use splats to repeat elements. */
4390 if (all_same)
4391 {
4392 if (!CONSTANT_P (first)
4393 && !register_operand (first, GET_MODE (x)))
4394 first = force_reg (GET_MODE (first), first);
4395 emit_insn (gen_spu_splats (target, first));
4396 return;
4397 }
4398
4399   /* Load constant parts. */
4400 if (n_var != n_elts)
4401 {
4402 if (n_var == 0)
4403 {
4404 emit_move_insn (target,
4405 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4406 }
4407 else
4408 {
4409 rtx constant_parts_rtx = copy_rtx (vals);
4410
4411 gcc_assert (first_constant != NULL_RTX);
4412 	  /* Fill empty slots with the first constant; this increases
4413 	     our chance of using splats in the recursive call below. */
4414 for (i = 0; i < n_elts; ++i)
4415 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4416 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4417
4418 spu_expand_vector_init (target, constant_parts_rtx);
4419 }
4420 }
4421
4422   /* Load variable parts. */
4423 if (n_var != 0)
4424 {
4425 rtx insert_operands[4];
4426
4427 insert_operands[0] = target;
4428 insert_operands[2] = target;
4429 for (i = 0; i < n_elts; ++i)
4430 {
4431 x = XVECEXP (vals, 0, i);
4432 if (!CONSTANT_P (x))
4433 {
4434 if (!register_operand (x, GET_MODE (x)))
4435 x = force_reg (GET_MODE (x), x);
4436 insert_operands[1] = x;
4437 insert_operands[3] = GEN_INT (i);
4438 spu_builtin_insert (insert_operands);
4439 }
4440 }
4441 }
4442}
6352eedf 4443
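/* Force OP into a register of mode MODE, converting if necessary: same-size
   modes get a subreg, differently sized ones go through spu_convert, and
   VOIDmode constants are first converted with convert_to_mode.  */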
4444static rtx
4445spu_force_reg (enum machine_mode mode, rtx op)
4446{
4447 rtx x, r;
4448 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
4449 {
4450 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
4451 || GET_MODE (op) == BLKmode)
4452 return force_reg (mode, convert_to_mode (mode, op, 0));
4453 abort ();
4454 }
4455
4456 r = force_reg (GET_MODE (op), op);
4457 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
4458 {
4459 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
4460 if (x)
4461 return x;
4462 }
4463
4464 x = gen_reg_rtx (mode);
4465 emit_insn (gen_spu_convert (x, r));
4466 return x;
4467}
4468
4469static void
4470spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
4471{
4472 HOST_WIDE_INT v = 0;
4473 int lsbits;
4474 /* Check the range of immediate operands. */
4475 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
4476 {
4477 int range = p - SPU_BTI_7;
4478 if (!CONSTANT_P (op)
4479 || (GET_CODE (op) == CONST_INT
4480 && (INTVAL (op) < spu_builtin_range[range].low
4481 || INTVAL (op) > spu_builtin_range[range].high)))
4482 error ("%s expects an integer literal in the range [%d, %d].",
4483 d->name,
4484 spu_builtin_range[range].low, spu_builtin_range[range].high);
4485
4486 if (GET_CODE (op) == CONST
4487 && (GET_CODE (XEXP (op, 0)) == PLUS
4488 || GET_CODE (XEXP (op, 0)) == MINUS))
4489 {
4490 v = INTVAL (XEXP (XEXP (op, 0), 1));
4491 op = XEXP (XEXP (op, 0), 0);
4492 }
4493 else if (GET_CODE (op) == CONST_INT)
4494 v = INTVAL (op);
4495
4496 switch (p)
4497 {
4498 case SPU_BTI_S10_4:
4499 lsbits = 4;
4500 break;
4501 case SPU_BTI_U16_2:
4502 	case SPU_BTI_U16_2:
4503 	  /* This is only used in lqa and stqa. Even though the insns
4504 	     encode 16 bits of the address (all but the 2 least
4505 	     significant), only 14 bits are used because it is masked to
	     be 16-byte aligned. */
4506 lsbits = 4;
4507 break;
4508 case SPU_BTI_S16_2:
4509 /* This is used for lqr and stqr. */
4510 lsbits = 2;
4511 break;
4512 default:
4513 lsbits = 0;
4514 }
4515
4516 if (GET_CODE (op) == LABEL_REF
4517 || (GET_CODE (op) == SYMBOL_REF
4518 && SYMBOL_REF_FUNCTION_P (op))
4519 || (INTVAL (op) & ((1 << lsbits) - 1)) != 0)
4520 warning (0, "%d least significant bits of %s are ignored.", lsbits,
4521 d->name);
4522 }
4523}
4524
4525
4526static void
4527expand_builtin_args (struct spu_builtin_description *d, tree arglist,
4528 rtx target, rtx ops[])
4529{
4530 enum insn_code icode = d->icode;
4531 int i = 0;
4532
4533 /* Expand the arguments into rtl. */
4534
4535 if (d->parm[0] != SPU_BTI_VOID)
4536 ops[i++] = target;
4537
4538 for (; i < insn_data[icode].n_operands; i++)
4539 {
4540 tree arg = TREE_VALUE (arglist);
4541 if (arg == 0)
4542 abort ();
4543 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
4544 arglist = TREE_CHAIN (arglist);
4545 }
4546}
4547
4548static rtx
4549spu_expand_builtin_1 (struct spu_builtin_description *d,
4550 tree arglist, rtx target)
4551{
4552 rtx pat;
4553 rtx ops[8];
4554 enum insn_code icode = d->icode;
4555 enum machine_mode mode, tmode;
4556 int i, p;
4557 tree return_type;
4558
4559 /* Set up ops[] with values from arglist. */
4560 expand_builtin_args (d, arglist, target, ops);
4561
4562 /* Handle the target operand which must be operand 0. */
4563 i = 0;
4564 if (d->parm[0] != SPU_BTI_VOID)
4565 {
4566
4567       /* We prefer the mode specified for the match_operand; otherwise
4568          we use the mode from the builtin function prototype. */
4569 tmode = insn_data[d->icode].operand[0].mode;
4570 if (tmode == VOIDmode)
4571 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
4572
4573       /* Try to use target because not using it can lead to extra copies,
4574          and when all of the registers are in use, extra copies lead
4575          to extra spills. */
4576 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
4577 ops[0] = target;
4578 else
4579 target = ops[0] = gen_reg_rtx (tmode);
4580
4581 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
4582 abort ();
4583
4584 i++;
4585 }
4586
4587   /* Ignore align_hint, but still expand its args in case they have
4588      side effects. */
4589 if (icode == CODE_FOR_spu_align_hint)
4590 return 0;
4591
4592 /* Handle the rest of the operands. */
4593 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
4594 {
4595 if (insn_data[d->icode].operand[i].mode != VOIDmode)
4596 mode = insn_data[d->icode].operand[i].mode;
4597 else
4598 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
4599
4600 /* mode can be VOIDmode here for labels */
4601
4602 /* For specific intrinsics with an immediate operand, e.g.,
4603 si_ai(), we sometimes need to convert the scalar argument to a
4604 vector argument by splatting the scalar. */
4605 if (VECTOR_MODE_P (mode)
4606 && (GET_CODE (ops[i]) == CONST_INT
4607 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
4608 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
4609 {
4610 if (GET_CODE (ops[i]) == CONST_INT)
4611 ops[i] = spu_const (mode, INTVAL (ops[i]));
4612 else
4613 {
4614 rtx reg = gen_reg_rtx (mode);
4615 enum machine_mode imode = GET_MODE_INNER (mode);
4616 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
4617 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
4618 if (imode != GET_MODE (ops[i]))
4619 ops[i] = convert_to_mode (imode, ops[i],
4620 TYPE_UNSIGNED (spu_builtin_types
4621 [d->parm[i]]));
4622 emit_insn (gen_spu_splats (reg, ops[i]));
4623 ops[i] = reg;
4624 }
4625 }
4626
4627 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
4628 ops[i] = spu_force_reg (mode, ops[i]);
4629
4630 spu_check_builtin_parm (d, ops[i], d->parm[p]);
4631 }
4632
4633 switch (insn_data[icode].n_operands)
4634 {
4635 case 0:
4636 pat = GEN_FCN (icode) (0);
4637 break;
4638 case 1:
4639 pat = GEN_FCN (icode) (ops[0]);
4640 break;
4641 case 2:
4642 pat = GEN_FCN (icode) (ops[0], ops[1]);
4643 break;
4644 case 3:
4645 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
4646 break;
4647 case 4:
4648 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
4649 break;
4650 case 5:
4651 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
4652 break;
4653 case 6:
4654 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
4655 break;
4656 default:
4657 abort ();
4658 }
4659
4660 if (!pat)
4661 abort ();
4662
4663 if (d->type == B_CALL || d->type == B_BISLED)
4664 emit_call_insn (pat);
4665 else if (d->type == B_JUMP)
4666 {
4667 emit_jump_insn (pat);
4668 emit_barrier ();
4669 }
4670 else
4671 emit_insn (pat);
4672
4673 return_type = spu_builtin_types[d->parm[0]];
4674 if (d->parm[0] != SPU_BTI_VOID
4675 && GET_MODE (target) != TYPE_MODE (return_type))
4676 {
4677       /* target is the return value. It should always have the mode of
4678          the builtin function prototype. */
4679 target = spu_force_reg (TYPE_MODE (return_type), target);
4680 }
4681
4682 return target;
4683}
4684
4685rtx
4686spu_expand_builtin (tree exp,
4687 rtx target,
4688 rtx subtarget ATTRIBUTE_UNUSED,
4689 enum machine_mode mode ATTRIBUTE_UNUSED,
4690 int ignore ATTRIBUTE_UNUSED)
4691{
4692 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
4693 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
4694 tree arglist = TREE_OPERAND (exp, 1);
4695 struct spu_builtin_description *d;
4696
4697 if (fcode < NUM_SPU_BUILTINS)
4698 {
4699 d = &spu_builtins[fcode];
4700
4701 return spu_expand_builtin_1 (d, arglist, target);
4702 }
4703 abort ();
4704}
4705