gcc/config/spu/spu.c
75a70cf9 1/* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
59struct spu_builtin_range
60{
61 int low, high;
62};
63
64static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77};
78
644459d0 79\f
80/* Target specific attribute specifications. */
81char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83/* Prototypes and external defs. */
84static void spu_init_builtins (void);
85static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88static rtx get_pic_reg (void);
89static int need_to_save_reg (int regno, int saving);
90static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94static void emit_nop_for_insn (rtx insn);
95static bool insn_clobbers_hbr (rtx insn);
96static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
5474166e 98static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
644459d0 100static rtx get_branch_target (rtx branch);
101static void insert_branch_hints (void);
102static void insert_nops (void);
103static void spu_machine_dependent_reorg (void);
104static int spu_sched_issue_rate (void);
105static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107static int get_pipe (rtx insn);
108static int spu_sched_adjust_priority (rtx insn, int pri);
109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static int spu_naked_function_p (tree func);
fb80456a 117static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
644459d0 119static tree spu_build_builtin_va_list (void);
8a58ed0a 120static void spu_va_start (tree, rtx);
75a70cf9 121static tree spu_gimplify_va_arg_expr (tree valist, tree type,
122 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 123static int regno_aligned_for_load (int regno);
124static int store_with_one_insn_p (rtx mem);
644459d0 125static int mem_is_padded_component_ref (rtx x);
126static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
127static void spu_asm_globalize_label (FILE * file, const char *name);
128static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 129 int *total, bool speed);
644459d0 130static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
131static void spu_init_libfuncs (void);
fb80456a 132static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 133static void fix_range (const char *);
69ced2d6 134static void spu_encode_section_info (tree, rtx, int);
e99f512d 135static tree spu_builtin_mul_widen_even (tree);
136static tree spu_builtin_mul_widen_odd (tree);
a76866d3 137static tree spu_builtin_mask_for_load (void);
a28df51d 138static int spu_builtin_vectorization_cost (bool);
a9f1838b 139static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 140static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 141static int spu_sms_res_mii (struct ddg *g);
644459d0 142
143extern const char *reg_names[];
144rtx spu_compare_op0, spu_compare_op1;
145
5474166e 146/* Which instruction set architecture to use. */
147int spu_arch;
148/* Which processor we are tuning for. */
149int spu_tune;
150
644459d0 151enum spu_immediate {
152 SPU_NONE,
153 SPU_IL,
154 SPU_ILA,
155 SPU_ILH,
156 SPU_ILHU,
157 SPU_ORI,
158 SPU_ORHI,
159 SPU_ORBI,
99369027 160 SPU_IOHL
644459d0 161};
dea01258 162enum immediate_class
163{
164 IC_POOL, /* constant pool */
165 IC_IL1, /* one il* instruction */
166 IC_IL2, /* both ilhu and iohl instructions */
167 IC_IL1s, /* one il* instruction */
168 IC_IL2s, /* both ilhu and iohl instructions */
169 IC_FSMBI, /* the fsmbi instruction */
170 IC_CPAT, /* one of the c*d instructions */
5df189be 171 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 172};
644459d0 173
174static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
175static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 176static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
177static enum immediate_class classify_immediate (rtx op,
178 enum machine_mode mode);
644459d0 179
1bd43494 180static enum machine_mode spu_unwind_word_mode (void);
181
ea32e033 182static enum machine_mode
183spu_libgcc_cmp_return_mode (void);
184
185static enum machine_mode
186spu_libgcc_shift_count_mode (void);
187
644459d0 188/* Built in types. */
189tree spu_builtin_types[SPU_BTI_MAX];
190\f
191/* TARGET overrides. */
192
193#undef TARGET_INIT_BUILTINS
194#define TARGET_INIT_BUILTINS spu_init_builtins
195
644459d0 196#undef TARGET_EXPAND_BUILTIN
197#define TARGET_EXPAND_BUILTIN spu_expand_builtin
198
1bd43494 199#undef TARGET_UNWIND_WORD_MODE
200#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 201
202/* The .8byte directive doesn't seem to work well for a 32-bit
203 architecture. */
204#undef TARGET_ASM_UNALIGNED_DI_OP
205#define TARGET_ASM_UNALIGNED_DI_OP NULL
206
207#undef TARGET_RTX_COSTS
208#define TARGET_RTX_COSTS spu_rtx_costs
209
210#undef TARGET_ADDRESS_COST
f529eb25 211#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 212
213#undef TARGET_SCHED_ISSUE_RATE
214#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
215
216#undef TARGET_SCHED_VARIABLE_ISSUE
217#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
218
219#undef TARGET_SCHED_ADJUST_PRIORITY
220#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
221
222#undef TARGET_SCHED_ADJUST_COST
223#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
224
225const struct attribute_spec spu_attribute_table[];
226#undef TARGET_ATTRIBUTE_TABLE
227#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
228
229#undef TARGET_ASM_INTEGER
230#define TARGET_ASM_INTEGER spu_assemble_integer
231
232#undef TARGET_SCALAR_MODE_SUPPORTED_P
233#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
234
235#undef TARGET_VECTOR_MODE_SUPPORTED_P
236#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
237
238#undef TARGET_FUNCTION_OK_FOR_SIBCALL
239#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
240
241#undef TARGET_ASM_GLOBALIZE_LABEL
242#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
243
244#undef TARGET_PASS_BY_REFERENCE
245#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
246
247#undef TARGET_MUST_PASS_IN_STACK
248#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
249
250#undef TARGET_BUILD_BUILTIN_VA_LIST
251#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
252
8a58ed0a 253#undef TARGET_EXPAND_BUILTIN_VA_START
254#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
255
644459d0 256#undef TARGET_SETUP_INCOMING_VARARGS
257#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
258
259#undef TARGET_MACHINE_DEPENDENT_REORG
260#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
261
262#undef TARGET_GIMPLIFY_VA_ARG_EXPR
263#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
264
265#undef TARGET_DEFAULT_TARGET_FLAGS
266#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
267
268#undef TARGET_INIT_LIBFUNCS
269#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
270
271#undef TARGET_RETURN_IN_MEMORY
272#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
273
69ced2d6 274#undef TARGET_ENCODE_SECTION_INFO
275#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
276
e99f512d 277#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
278#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
279
280#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
281#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
282
a76866d3 283#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
284#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
285
a28df51d 286#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
287#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
288
0e87db76 289#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
290#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
291
a0515226 292#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
293#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
294
ea32e033 295#undef TARGET_LIBGCC_CMP_RETURN_MODE
296#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
297
298#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
299#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
300
d52fd16a 301#undef TARGET_SCHED_SMS_RES_MII
302#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
303
644459d0 304struct gcc_target targetm = TARGET_INITIALIZER;
305
5df189be 306void
307spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
308{
5df189be 309 /* Override some of the default param values. With so many registers
310 larger values are better for these params. */
311 MAX_PENDING_LIST_LENGTH = 128;
312
313 /* With so many registers this is better on by default. */
314 flag_rename_registers = 1;
315}
316
644459d0 317/* Sometimes certain combinations of command options do not make sense
318 on a particular target machine. You can define a macro
319 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
320 executed once just after all the command options have been parsed. */
321void
322spu_override_options (void)
323{
14d408d9 324 /* Small loops will be completely peeled (unrolled) at -O3. For SPU
325 it is more important to keep code small by default. */
326 if (!flag_unroll_loops && !flag_peel_loops
327 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
328 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
329
644459d0 330 flag_omit_frame_pointer = 1;
331
332 if (align_functions < 8)
333 align_functions = 8;
c7b91b14 334
335 if (spu_fixed_range_string)
336 fix_range (spu_fixed_range_string);
5474166e 337
338 /* Determine processor architectural level. */
339 if (spu_arch_string)
340 {
341 if (strcmp (&spu_arch_string[0], "cell") == 0)
342 spu_arch = PROCESSOR_CELL;
343 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
344 spu_arch = PROCESSOR_CELLEDP;
345 else
346 error ("Unknown architecture '%s'", &spu_arch_string[0]);
347 }
348
349 /* Determine processor to tune for. */
350 if (spu_tune_string)
351 {
352 if (strcmp (&spu_tune_string[0], "cell") == 0)
353 spu_tune = PROCESSOR_CELL;
354 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
355 spu_tune = PROCESSOR_CELLEDP;
356 else
357 error ("Unknown tune setting '%s'", &spu_tune_string[0]);
358 }
98bbec1e 359
360 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 361}
362\f
363/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
364 struct attribute_spec.handler. */
365
366/* Table of machine attributes. */
367const struct attribute_spec spu_attribute_table[] =
368{
369 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
370 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
371 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
372 { NULL, 0, 0, false, false, false, NULL }
373};
374
375/* True if MODE is valid for the target. By "valid", we mean able to
376 be manipulated in non-trivial ways. In particular, this means all
377 the arithmetic is supported. */
378static bool
379spu_scalar_mode_supported_p (enum machine_mode mode)
380{
381 switch (mode)
382 {
383 case QImode:
384 case HImode:
385 case SImode:
386 case SFmode:
387 case DImode:
388 case TImode:
389 case DFmode:
390 return true;
391
392 default:
393 return false;
394 }
395}
396
397/* Similarly for vector modes. "Supported" here is less strict. At
398 least some operations are supported; need to check optabs or builtins
399 for further details. */
400static bool
401spu_vector_mode_supported_p (enum machine_mode mode)
402{
403 switch (mode)
404 {
405 case V16QImode:
406 case V8HImode:
407 case V4SImode:
408 case V2DImode:
409 case V4SFmode:
410 case V2DFmode:
411 return true;
412
413 default:
414 return false;
415 }
416}
417
418/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
419 least significant bytes of the outer mode. This function returns
420 TRUE for the SUBREGs where this is correct. */
421int
422valid_subreg (rtx op)
423{
424 enum machine_mode om = GET_MODE (op);
425 enum machine_mode im = GET_MODE (SUBREG_REG (op));
426 return om != VOIDmode && im != VOIDmode
427 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 428 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
429 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 430}
431
432/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
fa7637bd 433 and adjust the start offset. */
644459d0 434static rtx
435adjust_operand (rtx op, HOST_WIDE_INT * start)
436{
437 enum machine_mode mode;
438 int op_size;
38aca5eb 439 /* Strip any paradoxical SUBREG. */
440 if (GET_CODE (op) == SUBREG
441 && (GET_MODE_BITSIZE (GET_MODE (op))
442 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 443 {
444 if (start)
445 *start -=
446 GET_MODE_BITSIZE (GET_MODE (op)) -
447 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
448 op = SUBREG_REG (op);
449 }
450 /* If it is smaller than SI, widen it to SI (a SUBREG is added below). */
451 op_size = GET_MODE_BITSIZE (GET_MODE (op));
452 if (op_size < 32)
453 {
454 if (start)
455 *start += 32 - op_size;
456 op_size = 32;
457 }
458 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
459 mode = mode_for_size (op_size, MODE_INT, 0);
460 if (mode != GET_MODE (op))
461 op = gen_rtx_SUBREG (mode, op, 0);
462 return op;
463}
464
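/* Expand an extv/extzv pattern.  ops[0] is the destination, ops[1] the
   source, ops[2] the bit width and ops[3] the starting bit position.
   The field is shifted up to the most significant end of the (possibly
   widened) source, shifted back down with a logical or arithmetic right
   shift depending on UNSIGNEDP, and converted to the destination mode.  */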
465void
466spu_expand_extv (rtx ops[], int unsignedp)
467{
468 HOST_WIDE_INT width = INTVAL (ops[2]);
469 HOST_WIDE_INT start = INTVAL (ops[3]);
470 HOST_WIDE_INT src_size, dst_size;
471 enum machine_mode src_mode, dst_mode;
472 rtx dst = ops[0], src = ops[1];
473 rtx s;
474
475 dst = adjust_operand (ops[0], 0);
476 dst_mode = GET_MODE (dst);
477 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
478
644459d0 479 src = adjust_operand (src, &start);
480 src_mode = GET_MODE (src);
481 src_size = GET_MODE_BITSIZE (GET_MODE (src));
482
483 if (start > 0)
484 {
485 s = gen_reg_rtx (src_mode);
486 switch (src_mode)
487 {
488 case SImode:
489 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
490 break;
491 case DImode:
492 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
493 break;
494 case TImode:
495 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
496 break;
497 default:
498 abort ();
499 }
500 src = s;
501 }
502
503 if (width < src_size)
504 {
505 rtx pat;
506 int icode;
507 switch (src_mode)
508 {
509 case SImode:
510 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
511 break;
512 case DImode:
513 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
514 break;
515 case TImode:
516 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
517 break;
518 default:
519 abort ();
520 }
521 s = gen_reg_rtx (src_mode);
522 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
523 emit_insn (pat);
524 src = s;
525 }
526
527 convert_move (dst, src, unsignedp);
528}
529
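/* Expand an insv pattern.  ops[0] is the field being written (register
   or memory), ops[1] the bit width, ops[2] the starting bit position and
   ops[3] the value to insert.  A mask covering the field is built (e.g.
   for a 32-bit destination with start 8 and width 16 the mask works out
   to 0x00ffff00), the value is shifted into place, and selb merges it
   with the original contents.  Memory destinations are handled by
   rotating within one aligned quadword, or two when the field may cross
   the alignment boundary implied by MEM_ALIGN.  */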
530void
531spu_expand_insv (rtx ops[])
532{
533 HOST_WIDE_INT width = INTVAL (ops[1]);
534 HOST_WIDE_INT start = INTVAL (ops[2]);
535 HOST_WIDE_INT maskbits;
536 enum machine_mode dst_mode, src_mode;
537 rtx dst = ops[0], src = ops[3];
538 int dst_size, src_size;
539 rtx mask;
540 rtx shift_reg;
541 int shift;
542
543
544 if (GET_CODE (ops[0]) == MEM)
545 dst = gen_reg_rtx (TImode);
546 else
547 dst = adjust_operand (dst, &start);
548 dst_mode = GET_MODE (dst);
549 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
550
551 if (CONSTANT_P (src))
552 {
553 enum machine_mode m =
554 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
555 src = force_reg (m, convert_to_mode (m, src, 0));
556 }
557 src = adjust_operand (src, 0);
558 src_mode = GET_MODE (src);
559 src_size = GET_MODE_BITSIZE (GET_MODE (src));
560
561 mask = gen_reg_rtx (dst_mode);
562 shift_reg = gen_reg_rtx (dst_mode);
563 shift = dst_size - start - width;
564
565 /* It's not safe to use subreg here because the compiler assumes
566 that the SUBREG_REG is right justified in the SUBREG. */
567 convert_move (shift_reg, src, 1);
568
569 if (shift > 0)
570 {
571 switch (dst_mode)
572 {
573 case SImode:
574 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
575 break;
576 case DImode:
577 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
578 break;
579 case TImode:
580 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
581 break;
582 default:
583 abort ();
584 }
585 }
586 else if (shift < 0)
587 abort ();
588
589 switch (dst_size)
590 {
591 case 32:
592 maskbits = (-1ll << (32 - width - start));
593 if (start)
594 maskbits += (1ll << (32 - start));
595 emit_move_insn (mask, GEN_INT (maskbits));
596 break;
597 case 64:
598 maskbits = (-1ll << (64 - width - start));
599 if (start)
600 maskbits += (1ll << (64 - start));
601 emit_move_insn (mask, GEN_INT (maskbits));
602 break;
603 case 128:
604 {
605 unsigned char arr[16];
606 int i = start / 8;
607 memset (arr, 0, sizeof (arr));
608 arr[i] = 0xff >> (start & 7);
609 for (i++; i <= (start + width - 1) / 8; i++)
610 arr[i] = 0xff;
611 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
612 emit_move_insn (mask, array_to_constant (TImode, arr));
613 }
614 break;
615 default:
616 abort ();
617 }
618 if (GET_CODE (ops[0]) == MEM)
619 {
620 rtx aligned = gen_reg_rtx (SImode);
621 rtx low = gen_reg_rtx (SImode);
622 rtx addr = gen_reg_rtx (SImode);
623 rtx rotl = gen_reg_rtx (SImode);
624 rtx mask0 = gen_reg_rtx (TImode);
625 rtx mem;
626
627 emit_move_insn (addr, XEXP (ops[0], 0));
628 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
629 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
630 emit_insn (gen_negsi2 (rotl, low));
631 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
632 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
633 mem = change_address (ops[0], TImode, aligned);
634 set_mem_alias_set (mem, 0);
635 emit_move_insn (dst, mem);
636 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
637 emit_move_insn (mem, dst);
638 if (start + width > MEM_ALIGN (ops[0]))
639 {
640 rtx shl = gen_reg_rtx (SImode);
641 rtx mask1 = gen_reg_rtx (TImode);
642 rtx dst1 = gen_reg_rtx (TImode);
643 rtx mem1;
644 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
645 emit_insn (gen_shlqby_ti (mask1, mask, shl));
646 mem1 = adjust_address (mem, TImode, 16);
647 set_mem_alias_set (mem1, 0);
648 emit_move_insn (dst1, mem1);
649 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
650 emit_move_insn (mem1, dst1);
651 }
652 }
653 else
71cd778d 654 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 655}
656
657
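/* Expand a block move.  Only the case where the length (ops[2]) and
   alignment (ops[3]) are compile-time constants, the length is within
   the MOVE_RATIO limit, and the alignment is 16 bytes is handled here:
   whole quadwords are copied with V16QImode moves and any remaining tail
   is merged into the destination under a byte mask with selb.  Returns 1
   if the move was expanded, 0 to make the caller fall back to the
   generic expansion.  */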
658int
659spu_expand_block_move (rtx ops[])
660{
661 HOST_WIDE_INT bytes, align, offset;
662 rtx src, dst, sreg, dreg, target;
663 int i;
664 if (GET_CODE (ops[2]) != CONST_INT
665 || GET_CODE (ops[3]) != CONST_INT
48eb4342 666 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 667 return 0;
668
669 bytes = INTVAL (ops[2]);
670 align = INTVAL (ops[3]);
671
672 if (bytes <= 0)
673 return 1;
674
675 dst = ops[0];
676 src = ops[1];
677
678 if (align == 16)
679 {
680 for (offset = 0; offset + 16 <= bytes; offset += 16)
681 {
682 dst = adjust_address (ops[0], V16QImode, offset);
683 src = adjust_address (ops[1], V16QImode, offset);
684 emit_move_insn (dst, src);
685 }
686 if (offset < bytes)
687 {
688 rtx mask;
689 unsigned char arr[16] = { 0 };
690 for (i = 0; i < bytes - offset; i++)
691 arr[i] = 0xff;
692 dst = adjust_address (ops[0], V16QImode, offset);
693 src = adjust_address (ops[1], V16QImode, offset);
694 mask = gen_reg_rtx (V16QImode);
695 sreg = gen_reg_rtx (V16QImode);
696 dreg = gen_reg_rtx (V16QImode);
697 target = gen_reg_rtx (V16QImode);
698 emit_move_insn (mask, array_to_constant (V16QImode, arr));
699 emit_move_insn (dreg, dst);
700 emit_move_insn (sreg, src);
701 emit_insn (gen_selb (target, dreg, sreg, mask));
702 emit_move_insn (dst, target);
703 }
704 return 1;
705 }
706 return 0;
707}
708
709enum spu_comp_code
710{ SPU_EQ, SPU_GT, SPU_GTU };
711
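/* Insn codes for the compare patterns, indexed first by operand mode
   (see the index selection in spu_emit_branch_or_set) and then by
   spu_comp_code.  A zero entry means the combination does not exist;
   for example, there are no unsigned compares for the float modes.  */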
5474166e 712int spu_comp_icode[12][3] = {
713 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
714 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
715 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
716 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
717 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
718 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
719 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
720 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
721 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
722 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
723 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
724 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 725};
726
727/* Generate a compare for CODE; depending on IS_SET, emit a branch, set
728 a register to the comparison result, or emit a select.  GCC can figure
729 this out too if we don't provide all variations of compares, but since
730 GCC always wants to use WORD_MODE, we can generate better code in most
731 cases if we do it ourselves. */
732void
733spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
734{
735 int reverse_compare = 0;
736 int reverse_test = 0;
5d70b918 737 rtx compare_result, eq_result;
738 rtx comp_rtx, eq_rtx;
644459d0 739 rtx target = operands[0];
740 enum machine_mode comp_mode;
741 enum machine_mode op_mode;
5d70b918 742 enum spu_comp_code scode, eq_code, ior_code;
644459d0 743 int index;
5d70b918 744 int eq_test = 0;
644459d0 745
746 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
747 and so on, to keep the constant in operand 1. */
748 if (GET_CODE (spu_compare_op1) == CONST_INT)
749 {
750 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
751 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
752 switch (code)
753 {
754 case GE:
755 spu_compare_op1 = GEN_INT (val);
756 code = GT;
757 break;
758 case LT:
759 spu_compare_op1 = GEN_INT (val);
760 code = LE;
761 break;
762 case GEU:
763 spu_compare_op1 = GEN_INT (val);
764 code = GTU;
765 break;
766 case LTU:
767 spu_compare_op1 = GEN_INT (val);
768 code = LEU;
769 break;
770 default:
771 break;
772 }
773 }
774
5d70b918 775 comp_mode = SImode;
776 op_mode = GET_MODE (spu_compare_op0);
777
644459d0 778 switch (code)
779 {
780 case GE:
644459d0 781 scode = SPU_GT;
07027691 782 if (HONOR_NANS (op_mode))
5d70b918 783 {
784 reverse_compare = 0;
785 reverse_test = 0;
786 eq_test = 1;
787 eq_code = SPU_EQ;
788 }
789 else
790 {
791 reverse_compare = 1;
792 reverse_test = 1;
793 }
644459d0 794 break;
795 case LE:
644459d0 796 scode = SPU_GT;
07027691 797 if (HONOR_NANS (op_mode))
5d70b918 798 {
799 reverse_compare = 1;
800 reverse_test = 0;
801 eq_test = 1;
802 eq_code = SPU_EQ;
803 }
804 else
805 {
806 reverse_compare = 0;
807 reverse_test = 1;
808 }
644459d0 809 break;
810 case LT:
811 reverse_compare = 1;
812 reverse_test = 0;
813 scode = SPU_GT;
814 break;
815 case GEU:
816 reverse_compare = 1;
817 reverse_test = 1;
818 scode = SPU_GTU;
819 break;
820 case LEU:
821 reverse_compare = 0;
822 reverse_test = 1;
823 scode = SPU_GTU;
824 break;
825 case LTU:
826 reverse_compare = 1;
827 reverse_test = 0;
828 scode = SPU_GTU;
829 break;
830 case NE:
831 reverse_compare = 0;
832 reverse_test = 1;
833 scode = SPU_EQ;
834 break;
835
836 case EQ:
837 scode = SPU_EQ;
838 break;
839 case GT:
840 scode = SPU_GT;
841 break;
842 case GTU:
843 scode = SPU_GTU;
844 break;
845 default:
846 scode = SPU_EQ;
847 break;
848 }
849
644459d0 850 switch (op_mode)
851 {
852 case QImode:
853 index = 0;
854 comp_mode = QImode;
855 break;
856 case HImode:
857 index = 1;
858 comp_mode = HImode;
859 break;
860 case SImode:
861 index = 2;
862 break;
863 case DImode:
864 index = 3;
865 break;
866 case TImode:
867 index = 4;
868 break;
869 case SFmode:
870 index = 5;
871 break;
872 case DFmode:
873 index = 6;
874 break;
875 case V16QImode:
5474166e 876 index = 7;
877 comp_mode = op_mode;
878 break;
644459d0 879 case V8HImode:
5474166e 880 index = 8;
881 comp_mode = op_mode;
882 break;
644459d0 883 case V4SImode:
5474166e 884 index = 9;
885 comp_mode = op_mode;
886 break;
644459d0 887 case V4SFmode:
5474166e 888 index = 10;
889 comp_mode = V4SImode;
890 break;
644459d0 891 case V2DFmode:
5474166e 892 index = 11;
893 comp_mode = V2DImode;
644459d0 894 break;
5474166e 895 case V2DImode:
644459d0 896 default:
897 abort ();
898 }
899
07027691 900 if (GET_MODE (spu_compare_op1) == DFmode
901 && (scode != SPU_GT && scode != SPU_EQ))
902 abort ();
644459d0 903
904 if (is_set == 0 && spu_compare_op1 == const0_rtx
905 && (GET_MODE (spu_compare_op0) == SImode
906 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
907 {
908 /* Don't need to set a register with the result when we are
909 comparing against zero and branching. */
910 reverse_test = !reverse_test;
911 compare_result = spu_compare_op0;
912 }
913 else
914 {
915 compare_result = gen_reg_rtx (comp_mode);
916
917 if (reverse_compare)
918 {
919 rtx t = spu_compare_op1;
920 spu_compare_op1 = spu_compare_op0;
921 spu_compare_op0 = t;
922 }
923
924 if (spu_comp_icode[index][scode] == 0)
925 abort ();
926
927 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
928 (spu_compare_op0, op_mode))
929 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
930 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
931 (spu_compare_op1, op_mode))
932 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
933 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
934 spu_compare_op0,
935 spu_compare_op1);
936 if (comp_rtx == 0)
937 abort ();
938 emit_insn (comp_rtx);
939
5d70b918 940 if (eq_test)
941 {
942 eq_result = gen_reg_rtx (comp_mode);
943 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
944 spu_compare_op0,
945 spu_compare_op1);
946 if (eq_rtx == 0)
947 abort ();
948 emit_insn (eq_rtx);
949 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
950 gcc_assert (ior_code != CODE_FOR_nothing);
951 emit_insn (GEN_FCN (ior_code)
952 (compare_result, compare_result, eq_result));
953 }
644459d0 954 }
955
956 if (is_set == 0)
957 {
958 rtx bcomp;
959 rtx loc_ref;
960
961 /* We don't have branch on QI compare insns, so we convert the
962 QI compare result to a HI result. */
963 if (comp_mode == QImode)
964 {
965 rtx old_res = compare_result;
966 compare_result = gen_reg_rtx (HImode);
967 comp_mode = HImode;
968 emit_insn (gen_extendqihi2 (compare_result, old_res));
969 }
970
971 if (reverse_test)
972 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
973 else
974 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
975
976 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
977 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
978 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
979 loc_ref, pc_rtx)));
980 }
981 else if (is_set == 2)
982 {
983 int compare_size = GET_MODE_BITSIZE (comp_mode);
984 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
985 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
986 rtx select_mask;
987 rtx op_t = operands[2];
988 rtx op_f = operands[3];
989
990 /* The result of the comparison can be SI, HI or QI mode. Create a
991 mask based on that result. */
992 if (target_size > compare_size)
993 {
994 select_mask = gen_reg_rtx (mode);
995 emit_insn (gen_extend_compare (select_mask, compare_result));
996 }
997 else if (target_size < compare_size)
998 select_mask =
999 gen_rtx_SUBREG (mode, compare_result,
1000 (compare_size - target_size) / BITS_PER_UNIT);
1001 else if (comp_mode != mode)
1002 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1003 else
1004 select_mask = compare_result;
1005
1006 if (GET_MODE (target) != GET_MODE (op_t)
1007 || GET_MODE (target) != GET_MODE (op_f))
1008 abort ();
1009
1010 if (reverse_test)
1011 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1012 else
1013 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1014 }
1015 else
1016 {
1017 if (reverse_test)
1018 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1019 gen_rtx_NOT (comp_mode, compare_result)));
1020 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1021 emit_insn (gen_extendhisi2 (target, compare_result));
1022 else if (GET_MODE (target) == SImode
1023 && GET_MODE (compare_result) == QImode)
1024 emit_insn (gen_extend_compare (target, compare_result));
1025 else
1026 emit_move_insn (target, compare_result);
1027 }
1028}
1029
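/* Return the target bit pattern of CONST_DOUBLE X as a HOST_WIDE_INT:
   the 32-bit single-precision image for SFmode, the full 64-bit image
   for DFmode.  */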
1030HOST_WIDE_INT
1031const_double_to_hwint (rtx x)
1032{
1033 HOST_WIDE_INT val;
1034 REAL_VALUE_TYPE rv;
1035 if (GET_MODE (x) == SFmode)
1036 {
1037 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1038 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1039 }
1040 else if (GET_MODE (x) == DFmode)
1041 {
1042 long l[2];
1043 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1044 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1045 val = l[0];
1046 val = (val << 32) | (l[1] & 0xffffffff);
1047 }
1048 else
1049 abort ();
1050 return val;
1051}
1052
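/* The inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE whose target representation is the low 32 or 64 bits
   of V.  */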
1053rtx
1054hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1055{
1056 long tv[2];
1057 REAL_VALUE_TYPE rv;
1058 gcc_assert (mode == SFmode || mode == DFmode);
1059
1060 if (mode == SFmode)
1061 tv[0] = (v << 32) >> 32;
1062 else if (mode == DFmode)
1063 {
1064 tv[1] = (v << 32) >> 32;
1065 tv[0] = v >> 32;
1066 }
1067 real_from_target (&rv, tv, mode);
1068 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1069}
1070
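/* Print the address ADDR in assembler syntax.  An outer (and X -16),
   which the SPU memory patterns use to force 16-byte alignment, is
   stripped before printing.  */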
1071void
1072print_operand_address (FILE * file, register rtx addr)
1073{
1074 rtx reg;
1075 rtx offset;
1076
e04cf423 1077 if (GET_CODE (addr) == AND
1078 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1079 && INTVAL (XEXP (addr, 1)) == -16)
1080 addr = XEXP (addr, 0);
1081
644459d0 1082 switch (GET_CODE (addr))
1083 {
1084 case REG:
1085 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1086 break;
1087
1088 case PLUS:
1089 reg = XEXP (addr, 0);
1090 offset = XEXP (addr, 1);
1091 if (GET_CODE (offset) == REG)
1092 {
1093 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1094 reg_names[REGNO (offset)]);
1095 }
1096 else if (GET_CODE (offset) == CONST_INT)
1097 {
1098 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1099 INTVAL (offset), reg_names[REGNO (reg)]);
1100 }
1101 else
1102 abort ();
1103 break;
1104
1105 case CONST:
1106 case LABEL_REF:
1107 case SYMBOL_REF:
1108 case CONST_INT:
1109 output_addr_const (file, addr);
1110 break;
1111
1112 default:
1113 debug_rtx (addr);
1114 abort ();
1115 }
1116}
1117
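/* Print operand X to FILE using the SPU-specific output modifier CODE;
   the individual cases below document each modifier letter (immediate
   forms, negated field values, branch and load/store modifiers, etc.).  */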
1118void
1119print_operand (FILE * file, rtx x, int code)
1120{
1121 enum machine_mode mode = GET_MODE (x);
1122 HOST_WIDE_INT val;
1123 unsigned char arr[16];
1124 int xcode = GET_CODE (x);
dea01258 1125 int i, info;
644459d0 1126 if (GET_MODE (x) == VOIDmode)
1127 switch (code)
1128 {
644459d0 1129 case 'L': /* 128 bits, signed */
1130 case 'm': /* 128 bits, signed */
1131 case 'T': /* 128 bits, signed */
1132 case 't': /* 128 bits, signed */
1133 mode = TImode;
1134 break;
644459d0 1135 case 'K': /* 64 bits, signed */
1136 case 'k': /* 64 bits, signed */
1137 case 'D': /* 64 bits, signed */
1138 case 'd': /* 64 bits, signed */
1139 mode = DImode;
1140 break;
644459d0 1141 case 'J': /* 32 bits, signed */
1142 case 'j': /* 32 bits, signed */
1143 case 's': /* 32 bits, signed */
1144 case 'S': /* 32 bits, signed */
1145 mode = SImode;
1146 break;
1147 }
1148 switch (code)
1149 {
1150
1151 case 'j': /* 32 bits, signed */
1152 case 'k': /* 64 bits, signed */
1153 case 'm': /* 128 bits, signed */
1154 if (xcode == CONST_INT
1155 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1156 {
1157 gcc_assert (logical_immediate_p (x, mode));
1158 constant_to_array (mode, x, arr);
1159 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1160 val = trunc_int_for_mode (val, SImode);
1161 switch (which_logical_immediate (val))
1162 {
1163 case SPU_ORI:
1164 break;
1165 case SPU_ORHI:
1166 fprintf (file, "h");
1167 break;
1168 case SPU_ORBI:
1169 fprintf (file, "b");
1170 break;
1171 default:
1172 gcc_unreachable();
1173 }
1174 }
1175 else
1176 gcc_unreachable();
1177 return;
1178
1179 case 'J': /* 32 bits, signed */
1180 case 'K': /* 64 bits, signed */
1181 case 'L': /* 128 bits, signed */
1182 if (xcode == CONST_INT
1183 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1184 {
1185 gcc_assert (logical_immediate_p (x, mode)
1186 || iohl_immediate_p (x, mode));
1187 constant_to_array (mode, x, arr);
1188 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1189 val = trunc_int_for_mode (val, SImode);
1190 switch (which_logical_immediate (val))
1191 {
1192 case SPU_ORI:
1193 case SPU_IOHL:
1194 break;
1195 case SPU_ORHI:
1196 val = trunc_int_for_mode (val, HImode);
1197 break;
1198 case SPU_ORBI:
1199 val = trunc_int_for_mode (val, QImode);
1200 break;
1201 default:
1202 gcc_unreachable();
1203 }
1204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1205 }
1206 else
1207 gcc_unreachable();
1208 return;
1209
1210 case 't': /* 128 bits, signed */
1211 case 'd': /* 64 bits, signed */
1212 case 's': /* 32 bits, signed */
dea01258 1213 if (CONSTANT_P (x))
644459d0 1214 {
dea01258 1215 enum immediate_class c = classify_immediate (x, mode);
1216 switch (c)
1217 {
1218 case IC_IL1:
1219 constant_to_array (mode, x, arr);
1220 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1221 val = trunc_int_for_mode (val, SImode);
1222 switch (which_immediate_load (val))
1223 {
1224 case SPU_IL:
1225 break;
1226 case SPU_ILA:
1227 fprintf (file, "a");
1228 break;
1229 case SPU_ILH:
1230 fprintf (file, "h");
1231 break;
1232 case SPU_ILHU:
1233 fprintf (file, "hu");
1234 break;
1235 default:
1236 gcc_unreachable ();
1237 }
1238 break;
1239 case IC_CPAT:
1240 constant_to_array (mode, x, arr);
1241 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1242 if (info == 1)
1243 fprintf (file, "b");
1244 else if (info == 2)
1245 fprintf (file, "h");
1246 else if (info == 4)
1247 fprintf (file, "w");
1248 else if (info == 8)
1249 fprintf (file, "d");
1250 break;
1251 case IC_IL1s:
1252 if (xcode == CONST_VECTOR)
1253 {
1254 x = CONST_VECTOR_ELT (x, 0);
1255 xcode = GET_CODE (x);
1256 }
1257 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1258 fprintf (file, "a");
1259 else if (xcode == HIGH)
1260 fprintf (file, "hu");
1261 break;
1262 case IC_FSMBI:
5df189be 1263 case IC_FSMBI2:
dea01258 1264 case IC_IL2:
1265 case IC_IL2s:
1266 case IC_POOL:
1267 abort ();
1268 }
644459d0 1269 }
644459d0 1270 else
1271 gcc_unreachable ();
1272 return;
1273
1274 case 'T': /* 128 bits, signed */
1275 case 'D': /* 64 bits, signed */
1276 case 'S': /* 32 bits, signed */
dea01258 1277 if (CONSTANT_P (x))
644459d0 1278 {
dea01258 1279 enum immediate_class c = classify_immediate (x, mode);
1280 switch (c)
644459d0 1281 {
dea01258 1282 case IC_IL1:
1283 constant_to_array (mode, x, arr);
1284 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1285 val = trunc_int_for_mode (val, SImode);
1286 switch (which_immediate_load (val))
1287 {
1288 case SPU_IL:
1289 case SPU_ILA:
1290 break;
1291 case SPU_ILH:
1292 case SPU_ILHU:
1293 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1294 break;
1295 default:
1296 gcc_unreachable ();
1297 }
1298 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1299 break;
1300 case IC_FSMBI:
1301 constant_to_array (mode, x, arr);
1302 val = 0;
1303 for (i = 0; i < 16; i++)
1304 {
1305 val <<= 1;
1306 val |= arr[i] & 1;
1307 }
1308 print_operand (file, GEN_INT (val), 0);
1309 break;
1310 case IC_CPAT:
1311 constant_to_array (mode, x, arr);
1312 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1313 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1314 break;
dea01258 1315 case IC_IL1s:
dea01258 1316 if (xcode == HIGH)
5df189be 1317 x = XEXP (x, 0);
1318 if (GET_CODE (x) == CONST_VECTOR)
1319 x = CONST_VECTOR_ELT (x, 0);
1320 output_addr_const (file, x);
1321 if (xcode == HIGH)
1322 fprintf (file, "@h");
644459d0 1323 break;
dea01258 1324 case IC_IL2:
1325 case IC_IL2s:
5df189be 1326 case IC_FSMBI2:
dea01258 1327 case IC_POOL:
1328 abort ();
644459d0 1329 }
c8befdb9 1330 }
644459d0 1331 else
1332 gcc_unreachable ();
1333 return;
1334
644459d0 1335 case 'C':
1336 if (xcode == CONST_INT)
1337 {
1338 /* Only the 4 least significant bits are relevant for the generate
1339 controls (c*d) instructions. */
1340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1341 return;
1342 }
1343 break;
1344
1345 case 'M': /* print code for c*d */
1346 if (GET_CODE (x) == CONST_INT)
1347 switch (INTVAL (x))
1348 {
1349 case 1:
1350 fprintf (file, "b");
1351 break;
1352 case 2:
1353 fprintf (file, "h");
1354 break;
1355 case 4:
1356 fprintf (file, "w");
1357 break;
1358 case 8:
1359 fprintf (file, "d");
1360 break;
1361 default:
1362 gcc_unreachable();
1363 }
1364 else
1365 gcc_unreachable();
1366 return;
1367
1368 case 'N': /* Negate the operand */
1369 if (xcode == CONST_INT)
1370 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1371 else if (xcode == CONST_VECTOR)
1372 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1373 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1374 return;
1375
1376 case 'I': /* enable/disable interrupts */
1377 if (xcode == CONST_INT)
1378 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1379 return;
1380
1381 case 'b': /* branch modifiers */
1382 if (xcode == REG)
1383 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1384 else if (COMPARISON_P (x))
1385 fprintf (file, "%s", xcode == NE ? "n" : "");
1386 return;
1387
1388 case 'i': /* indirect call */
1389 if (xcode == MEM)
1390 {
1391 if (GET_CODE (XEXP (x, 0)) == REG)
1392 /* Used in indirect function calls. */
1393 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1394 else
1395 output_address (XEXP (x, 0));
1396 }
1397 return;
1398
1399 case 'p': /* load/store */
1400 if (xcode == MEM)
1401 {
1402 x = XEXP (x, 0);
1403 xcode = GET_CODE (x);
1404 }
e04cf423 1405 if (xcode == AND)
1406 {
1407 x = XEXP (x, 0);
1408 xcode = GET_CODE (x);
1409 }
644459d0 1410 if (xcode == REG)
1411 fprintf (file, "d");
1412 else if (xcode == CONST_INT)
1413 fprintf (file, "a");
1414 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1415 fprintf (file, "r");
1416 else if (xcode == PLUS || xcode == LO_SUM)
1417 {
1418 if (GET_CODE (XEXP (x, 1)) == REG)
1419 fprintf (file, "x");
1420 else
1421 fprintf (file, "d");
1422 }
1423 return;
1424
5df189be 1425 case 'e':
1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427 val &= 0x7;
1428 output_addr_const (file, GEN_INT (val));
1429 return;
1430
1431 case 'f':
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val &= 0x1f;
1434 output_addr_const (file, GEN_INT (val));
1435 return;
1436
1437 case 'g':
1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439 val &= 0x3f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
1442
1443 case 'h':
1444 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1445 val = (val >> 3) & 0x1f;
1446 output_addr_const (file, GEN_INT (val));
1447 return;
1448
1449 case 'E':
1450 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1451 val = -val;
1452 val &= 0x7;
1453 output_addr_const (file, GEN_INT (val));
1454 return;
1455
1456 case 'F':
1457 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1458 val = -val;
1459 val &= 0x1f;
1460 output_addr_const (file, GEN_INT (val));
1461 return;
1462
1463 case 'G':
1464 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1465 val = -val;
1466 val &= 0x3f;
1467 output_addr_const (file, GEN_INT (val));
1468 return;
1469
1470 case 'H':
1471 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1472 val = -(val & -8ll);
1473 val = (val >> 3) & 0x1f;
1474 output_addr_const (file, GEN_INT (val));
1475 return;
1476
644459d0 1477 case 0:
1478 if (xcode == REG)
1479 fprintf (file, "%s", reg_names[REGNO (x)]);
1480 else if (xcode == MEM)
1481 output_address (XEXP (x, 0));
1482 else if (xcode == CONST_VECTOR)
dea01258 1483 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1484 else
1485 output_addr_const (file, x);
1486 return;
1487
f6a0d06f 1488 /* unused letters
5df189be 1489 o qr uvw yz
1490 AB OPQR UVWXYZ */
644459d0 1491 default:
1492 output_operand_lossage ("invalid %%xn code");
1493 }
1494 gcc_unreachable ();
1495}
1496
1497extern char call_used_regs[];
644459d0 1498
1499/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1500 caller saved register. For leaf functions it is more efficient to
1501 use a volatile register because we won't need to save and restore the
1502 pic register. This routine is only valid after register allocation
1503 is completed, so we can pick an unused register. */
1504static rtx
1505get_pic_reg (void)
1506{
1507 rtx pic_reg = pic_offset_table_rtx;
1508 if (!reload_completed && !reload_in_progress)
1509 abort ();
1510 return pic_reg;
1511}
1512
5df189be 1513/* Split constant addresses to handle cases that are too large.
1514 Add in the pic register when in PIC mode.
1515 Split immediates that require more than 1 instruction. */
dea01258 1516int
1517spu_split_immediate (rtx * ops)
c8befdb9 1518{
dea01258 1519 enum machine_mode mode = GET_MODE (ops[0]);
1520 enum immediate_class c = classify_immediate (ops[1], mode);
1521
1522 switch (c)
c8befdb9 1523 {
dea01258 1524 case IC_IL2:
1525 {
1526 unsigned char arrhi[16];
1527 unsigned char arrlo[16];
98bbec1e 1528 rtx to, temp, hi, lo;
dea01258 1529 int i;
98bbec1e 1530 enum machine_mode imode = mode;
1531 /* We need to do reals as ints because the constant used in the
1532 IOR might not be a legitimate real constant. */
1533 imode = int_mode_for_mode (mode);
dea01258 1534 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1535 if (imode != mode)
1536 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1537 else
1538 to = ops[0];
1539 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1540 for (i = 0; i < 16; i += 4)
1541 {
1542 arrlo[i + 2] = arrhi[i + 2];
1543 arrlo[i + 3] = arrhi[i + 3];
1544 arrlo[i + 0] = arrlo[i + 1] = 0;
1545 arrhi[i + 2] = arrhi[i + 3] = 0;
1546 }
98bbec1e 1547 hi = array_to_constant (imode, arrhi);
1548 lo = array_to_constant (imode, arrlo);
1549 emit_move_insn (temp, hi);
dea01258 1550 emit_insn (gen_rtx_SET
98bbec1e 1551 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1552 return 1;
1553 }
5df189be 1554 case IC_FSMBI2:
1555 {
1556 unsigned char arr_fsmbi[16];
1557 unsigned char arr_andbi[16];
1558 rtx to, reg_fsmbi, reg_and;
1559 int i;
1560 enum machine_mode imode = mode;
1561 /* We need to do reals as ints because the constant used in the
1562 * AND might not be a legitimate real constant. */
1563 imode = int_mode_for_mode (mode);
1564 constant_to_array (mode, ops[1], arr_fsmbi);
1565 if (imode != mode)
1566 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1567 else
1568 to = ops[0];
1569 for (i = 0; i < 16; i++)
1570 if (arr_fsmbi[i] != 0)
1571 {
1572 arr_andbi[0] = arr_fsmbi[i];
1573 arr_fsmbi[i] = 0xff;
1574 }
1575 for (i = 1; i < 16; i++)
1576 arr_andbi[i] = arr_andbi[0];
1577 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1578 reg_and = array_to_constant (imode, arr_andbi);
1579 emit_move_insn (to, reg_fsmbi);
1580 emit_insn (gen_rtx_SET
1581 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1582 return 1;
1583 }
dea01258 1584 case IC_POOL:
1585 if (reload_in_progress || reload_completed)
1586 {
1587 rtx mem = force_const_mem (mode, ops[1]);
1588 if (TARGET_LARGE_MEM)
1589 {
1590 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1591 emit_move_insn (addr, XEXP (mem, 0));
1592 mem = replace_equiv_address (mem, addr);
1593 }
1594 emit_move_insn (ops[0], mem);
1595 return 1;
1596 }
1597 break;
1598 case IC_IL1s:
1599 case IC_IL2s:
1600 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1601 {
1602 if (c == IC_IL2s)
1603 {
5df189be 1604 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1605 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1606 }
1607 else if (flag_pic)
1608 emit_insn (gen_pic (ops[0], ops[1]));
1609 if (flag_pic)
1610 {
1611 rtx pic_reg = get_pic_reg ();
1612 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1613 crtl->uses_pic_offset_table = 1;
dea01258 1614 }
1615 return flag_pic || c == IC_IL2s;
1616 }
1617 break;
1618 case IC_IL1:
1619 case IC_FSMBI:
1620 case IC_CPAT:
1621 break;
c8befdb9 1622 }
dea01258 1623 return 0;
c8befdb9 1624}
1625
644459d0 1626/* SAVING is TRUE when we are generating the actual load and store
1627 instructions for REGNO. When determining the size of the stack
1628 needed for saving registers we must allocate enough space for the
1629 worst case, because we don't always have the information early enough
1630 to not allocate it. But we can at least eliminate the actual loads
1631 and stores during the prologue/epilogue. */
1632static int
1633need_to_save_reg (int regno, int saving)
1634{
3072d30e 1635 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1636 return 1;
1637 if (flag_pic
1638 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1639 && (!saving || crtl->uses_pic_offset_table)
644459d0 1640 && (!saving
3072d30e 1641 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1642 return 1;
1643 return 0;
1644}
1645
1646/* This function is only correct starting with local register
1647 allocation */
1648int
1649spu_saved_regs_size (void)
1650{
1651 int reg_save_size = 0;
1652 int regno;
1653
1654 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1655 if (need_to_save_reg (regno, 0))
1656 reg_save_size += 0x10;
1657 return reg_save_size;
1658}
1659
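/* Emit a quadword (V4SImode) store of register REGNO into the frame
   slot at ADDR + OFFSET; frame_emit_load below emits the matching
   reload.  */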
1660static rtx
1661frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1662{
1663 rtx reg = gen_rtx_REG (V4SImode, regno);
1664 rtx mem =
1665 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1666 return emit_insn (gen_movv4si (mem, reg));
1667}
1668
1669static rtx
1670frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1671{
1672 rtx reg = gen_rtx_REG (V4SImode, regno);
1673 rtx mem =
1674 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1675 return emit_insn (gen_movv4si (reg, mem));
1676}
1677
1678/* This happens after reload, so we need to expand it. */
1679static rtx
1680frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1681{
1682 rtx insn;
1683 if (satisfies_constraint_K (GEN_INT (imm)))
1684 {
1685 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1686 }
1687 else
1688 {
3072d30e 1689 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1690 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1691 if (REGNO (src) == REGNO (scratch))
1692 abort ();
1693 }
644459d0 1694 return insn;
1695}
1696
1697/* Return nonzero if this function is known to have a null epilogue. */
1698
1699int
1700direct_return (void)
1701{
1702 if (reload_completed)
1703 {
1704 if (cfun->static_chain_decl == 0
1705 && (spu_saved_regs_size ()
1706 + get_frame_size ()
abe32cce 1707 + crtl->outgoing_args_size
1708 + crtl->args.pretend_args_size == 0)
644459d0 1709 && current_function_is_leaf)
1710 return 1;
1711 }
1712 return 0;
1713}
1714
1715/*
1716 The stack frame looks like this:
1717 +-------------+
1718 | incoming |
1719 AP | args |
1720 +-------------+
1721 | $lr save |
1722 +-------------+
1723 prev SP | back chain |
1724 +-------------+
1725 | var args |
abe32cce 1726 | reg save | crtl->args.pretend_args_size bytes
644459d0 1727 +-------------+
1728 | ... |
1729 | saved regs | spu_saved_regs_size() bytes
1730 +-------------+
1731 | ... |
1732 FP | vars | get_frame_size() bytes
1733 +-------------+
1734 | ... |
1735 | outgoing |
abe32cce 1736 | args | crtl->outgoing_args_size bytes
644459d0 1737 +-------------+
1738 | $lr of next |
1739 | frame |
1740 +-------------+
1741 SP | back chain |
1742 +-------------+
1743
1744*/
1745void
1746spu_expand_prologue (void)
1747{
1748 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1749 HOST_WIDE_INT total_size;
1750 HOST_WIDE_INT saved_regs_size;
1751 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1752 rtx scratch_reg_0, scratch_reg_1;
1753 rtx insn, real;
1754
1755 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1756 the "toplevel" insn chain. */
1757 emit_note (NOTE_INSN_DELETED);
1758
1759 if (flag_pic && optimize == 0)
18d50ae6 1760 crtl->uses_pic_offset_table = 1;
644459d0 1761
1762 if (spu_naked_function_p (current_function_decl))
1763 return;
1764
1765 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1766 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1767
1768 saved_regs_size = spu_saved_regs_size ();
1769 total_size = size + saved_regs_size
abe32cce 1770 + crtl->outgoing_args_size
1771 + crtl->args.pretend_args_size;
644459d0 1772
1773 if (!current_function_is_leaf
18d50ae6 1774 || cfun->calls_alloca || total_size > 0)
644459d0 1775 total_size += STACK_POINTER_OFFSET;
1776
1777 /* Save this first because code after this might use the link
1778 register as a scratch register. */
1779 if (!current_function_is_leaf)
1780 {
1781 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1782 RTX_FRAME_RELATED_P (insn) = 1;
1783 }
1784
1785 if (total_size > 0)
1786 {
abe32cce 1787 offset = -crtl->args.pretend_args_size;
644459d0 1788 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1789 if (need_to_save_reg (regno, 1))
1790 {
1791 offset -= 16;
1792 insn = frame_emit_store (regno, sp_reg, offset);
1793 RTX_FRAME_RELATED_P (insn) = 1;
1794 }
1795 }
1796
18d50ae6 1797 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1798 {
1799 rtx pic_reg = get_pic_reg ();
1800 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1801 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1802 }
1803
1804 if (total_size > 0)
1805 {
1806 if (flag_stack_check)
1807 {
d819917f 1808 /* We compare against total_size-1 because
644459d0 1809 ($sp >= total_size) <=> ($sp > total_size-1) */
1810 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1811 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1812 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1813 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1814 {
1815 emit_move_insn (scratch_v4si, size_v4si);
1816 size_v4si = scratch_v4si;
1817 }
1818 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1819 emit_insn (gen_vec_extractv4si
1820 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1821 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1822 }
1823
1824 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1825 the value of the previous $sp because we save it as the back
1826 chain. */
1827 if (total_size <= 2000)
1828 {
1829 /* In this case we save the back chain first. */
1830 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1831 insn =
1832 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1833 }
1834 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1835 {
1836 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1837 insn =
1838 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1839 }
1840 else
1841 {
1842 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1843 insn =
1844 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1845 }
1846 RTX_FRAME_RELATED_P (insn) = 1;
1847 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1848 REG_NOTES (insn) =
1849 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1850
1851 if (total_size > 2000)
1852 {
1853 /* Save the back chain ptr */
1854 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1855 }
1856
1857 if (frame_pointer_needed)
1858 {
1859 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1860 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1861 + crtl->outgoing_args_size;
644459d0 1862 /* Set the new frame_pointer */
d8dfeb55 1863 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1864 RTX_FRAME_RELATED_P (insn) = 1;
1865 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1866 REG_NOTES (insn) =
1867 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1868 real, REG_NOTES (insn));
5df189be 1869 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1870 }
1871 }
1872
1873 emit_note (NOTE_INSN_DELETED);
1874}
1875
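/* Emit the epilogue: pop the frame (reloading the back chain when
   alloca was used), restore the saved registers and the link register,
   and emit the return unless this is a sibcall.  */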
1876void
1877spu_expand_epilogue (bool sibcall_p)
1878{
1879 int size = get_frame_size (), offset, regno;
1880 HOST_WIDE_INT saved_regs_size, total_size;
1881 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1882 rtx jump, scratch_reg_0;
1883
1884 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1885 the "toplevel" insn chain. */
1886 emit_note (NOTE_INSN_DELETED);
1887
1888 if (spu_naked_function_p (current_function_decl))
1889 return;
1890
1891 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1892
1893 saved_regs_size = spu_saved_regs_size ();
1894 total_size = size + saved_regs_size
abe32cce 1895 + crtl->outgoing_args_size
1896 + crtl->args.pretend_args_size;
644459d0 1897
1898 if (!current_function_is_leaf
18d50ae6 1899 || cfun->calls_alloca || total_size > 0)
644459d0 1900 total_size += STACK_POINTER_OFFSET;
1901
1902 if (total_size > 0)
1903 {
18d50ae6 1904 if (cfun->calls_alloca)
644459d0 1905 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1906 else
1907 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1908
1909
1910 if (saved_regs_size > 0)
1911 {
abe32cce 1912 offset = -crtl->args.pretend_args_size;
644459d0 1913 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1914 if (need_to_save_reg (regno, 1))
1915 {
1916 offset -= 0x10;
1917 frame_emit_load (regno, sp_reg, offset);
1918 }
1919 }
1920 }
1921
1922 if (!current_function_is_leaf)
1923 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1924
1925 if (!sibcall_p)
1926 {
18b42941 1927 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1928 jump = emit_jump_insn (gen__return ());
1929 emit_barrier_after (jump);
1930 }
1931
1932 emit_note (NOTE_INSN_DELETED);
1933}
1934
1935rtx
1936spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1937{
1938 if (count != 0)
1939 return 0;
1940 /* This is inefficient because it ends up copying to a save-register
1941 which then gets saved even though $lr has already been saved. But
1942 it does generate better code for leaf functions and we don't need
1943 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1944 used for __builtin_return_address anyway, so maybe we don't care if
1945 it's inefficient. */
1946 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1947}
1948\f
1949
1950/* Given VAL, generate a constant appropriate for MODE.
1951 If MODE is a vector mode, every element will be VAL.
1952 For TImode, VAL will be zero extended to 128 bits. */
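/* For example (illustrative only): spu_const (V8HImode, 7) returns a
   CONST_VECTOR of eight HImode elements that are all 7, while
   spu_const (SImode, -1) simply returns the CONST_INT for -1.  */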
1953rtx
1954spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1955{
1956 rtx inner;
1957 rtvec v;
1958 int units, i;
1959
1960 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1961 || GET_MODE_CLASS (mode) == MODE_FLOAT
1962 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1963 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1964
1965 if (GET_MODE_CLASS (mode) == MODE_INT)
1966 return immed_double_const (val, 0, mode);
1967
1968 /* val is the bit representation of the float */
1969 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1970 return hwint_to_const_double (mode, val);
1971
1972 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1973 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1974 else
1975 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1976
1977 units = GET_MODE_NUNITS (mode);
1978
1979 v = rtvec_alloc (units);
1980
1981 for (i = 0; i < units; ++i)
1982 RTVEC_ELT (v, i) = inner;
1983
1984 return gen_rtx_CONST_VECTOR (mode, v);
1985}
1986\f
1987/* branch hint stuff */
1988
1989/* The hardware requires 8 insns between a hint and the branch it
1990 affects. This variable describes how many rtl instructions the
1991 compiler needs to see before inserting a hint. (FIXME: We should
1992 accept fewer and insert nops to enforce it because hinting is always
1993 profitable for performance, but we do need to be careful of code
1994 size.) */
1995int spu_hint_dist = (8 * 4);
1996
5474166e 1997/* Create a MODE vector constant from 4 ints. */
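/* For illustration: spu_const_from_ints (V4SImode, 1, 2, 3, 4) builds the
   vector constant { 1, 2, 3, 4 }; each int is written into the byte array
   in big-endian order before array_to_constant assembles the constant.  */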
1998rtx
1999spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2000{
2001 unsigned char arr[16];
2002 arr[0] = (a >> 24) & 0xff;
2003 arr[1] = (a >> 16) & 0xff;
2004 arr[2] = (a >> 8) & 0xff;
2005 arr[3] = (a >> 0) & 0xff;
2006 arr[4] = (b >> 24) & 0xff;
2007 arr[5] = (b >> 16) & 0xff;
2008 arr[6] = (b >> 8) & 0xff;
2009 arr[7] = (b >> 0) & 0xff;
2010 arr[8] = (c >> 24) & 0xff;
2011 arr[9] = (c >> 16) & 0xff;
2012 arr[10] = (c >> 8) & 0xff;
2013 arr[11] = (c >> 0) & 0xff;
2014 arr[12] = (d >> 24) & 0xff;
2015 arr[13] = (d >> 16) & 0xff;
2016 arr[14] = (d >> 8) & 0xff;
2017 arr[15] = (d >> 0) & 0xff;
2018 return array_to_constant(mode, arr);
2019}
2020
644459d0 2021/* An array of these is used to propagate hints to predecessor blocks. */
2022struct spu_bb_info
2023{
fa7637bd 2024 rtx prop_jump; /* propagated from another block */
2025 basic_block bb; /* the original block. */
644459d0 2026};
2027
2028/* The special $hbr register is used to prevent the insn scheduler from
2029 moving hbr insns across instructions which invalidate them. It
2030 should only be used in a clobber, and this function searches for
2031 insns which clobber it. */
2032static bool
2033insn_clobbers_hbr (rtx insn)
2034{
2035 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2036 {
2037 rtx parallel = PATTERN (insn);
2038 rtx clobber;
2039 int j;
2040 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2041 {
2042 clobber = XVECEXP (parallel, 0, j);
2043 if (GET_CODE (clobber) == CLOBBER
2044 && GET_CODE (XEXP (clobber, 0)) == REG
2045 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2046 return 1;
2047 }
2048 }
2049 return 0;
2050}
2051
2052static void
2053spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2054{
2055 rtx branch_label;
2056 rtx hint, insn, prev, next;
2057
2058 if (before == 0 || branch == 0 || target == 0)
2059 return;
2060
2061 if (distance > 600)
2062 return;
2063
2064
2065 branch_label = gen_label_rtx ();
2066 LABEL_NUSES (branch_label)++;
2067 LABEL_PRESERVE_P (branch_label) = 1;
2068 insn = emit_label_before (branch_label, branch);
2069 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2070
2071 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2072 the current insn is pipe0, dual issue with it. */
2073 prev = prev_active_insn (before);
2074 if (prev && get_pipe (prev) == 0)
2075 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2076 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2077 {
2078 next = next_active_insn (before);
2079 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2080 if (next)
2081 PUT_MODE (next, TImode);
2082 }
2083 else
2084 {
2085 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2086 PUT_MODE (hint, TImode);
2087 }
2088 recog_memoized (hint);
2089}
2090
2091/* Returns 0 if we don't want a hint for this branch. Otherwise return
2092 the rtx for the branch target. */
2093static rtx
2094get_branch_target (rtx branch)
2095{
2096 if (GET_CODE (branch) == JUMP_INSN)
2097 {
2098 rtx set, src;
2099
2100 /* Return statements */
2101 if (GET_CODE (PATTERN (branch)) == RETURN)
2102 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2103
2104 /* jump table */
2105 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2106 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2107 return 0;
2108
2109 set = single_set (branch);
2110 src = SET_SRC (set);
2111 if (GET_CODE (SET_DEST (set)) != PC)
2112 abort ();
2113
2114 if (GET_CODE (src) == IF_THEN_ELSE)
2115 {
2116 rtx lab = 0;
2117 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2118 if (note)
2119 {
2120 /* If the more probable case is not a fall through, then
2121 try a branch hint. */
2122 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2123 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2124 && GET_CODE (XEXP (src, 1)) != PC)
2125 lab = XEXP (src, 1);
2126 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2127 && GET_CODE (XEXP (src, 2)) != PC)
2128 lab = XEXP (src, 2);
2129 }
2130 if (lab)
2131 {
2132 if (GET_CODE (lab) == RETURN)
2133 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2134 return lab;
2135 }
2136 return 0;
2137 }
2138
2139 return src;
2140 }
2141 else if (GET_CODE (branch) == CALL_INSN)
2142 {
2143 rtx call;
2144 /* All of our call patterns are in a PARALLEL and the CALL is
2145 the first pattern in the PARALLEL. */
2146 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2147 abort ();
2148 call = XVECEXP (PATTERN (branch), 0, 0);
2149 if (GET_CODE (call) == SET)
2150 call = SET_SRC (call);
2151 if (GET_CODE (call) != CALL)
2152 abort ();
2153 return XEXP (XEXP (call, 0), 0);
2154 }
2155 return 0;
2156}
2157
2158static void
2159insert_branch_hints (void)
2160{
2161 struct spu_bb_info *spu_bb_info;
2162 rtx branch, insn, next;
2163 rtx branch_target = 0;
2164 int branch_addr = 0, insn_addr, head_addr;
2165 basic_block bb;
2166 unsigned int j;
2167
2168 spu_bb_info =
2169 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2170 sizeof (struct spu_bb_info));
2171
2172 /* We need exact insn addresses and lengths. */
2173 shorten_branches (get_insns ());
2174
2175 FOR_EACH_BB_REVERSE (bb)
2176 {
2177 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2178 branch = 0;
2179 if (spu_bb_info[bb->index].prop_jump)
2180 {
2181 branch = spu_bb_info[bb->index].prop_jump;
2182 branch_target = get_branch_target (branch);
2183 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2184 }
2185 /* Search from end of a block to beginning. In this loop, find
 2186 branches which need a hint and emit the hint only when:
 2187 - it's an indirect branch and we're at the insn which sets
 2188 the register
 2189 - we're at an insn that will invalidate the hint, e.g., a
 2190 call, another hint insn, inline asm that clobbers $hbr, or
 2191 some inlined operations (divmodsi4). Don't consider jumps
 2192 because they are only at the end of a block and are
 2193 considered when we are deciding whether to propagate.
 2194 - we're getting too far away from the branch. The hbr insns
 5b865faf 2195 only have a signed 10-bit offset.
 644459d0 2196 We go back as far as possible so the branch will be considered
 2197 for propagation when we get to the beginning of the block. */
2198 next = 0;
2199 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2200 {
2201 if (INSN_P (insn))
2202 {
2203 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2204 if (branch && next
2205 && ((GET_CODE (branch_target) == REG
2206 && set_of (branch_target, insn) != NULL_RTX)
2207 || insn_clobbers_hbr (insn)
2208 || branch_addr - insn_addr > 600))
2209 {
2210 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2211 if (insn != BB_END (bb)
2212 && branch_addr - next_addr >= spu_hint_dist)
2213 {
2214 if (dump_file)
2215 fprintf (dump_file,
2216 "hint for %i in block %i before %i\n",
2217 INSN_UID (branch), bb->index, INSN_UID (next));
2218 spu_emit_branch_hint (next, branch, branch_target,
2219 branch_addr - next_addr);
2220 }
2221 branch = 0;
2222 }
2223
2224 /* JUMP_P will only be true at the end of a block. When
2225 branch is already set it means we've previously decided
2226 to propagate a hint for that branch into this block. */
2227 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2228 {
2229 branch = 0;
2230 if ((branch_target = get_branch_target (insn)))
2231 {
2232 branch = insn;
2233 branch_addr = insn_addr;
2234 }
2235 }
2236
2237 /* When a branch hint is emitted it will be inserted
2238 before "next". Make sure next is the beginning of a
2239 cycle to minimize impact on the scheduled insns. */
2240 if (GET_MODE (insn) == TImode)
2241 next = insn;
2242 }
2243 if (insn == BB_HEAD (bb))
2244 break;
2245 }
2246
2247 if (branch)
2248 {
2249 /* If we haven't emitted a hint for this branch yet, it might
2250 be profitable to emit it in one of the predecessor blocks,
2251 especially for loops. */
2252 rtx bbend;
2253 basic_block prev = 0, prop = 0, prev2 = 0;
2254 int loop_exit = 0, simple_loop = 0;
2255 int next_addr = 0;
2256 if (next)
2257 next_addr = INSN_ADDRESSES (INSN_UID (next));
2258
2259 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2260 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2261 prev = EDGE_PRED (bb, j)->src;
2262 else
2263 prev2 = EDGE_PRED (bb, j)->src;
2264
2265 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2266 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2267 loop_exit = 1;
2268 else if (EDGE_SUCC (bb, j)->dest == bb)
2269 simple_loop = 1;
2270
2271 /* If this branch is a loop exit then propagate to previous
2272 fallthru block. This catches the cases when it is a simple
2273 loop or when there is an initial branch into the loop. */
2274 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2275 prop = prev;
2276
2277 /* If there is only one adjacent predecessor, don't propagate
2278 outside this loop. This loop_depth test isn't perfect, but
2279 I'm not sure the loop_father member is valid at this point. */
2280 else if (prev && single_pred_p (bb)
2281 && prev->loop_depth == bb->loop_depth)
2282 prop = prev;
2283
2284 /* If this is the JOIN block of a simple IF-THEN then
80777cd8 2285 propagate the hint to the HEADER block. */
644459d0 2286 else if (prev && prev2
2287 && EDGE_COUNT (bb->preds) == 2
2288 && EDGE_COUNT (prev->preds) == 1
2289 && EDGE_PRED (prev, 0)->src == prev2
2290 && prev2->loop_depth == bb->loop_depth
2291 && GET_CODE (branch_target) != REG)
2292 prop = prev;
2293
2294 /* Don't propagate when:
2295 - this is a simple loop and the hint would be too far
2296 - this is not a simple loop and there are 16 insns in
2297 this block already
2298 - the predecessor block ends in a branch that will be
2299 hinted
2300 - the predecessor block ends in an insn that invalidates
2301 the hint */
2302 if (prop
2303 && prop->index >= 0
2304 && (bbend = BB_END (prop))
2305 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2306 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2307 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2308 {
2309 if (dump_file)
2310 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2311 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2312 bb->index, prop->index, bb->loop_depth,
2313 INSN_UID (branch), loop_exit, simple_loop,
2314 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2315
2316 spu_bb_info[prop->index].prop_jump = branch;
2317 spu_bb_info[prop->index].bb = bb;
2318 }
2319 else if (next && branch_addr - next_addr >= spu_hint_dist)
2320 {
2321 if (dump_file)
2322 fprintf (dump_file, "hint for %i in block %i before %i\n",
2323 INSN_UID (branch), bb->index, INSN_UID (next));
2324 spu_emit_branch_hint (next, branch, branch_target,
2325 branch_addr - next_addr);
2326 }
2327 branch = 0;
2328 }
2329 }
2330 free (spu_bb_info);
2331}
2332\f
2333/* Emit a nop for INSN such that the two will dual issue. This assumes
2334 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2335 We check for TImode to handle a MULTI1 insn which has dual issued its
2336 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2337 ADDR_VEC insns. */
2338static void
2339emit_nop_for_insn (rtx insn)
2340{
2341 int p;
2342 rtx new_insn;
2343 p = get_pipe (insn);
2344 if (p == 1 && GET_MODE (insn) == TImode)
2345 {
2346 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2347 PUT_MODE (new_insn, TImode);
2348 PUT_MODE (insn, VOIDmode);
2349 }
2350 else
2351 new_insn = emit_insn_after (gen_lnop (), insn);
2352}
2353
2354/* Insert nops in basic blocks to meet dual issue alignment
2355 requirements. */
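/* (Illustration of the scheme: an insn whose mode has been set to TImode
   starts a new issue cycle; when the following insn is not TImode the two
   can dual issue, which requires the pair to start on an 8-byte boundary,
   so a nop or lnop is added next to the previous insn whenever that
   address would be misaligned.)  */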
2356static void
2357insert_nops (void)
2358{
2359 rtx insn, next_insn, prev_insn;
2360 int length;
2361 int addr;
2362
2363 /* This sets up INSN_ADDRESSES. */
2364 shorten_branches (get_insns ());
2365
2366 /* Keep track of length added by nops. */
2367 length = 0;
2368
2369 prev_insn = 0;
2370 for (insn = get_insns (); insn; insn = next_insn)
2371 {
2372 next_insn = next_active_insn (insn);
2373 addr = INSN_ADDRESSES (INSN_UID (insn));
2374 if (GET_MODE (insn) == TImode
2375 && next_insn
2376 && GET_MODE (next_insn) != TImode
2377 && ((addr + length) & 7) != 0)
2378 {
2379 /* prev_insn will always be set because the first insn is
2380 always 8-byte aligned. */
2381 emit_nop_for_insn (prev_insn);
2382 length += 4;
2383 }
2384 prev_insn = insn;
2385 }
2386}
2387
2388static void
2389spu_machine_dependent_reorg (void)
2390{
2391 if (optimize > 0)
2392 {
2393 if (TARGET_BRANCH_HINTS)
2394 insert_branch_hints ();
2395 insert_nops ();
2396 }
2397}
2398\f
2399
2400/* Insn scheduling routines, primarily for dual issue. */
2401static int
2402spu_sched_issue_rate (void)
2403{
2404 return 2;
2405}
2406
2407static int
2408spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2409 int verbose ATTRIBUTE_UNUSED, rtx insn,
2410 int can_issue_more)
2411{
2412 if (GET_CODE (PATTERN (insn)) != USE
2413 && GET_CODE (PATTERN (insn)) != CLOBBER
2414 && get_pipe (insn) != -2)
2415 can_issue_more--;
2416 return can_issue_more;
2417}
2418
2419static int
2420get_pipe (rtx insn)
2421{
2422 enum attr_type t;
2423 /* Handle inline asm */
2424 if (INSN_CODE (insn) == -1)
2425 return -1;
2426 t = get_attr_type (insn);
2427 switch (t)
2428 {
2429 case TYPE_CONVERT:
2430 return -2;
2431 case TYPE_MULTI0:
2432 return -1;
2433
2434 case TYPE_FX2:
2435 case TYPE_FX3:
2436 case TYPE_SPR:
2437 case TYPE_NOP:
2438 case TYPE_FXB:
2439 case TYPE_FPD:
2440 case TYPE_FP6:
2441 case TYPE_FP7:
2442 case TYPE_IPREFETCH:
2443 return 0;
2444
2445 case TYPE_LNOP:
2446 case TYPE_SHUF:
2447 case TYPE_LOAD:
2448 case TYPE_STORE:
2449 case TYPE_BR:
2450 case TYPE_MULTI1:
2451 case TYPE_HBR:
2452 return 1;
2453 default:
2454 abort ();
2455 }
2456}
2457
2458static int
2459spu_sched_adjust_priority (rtx insn, int pri)
2460{
2461 int p = get_pipe (insn);
2462 /* Schedule UNSPEC_CONVERTs early so they have less effect on
 2463 scheduling. */
2464 if (GET_CODE (PATTERN (insn)) == USE
2465 || GET_CODE (PATTERN (insn)) == CLOBBER
2466 || p == -2)
2467 return pri + 100;
2468 /* Schedule pipe0 insns early for greedier dual issue. */
2469 if (p != 1)
2470 return pri + 50;
2471 return pri;
2472}
2473
2474/* INSN is dependent on DEP_INSN. */
2475static int
2476spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2477 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2478{
2479 if (GET_CODE (insn) == CALL_INSN)
2480 return cost - 2;
2481 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2482 scheduler makes every insn in a block anti-dependent on the final
2483 jump_insn. We adjust here so higher cost insns will get scheduled
2484 earlier. */
2485 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 2486 return insn_cost (dep_insn) - 3;
644459d0 2487 return cost;
2488}
2489\f
2490/* Create a CONST_DOUBLE from a string. */
2491struct rtx_def *
2492spu_float_const (const char *string, enum machine_mode mode)
2493{
2494 REAL_VALUE_TYPE value;
2495 value = REAL_VALUE_ATOF (string, mode);
2496 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2497}
2498
644459d0 2499int
2500spu_constant_address_p (rtx x)
2501{
2502 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2503 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2504 || GET_CODE (x) == HIGH);
2505}
2506
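/* Decide which immediate-load instruction, if any, can materialize VAL in
   a single insn.  Some illustrative cases: 0x1234 fits il's signed 16-bit
   field, 0x30000 fits ila's 18-bit unsigned field, 0x00050005 has
   identical halfwords so ilh works, and 0x12340000 has a zero low
   halfword so ilhu works.  */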
2507static enum spu_immediate
2508which_immediate_load (HOST_WIDE_INT val)
2509{
2510 gcc_assert (val == trunc_int_for_mode (val, SImode));
2511
2512 if (val >= -0x8000 && val <= 0x7fff)
2513 return SPU_IL;
2514 if (val >= 0 && val <= 0x3ffff)
2515 return SPU_ILA;
2516 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2517 return SPU_ILH;
2518 if ((val & 0xffff) == 0)
2519 return SPU_ILHU;
2520
2521 return SPU_NONE;
2522}
2523
dea01258 2524/* Return true when OP can be loaded by one of the il instructions, or
2525 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 2526int
2527immediate_load_p (rtx op, enum machine_mode mode)
dea01258 2528{
2529 if (CONSTANT_P (op))
2530 {
2531 enum immediate_class c = classify_immediate (op, mode);
5df189be 2532 return c == IC_IL1 || c == IC_IL1s
3072d30e 2533 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 2534 }
2535 return 0;
2536}
2537
2538/* Return true if the first SIZE bytes of ARR form a constant that can be
 2539 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
 2540 are set to the size and starting offset of the insertion pattern. */
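/* For illustration: the pattern cwd generates for inserting a word at byte
   offset 4 is { 16, 17, 18, 19, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31 },
   which cpat_info recognizes with *prun == 4 and *pstart == 4.  */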
2541static int
2542cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2543{
2544 int cpat, run, i, start;
2545 cpat = 1;
2546 run = 0;
2547 start = -1;
2548 for (i = 0; i < size && cpat; i++)
2549 if (arr[i] != i+16)
2550 {
2551 if (!run)
2552 {
2553 start = i;
2554 if (arr[i] == 3)
2555 run = 1;
2556 else if (arr[i] == 2 && arr[i+1] == 3)
2557 run = 2;
2558 else if (arr[i] == 0)
2559 {
2560 while (arr[i+run] == run && i+run < 16)
2561 run++;
2562 if (run != 4 && run != 8)
2563 cpat = 0;
2564 }
2565 else
2566 cpat = 0;
2567 if ((i & (run-1)) != 0)
2568 cpat = 0;
2569 i += run;
2570 }
2571 else
2572 cpat = 0;
2573 }
b01a6dc3 2574 if (cpat && (run || size < 16))
dea01258 2575 {
2576 if (run == 0)
2577 run = 1;
2578 if (prun)
2579 *prun = run;
2580 if (pstart)
2581 *pstart = start == -1 ? 16-run : start;
2582 return 1;
2583 }
2584 return 0;
2585}
2586
2587/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 2588 it into a register. MODE is only valid when OP is a CONST_INT. */
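/* Some illustrative classifications: a lone SYMBOL_REF is IC_IL1s
   (IC_IL2s when -mlarge-mem is in effect), a HIGH is IC_IL1s, and a
   CONST_VECTOR whose elements are distinct symbols ends up IC_POOL.  */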
dea01258 2589static enum immediate_class
2590classify_immediate (rtx op, enum machine_mode mode)
644459d0 2591{
2592 HOST_WIDE_INT val;
2593 unsigned char arr[16];
5df189be 2594 int i, j, repeated, fsmbi, repeat;
dea01258 2595
2596 gcc_assert (CONSTANT_P (op));
2597
644459d0 2598 if (GET_MODE (op) != VOIDmode)
2599 mode = GET_MODE (op);
2600
dea01258 2601 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 2602 if (!flag_pic
2603 && mode == V4SImode
dea01258 2604 && GET_CODE (op) == CONST_VECTOR
2605 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2606 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2607 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2608 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2609 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2610 op = CONST_VECTOR_ELT (op, 0);
644459d0 2611
dea01258 2612 switch (GET_CODE (op))
2613 {
2614 case SYMBOL_REF:
2615 case LABEL_REF:
2616 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 2617
dea01258 2618 case CONST:
0cfc65d4 2619 /* We can never know if the resulting address fits in 18 bits and can be
2620 loaded with ila. For now, assume the address will not overflow if
2621 the displacement is "small" (fits 'K' constraint). */
2622 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2623 {
2624 rtx sym = XEXP (XEXP (op, 0), 0);
2625 rtx cst = XEXP (XEXP (op, 0), 1);
2626
2627 if (GET_CODE (sym) == SYMBOL_REF
2628 && GET_CODE (cst) == CONST_INT
2629 && satisfies_constraint_K (cst))
2630 return IC_IL1s;
2631 }
2632 return IC_IL2s;
644459d0 2633
dea01258 2634 case HIGH:
2635 return IC_IL1s;
2636
2637 case CONST_VECTOR:
2638 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2639 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2640 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2641 return IC_POOL;
2642 /* Fall through. */
2643
2644 case CONST_INT:
2645 case CONST_DOUBLE:
2646 constant_to_array (mode, op, arr);
644459d0 2647
dea01258 2648 /* Check that each 4-byte slot is identical. */
2649 repeated = 1;
2650 for (i = 4; i < 16; i += 4)
2651 for (j = 0; j < 4; j++)
2652 if (arr[j] != arr[i + j])
2653 repeated = 0;
2654
2655 if (repeated)
2656 {
2657 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2658 val = trunc_int_for_mode (val, SImode);
2659
2660 if (which_immediate_load (val) != SPU_NONE)
2661 return IC_IL1;
2662 }
2663
2664 /* Any mode of 2 bytes or smaller can be loaded with an il
2665 instruction. */
2666 gcc_assert (GET_MODE_SIZE (mode) > 2);
2667
2668 fsmbi = 1;
5df189be 2669 repeat = 0;
dea01258 2670 for (i = 0; i < 16 && fsmbi; i++)
5df189be 2671 if (arr[i] != 0 && repeat == 0)
2672 repeat = arr[i];
2673 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 2674 fsmbi = 0;
2675 if (fsmbi)
5df189be 2676 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 2677
2678 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2679 return IC_CPAT;
2680
2681 if (repeated)
2682 return IC_IL2;
2683
2684 return IC_POOL;
2685 default:
2686 break;
2687 }
2688 gcc_unreachable ();
644459d0 2689}
2690
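/* Like which_immediate_load, but for the logical-immediate instructions.
   For example (illustrative): 0x1ff fits ori's signed 10-bit field,
   0xabcd fits iohl's unsigned 16-bit field, and 0x05050505 repeats a
   single byte whose value fits orbi's field.  */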
2691static enum spu_immediate
2692which_logical_immediate (HOST_WIDE_INT val)
2693{
2694 gcc_assert (val == trunc_int_for_mode (val, SImode));
2695
2696 if (val >= -0x200 && val <= 0x1ff)
2697 return SPU_ORI;
2698 if (val >= 0 && val <= 0xffff)
2699 return SPU_IOHL;
2700 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2701 {
2702 val = trunc_int_for_mode (val, HImode);
2703 if (val >= -0x200 && val <= 0x1ff)
2704 return SPU_ORHI;
2705 if ((val & 0xff) == ((val >> 8) & 0xff))
2706 {
2707 val = trunc_int_for_mode (val, QImode);
2708 if (val >= -0x200 && val <= 0x1ff)
2709 return SPU_ORBI;
2710 }
2711 }
2712 return SPU_NONE;
2713}
2714
5df189be 2715/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2716 CONST_DOUBLEs. */
2717static int
2718const_vector_immediate_p (rtx x)
2719{
2720 int i;
2721 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2722 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2723 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2724 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2725 return 0;
2726 return 1;
2727}
2728
644459d0 2729int
2730logical_immediate_p (rtx op, enum machine_mode mode)
2731{
2732 HOST_WIDE_INT val;
2733 unsigned char arr[16];
2734 int i, j;
2735
2736 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2737 || GET_CODE (op) == CONST_VECTOR);
2738
5df189be 2739 if (GET_CODE (op) == CONST_VECTOR
2740 && !const_vector_immediate_p (op))
2741 return 0;
2742
644459d0 2743 if (GET_MODE (op) != VOIDmode)
2744 mode = GET_MODE (op);
2745
2746 constant_to_array (mode, op, arr);
2747
2748 /* Check that bytes are repeated. */
2749 for (i = 4; i < 16; i += 4)
2750 for (j = 0; j < 4; j++)
2751 if (arr[j] != arr[i + j])
2752 return 0;
2753
2754 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2755 val = trunc_int_for_mode (val, SImode);
2756
2757 i = which_logical_immediate (val);
2758 return i != SPU_NONE && i != SPU_IOHL;
2759}
2760
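/* Return TRUE when OP is a (possibly replicated) constant whose 32-bit
   value fits the unsigned 16-bit immediate field of iohl.  */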
2761int
2762iohl_immediate_p (rtx op, enum machine_mode mode)
2763{
2764 HOST_WIDE_INT val;
2765 unsigned char arr[16];
2766 int i, j;
2767
2768 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2769 || GET_CODE (op) == CONST_VECTOR);
2770
5df189be 2771 if (GET_CODE (op) == CONST_VECTOR
2772 && !const_vector_immediate_p (op))
2773 return 0;
2774
644459d0 2775 if (GET_MODE (op) != VOIDmode)
2776 mode = GET_MODE (op);
2777
2778 constant_to_array (mode, op, arr);
2779
2780 /* Check that bytes are repeated. */
2781 for (i = 4; i < 16; i += 4)
2782 for (j = 0; j < 4; j++)
2783 if (arr[j] != arr[i + j])
2784 return 0;
2785
2786 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2787 val = trunc_int_for_mode (val, SImode);
2788
2789 return val >= 0 && val <= 0xffff;
2790}
2791
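/* Return TRUE when OP, viewed as a repeating pattern of elements of the
   (inner) mode, has an element value in [LOW, HIGH] after truncation.
   For example (illustrative), a range of -0x200..0x1ff corresponds to a
   signed 10-bit immediate field such as the one used by ai.  */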
2792int
2793arith_immediate_p (rtx op, enum machine_mode mode,
2794 HOST_WIDE_INT low, HOST_WIDE_INT high)
2795{
2796 HOST_WIDE_INT val;
2797 unsigned char arr[16];
2798 int bytes, i, j;
2799
2800 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2801 || GET_CODE (op) == CONST_VECTOR);
2802
5df189be 2803 if (GET_CODE (op) == CONST_VECTOR
2804 && !const_vector_immediate_p (op))
2805 return 0;
2806
644459d0 2807 if (GET_MODE (op) != VOIDmode)
2808 mode = GET_MODE (op);
2809
2810 constant_to_array (mode, op, arr);
2811
2812 if (VECTOR_MODE_P (mode))
2813 mode = GET_MODE_INNER (mode);
2814
2815 bytes = GET_MODE_SIZE (mode);
2816 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2817
2818 /* Check that bytes are repeated. */
2819 for (i = bytes; i < 16; i += bytes)
2820 for (j = 0; j < bytes; j++)
2821 if (arr[j] != arr[i + j])
2822 return 0;
2823
2824 val = arr[0];
2825 for (j = 1; j < bytes; j++)
2826 val = (val << 8) | arr[j];
2827
2828 val = trunc_int_for_mode (val, mode);
2829
2830 return val >= low && val <= high;
2831}
2832
2833/* We accept:
5b865faf 2834 - any 32-bit constant (SImode, SFmode)
644459d0 2835 - any constant that can be generated with fsmbi (any mode)
5b865faf 2836 - a 64-bit constant where the high and low 32-bit words are identical
644459d0 2837 (DImode, DFmode)
5b865faf 2838 - a 128-bit constant where the four 32-bit words match. */
644459d0 2839int
2840spu_legitimate_constant_p (rtx x)
2841{
5df189be 2842 if (GET_CODE (x) == HIGH)
2843 x = XEXP (x, 0);
644459d0 2844 /* V4SI with all identical symbols is valid. */
5df189be 2845 if (!flag_pic
2846 && GET_MODE (x) == V4SImode
644459d0 2847 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2848 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 2849 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 2850 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2851 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2852 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2853
5df189be 2854 if (GET_CODE (x) == CONST_VECTOR
2855 && !const_vector_immediate_p (x))
2856 return 0;
644459d0 2857 return 1;
2858}
2859
2860/* Valid addresses are:
 2861 - symbol_ref, label_ref, const
 2862 - reg
 2863 - reg + const, where either reg or const is 16-byte aligned
2864 - reg + reg, alignment doesn't matter
2865 The alignment matters in the reg+const case because lqd and stqd
2866 ignore the 4 least significant bits of the const. (TODO: It might be
2867 preferable to allow any alignment and fix it up when splitting.) */
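/* Some illustrative cases: $sp + 20 is accepted because $sp is known to be
   16-byte aligned, and reg + 32 is accepted for any valid base register
   because the offset is a multiple of 16, but reg + 20 is rejected unless
   the base is one of the frame, stack or virtual registers.  */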
2868int
2869spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2870 rtx x, int reg_ok_strict)
2871{
2872 if (mode == TImode && GET_CODE (x) == AND
2873 && GET_CODE (XEXP (x, 1)) == CONST_INT
2874 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2875 x = XEXP (x, 0);
2876 switch (GET_CODE (x))
2877 {
2878 case SYMBOL_REF:
2879 case LABEL_REF:
2880 return !TARGET_LARGE_MEM;
2881
2882 case CONST:
0cfc65d4 2883 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2884 {
2885 rtx sym = XEXP (XEXP (x, 0), 0);
2886 rtx cst = XEXP (XEXP (x, 0), 1);
2887
2888 /* Accept any symbol_ref + constant, assuming it does not
2889 wrap around the local store addressability limit. */
2890 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2891 return 1;
2892 }
2893 return 0;
644459d0 2894
2895 case CONST_INT:
2896 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2897
2898 case SUBREG:
2899 x = XEXP (x, 0);
2900 gcc_assert (GET_CODE (x) == REG);
2901
2902 case REG:
2903 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2904
2905 case PLUS:
2906 case LO_SUM:
2907 {
2908 rtx op0 = XEXP (x, 0);
2909 rtx op1 = XEXP (x, 1);
2910 if (GET_CODE (op0) == SUBREG)
2911 op0 = XEXP (op0, 0);
2912 if (GET_CODE (op1) == SUBREG)
2913 op1 = XEXP (op1, 0);
2914 /* We can't just accept any aligned register because CSE can
2915 change it to a register that is not marked aligned and then
2916 recog will fail. So we only accept frame registers because
2917 they will only be changed to other frame registers. */
2918 if (GET_CODE (op0) == REG
2919 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2920 && GET_CODE (op1) == CONST_INT
2921 && INTVAL (op1) >= -0x2000
2922 && INTVAL (op1) <= 0x1fff
5df189be 2923 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 2924 return 1;
2925 if (GET_CODE (op0) == REG
2926 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2927 && GET_CODE (op1) == REG
2928 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2929 return 1;
2930 }
2931 break;
2932
2933 default:
2934 break;
2935 }
2936 return 0;
2937}
2938
2939/* When the address is reg + const_int, force the const_int into a
fa7637bd 2940 register. */
644459d0 2941rtx
2942spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2943 enum machine_mode mode)
2944{
2945 rtx op0, op1;
2946 /* Make sure both operands are registers. */
2947 if (GET_CODE (x) == PLUS)
2948 {
2949 op0 = XEXP (x, 0);
2950 op1 = XEXP (x, 1);
2951 if (ALIGNED_SYMBOL_REF_P (op0))
2952 {
2953 op0 = force_reg (Pmode, op0);
2954 mark_reg_pointer (op0, 128);
2955 }
2956 else if (GET_CODE (op0) != REG)
2957 op0 = force_reg (Pmode, op0);
2958 if (ALIGNED_SYMBOL_REF_P (op1))
2959 {
2960 op1 = force_reg (Pmode, op1);
2961 mark_reg_pointer (op1, 128);
2962 }
2963 else if (GET_CODE (op1) != REG)
2964 op1 = force_reg (Pmode, op1);
2965 x = gen_rtx_PLUS (Pmode, op0, op1);
2966 if (spu_legitimate_address (mode, x, 0))
2967 return x;
2968 }
2969 return NULL_RTX;
2970}
2971
2972/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2973 struct attribute_spec.handler. */
2974static tree
2975spu_handle_fndecl_attribute (tree * node,
2976 tree name,
2977 tree args ATTRIBUTE_UNUSED,
2978 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2979{
2980 if (TREE_CODE (*node) != FUNCTION_DECL)
2981 {
2982 warning (0, "`%s' attribute only applies to functions",
2983 IDENTIFIER_POINTER (name));
2984 *no_add_attrs = true;
2985 }
2986
2987 return NULL_TREE;
2988}
2989
2990/* Handle the "vector" attribute. */
2991static tree
2992spu_handle_vector_attribute (tree * node, tree name,
2993 tree args ATTRIBUTE_UNUSED,
2994 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2995{
2996 tree type = *node, result = NULL_TREE;
2997 enum machine_mode mode;
2998 int unsigned_p;
2999
3000 while (POINTER_TYPE_P (type)
3001 || TREE_CODE (type) == FUNCTION_TYPE
3002 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3003 type = TREE_TYPE (type);
3004
3005 mode = TYPE_MODE (type);
3006
3007 unsigned_p = TYPE_UNSIGNED (type);
3008 switch (mode)
3009 {
3010 case DImode:
3011 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3012 break;
3013 case SImode:
3014 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3015 break;
3016 case HImode:
3017 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3018 break;
3019 case QImode:
3020 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3021 break;
3022 case SFmode:
3023 result = V4SF_type_node;
3024 break;
3025 case DFmode:
3026 result = V2DF_type_node;
3027 break;
3028 default:
3029 break;
3030 }
3031
3032 /* Propagate qualifiers attached to the element type
3033 onto the vector type. */
3034 if (result && result != type && TYPE_QUALS (type))
3035 result = build_qualified_type (result, TYPE_QUALS (type));
3036
3037 *no_add_attrs = true; /* No need to hang on to the attribute. */
3038
3039 if (!result)
3040 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3041 else
d991e6e8 3042 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3043
3044 return NULL_TREE;
3045}
3046
f2b32076 3047/* Return nonzero if FUNC is a naked function. */
644459d0 3048static int
3049spu_naked_function_p (tree func)
3050{
3051 tree a;
3052
3053 if (TREE_CODE (func) != FUNCTION_DECL)
3054 abort ();
3055
3056 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3057 return a != NULL_TREE;
3058}
3059
3060int
3061spu_initial_elimination_offset (int from, int to)
3062{
3063 int saved_regs_size = spu_saved_regs_size ();
3064 int sp_offset = 0;
abe32cce 3065 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3066 || get_frame_size () || saved_regs_size)
3067 sp_offset = STACK_POINTER_OFFSET;
3068 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3069 return (sp_offset + crtl->outgoing_args_size);
644459d0 3070 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3071 return 0;
3072 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3073 return sp_offset + crtl->outgoing_args_size
644459d0 3074 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3075 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3076 return get_frame_size () + saved_regs_size + sp_offset;
3077 return 0;
3078}
3079
3080rtx
fb80456a 3081spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3082{
3083 enum machine_mode mode = TYPE_MODE (type);
3084 int byte_size = ((mode == BLKmode)
3085 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3086
3087 /* Make sure small structs are left justified in a register. */
3088 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3089 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3090 {
3091 enum machine_mode smode;
3092 rtvec v;
3093 int i;
3094 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3095 int n = byte_size / UNITS_PER_WORD;
3096 v = rtvec_alloc (nregs);
3097 for (i = 0; i < n; i++)
3098 {
3099 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3100 gen_rtx_REG (TImode,
3101 FIRST_RETURN_REGNUM
3102 + i),
3103 GEN_INT (UNITS_PER_WORD * i));
3104 byte_size -= UNITS_PER_WORD;
3105 }
3106
3107 if (n < nregs)
3108 {
3109 if (byte_size < 4)
3110 byte_size = 4;
3111 smode =
3112 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3113 RTVEC_ELT (v, n) =
3114 gen_rtx_EXPR_LIST (VOIDmode,
3115 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3116 GEN_INT (UNITS_PER_WORD * n));
3117 }
3118 return gen_rtx_PARALLEL (mode, v);
3119 }
3120 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3121}
3122
3123rtx
3124spu_function_arg (CUMULATIVE_ARGS cum,
3125 enum machine_mode mode,
3126 tree type, int named ATTRIBUTE_UNUSED)
3127{
3128 int byte_size;
3129
3130 if (cum >= MAX_REGISTER_ARGS)
3131 return 0;
3132
3133 byte_size = ((mode == BLKmode)
3134 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3135
3136 /* The ABI does not allow parameters to be passed partially in
3137 a register and partially on the stack. */
3138 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3139 return 0;
3140
3141 /* Make sure small structs are left justified in a register. */
3142 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3143 && byte_size < UNITS_PER_WORD && byte_size > 0)
3144 {
3145 enum machine_mode smode;
3146 rtx gr_reg;
3147 if (byte_size < 4)
3148 byte_size = 4;
3149 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3150 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3151 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3152 const0_rtx);
3153 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3154 }
3155 else
3156 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3157}
3158
3159/* Variable sized types are passed by reference. */
3160static bool
3161spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3162 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3163 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3164{
3165 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3166}
3167\f
3168
3169/* Var args. */
3170
3171/* Create and return the va_list datatype.
3172
3173 On SPU, va_list is an array type equivalent to
3174
3175 typedef struct __va_list_tag
3176 {
3177 void *__args __attribute__((__aligned(16)));
3178 void *__skip __attribute__((__aligned(16)));
3179
3180 } va_list[1];
3181
fa7637bd 3182 where __args points to the arg that will be returned by the next
644459d0 3183 va_arg(), and __skip points to the previous stack frame such that
3184 when __args == __skip we should advance __args by 32 bytes. */
3185static tree
3186spu_build_builtin_va_list (void)
3187{
3188 tree f_args, f_skip, record, type_decl;
3189 bool owp;
3190
3191 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3192
3193 type_decl =
3194 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3195
3196 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3197 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3198
3199 DECL_FIELD_CONTEXT (f_args) = record;
3200 DECL_ALIGN (f_args) = 128;
3201 DECL_USER_ALIGN (f_args) = 1;
3202
3203 DECL_FIELD_CONTEXT (f_skip) = record;
3204 DECL_ALIGN (f_skip) = 128;
3205 DECL_USER_ALIGN (f_skip) = 1;
3206
3207 TREE_CHAIN (record) = type_decl;
3208 TYPE_NAME (record) = type_decl;
3209 TYPE_FIELDS (record) = f_args;
3210 TREE_CHAIN (f_args) = f_skip;
3211
3212 /* We know this is being padded and we want it that way. It is an
 3213 internal type, so hide the warnings from the user. */
3214 owp = warn_padded;
3215 warn_padded = false;
3216
3217 layout_type (record);
3218
3219 warn_padded = owp;
3220
3221 /* The correct type is an array type of one element. */
3222 return build_array_type (record, build_index_type (size_zero_node));
3223}
3224
3225/* Implement va_start by filling the va_list structure VALIST.
3226 NEXTARG points to the first anonymous stack argument.
3227
3228 The following global variables are used to initialize
3229 the va_list structure:
3230
abe32cce 3231 crtl->args.info;
644459d0 3232 the CUMULATIVE_ARGS for this function
3233
abe32cce 3234 crtl->args.arg_offset_rtx:
644459d0 3235 holds the offset of the first anonymous stack argument
3236 (relative to the virtual arg pointer). */
3237
8a58ed0a 3238static void
644459d0 3239spu_va_start (tree valist, rtx nextarg)
3240{
3241 tree f_args, f_skip;
3242 tree args, skip, t;
3243
3244 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3245 f_skip = TREE_CHAIN (f_args);
3246
3247 valist = build_va_arg_indirect_ref (valist);
3248 args =
3249 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3250 skip =
3251 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3252
3253 /* Find the __args area. */
3254 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3255 if (crtl->args.pretend_args_size > 0)
0de36bdb 3256 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3257 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3258 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3259 TREE_SIDE_EFFECTS (t) = 1;
3260 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3261
3262 /* Find the __skip area. */
3263 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3264 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3265 size_int (crtl->args.pretend_args_size
0de36bdb 3266 - STACK_POINTER_OFFSET));
75a70cf9 3267 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3268 TREE_SIDE_EFFECTS (t) = 1;
3269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3270}
3271
3272/* Gimplify va_arg by updating the va_list structure
3273 VALIST as required to retrieve an argument of type
3274 TYPE, and returning that argument.
3275
3276 ret = va_arg(VALIST, TYPE);
3277
3278 generates code equivalent to:
3279
3280 paddedsize = (sizeof(TYPE) + 15) & -16;
3281 if (VALIST.__args + paddedsize > VALIST.__skip
3282 && VALIST.__args <= VALIST.__skip)
3283 addr = VALIST.__skip + 32;
3284 else
3285 addr = VALIST.__args;
3286 VALIST.__args = addr + paddedsize;
3287 ret = *(TYPE *)addr;
3288 */
3289static tree
75a70cf9 3290spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3291 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3292{
3293 tree f_args, f_skip;
3294 tree args, skip;
3295 HOST_WIDE_INT size, rsize;
3296 tree paddedsize, addr, tmp;
3297 bool pass_by_reference_p;
3298
3299 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3300 f_skip = TREE_CHAIN (f_args);
3301
3302 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3303 args =
3304 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3305 skip =
3306 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3307
3308 addr = create_tmp_var (ptr_type_node, "va_arg");
3309 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3310
3311 /* If an object is dynamically sized, a pointer to it is passed
3312 instead of the object itself. */
3313 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3314 false);
3315 if (pass_by_reference_p)
3316 type = build_pointer_type (type);
3317 size = int_size_in_bytes (type);
3318 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3319
3320 /* build conditional expression to calculate addr. The expression
3321 will be gimplified later. */
0de36bdb 3322 paddedsize = size_int (rsize);
75a70cf9 3323 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3324 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3325 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3326 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3327 unshare_expr (skip)));
644459d0 3328
3329 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3330 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3331 size_int (32)), unshare_expr (args));
644459d0 3332
75a70cf9 3333 gimplify_assign (addr, tmp, pre_p);
644459d0 3334
3335 /* update VALIST.__args */
0de36bdb 3336 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3337 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3338
3339 addr = fold_convert (build_pointer_type (type), addr);
3340
3341 if (pass_by_reference_p)
3342 addr = build_va_arg_indirect_ref (addr);
3343
3344 return build_va_arg_indirect_ref (addr);
3345}
3346
3347/* Save parameter registers starting with the register that corresponds
3348 to the first unnamed parameter. If the first unnamed parameter is
3349 in the stack then save no registers. Set pretend_args_size to the
3350 amount of space needed to save the registers. */
3351void
3352spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3353 tree type, int *pretend_size, int no_rtl)
3354{
3355 if (!no_rtl)
3356 {
3357 rtx tmp;
3358 int regno;
3359 int offset;
3360 int ncum = *cum;
3361
3362 /* cum currently points to the last named argument; we want to
3363 start at the next argument. */
3364 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3365
3366 offset = -STACK_POINTER_OFFSET;
3367 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3368 {
3369 tmp = gen_frame_mem (V4SImode,
3370 plus_constant (virtual_incoming_args_rtx,
3371 offset));
3372 emit_move_insn (tmp,
3373 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3374 offset += 16;
3375 }
3376 *pretend_size = offset + STACK_POINTER_OFFSET;
3377 }
3378}
3379\f
3380void
3381spu_conditional_register_usage (void)
3382{
3383 if (flag_pic)
3384 {
3385 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3386 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3387 }
644459d0 3388}
3389
3390/* This is called to decide when we can simplify a load instruction. We
3391 must only return true for registers which we know will always be
3392 aligned, taking into account that CSE might replace this reg with
3393 another one that has not been marked aligned.
3394 So this is really only true for frame, stack and virtual registers,
fa7637bd 3395 which we know are always aligned and should not be adversely affected
3396 by CSE. */
644459d0 3397static int
3398regno_aligned_for_load (int regno)
3399{
3400 return regno == FRAME_POINTER_REGNUM
5df189be 3401 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 3402 || regno == ARG_POINTER_REGNUM
644459d0 3403 || regno == STACK_POINTER_REGNUM
5df189be 3404 || (regno >= FIRST_VIRTUAL_REGISTER
3405 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 3406}
3407
3408/* Return TRUE when mem is known to be 16-byte aligned. */
3409int
3410aligned_mem_p (rtx mem)
3411{
3412 if (MEM_ALIGN (mem) >= 128)
3413 return 1;
3414 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3415 return 1;
3416 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3417 {
3418 rtx p0 = XEXP (XEXP (mem, 0), 0);
3419 rtx p1 = XEXP (XEXP (mem, 0), 1);
3420 if (regno_aligned_for_load (REGNO (p0)))
3421 {
3422 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3423 return 1;
3424 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3425 return 1;
3426 }
3427 }
3428 else if (GET_CODE (XEXP (mem, 0)) == REG)
3429 {
3430 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3431 return 1;
3432 }
3433 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3434 return 1;
3435 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3436 {
3437 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3438 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3439 if (GET_CODE (p0) == SYMBOL_REF
3440 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3441 return 1;
3442 }
3443 return 0;
3444}
3445
69ced2d6 3446/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3447 into its SYMBOL_REF_FLAGS. */
3448static void
3449spu_encode_section_info (tree decl, rtx rtl, int first)
3450{
3451 default_encode_section_info (decl, rtl, first);
3452
3453 /* If a variable has a forced alignment to < 16 bytes, mark it with
3454 SYMBOL_FLAG_ALIGN1. */
3455 if (TREE_CODE (decl) == VAR_DECL
3456 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3457 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3458}
3459
644459d0 3460/* Return TRUE if we are certain the mem refers to a complete object
3461 which is both 16-byte aligned and padded to a 16-byte boundary. This
3462 would make it safe to store with a single instruction.
3463 We guarantee the alignment and padding for static objects by aligning
3464 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3465 FIXME: We currently cannot guarantee this for objects on the stack
3466 because assign_parm_setup_stack calls assign_stack_local with the
3467 alignment of the parameter mode and in that case the alignment never
3468 gets adjusted by LOCAL_ALIGNMENT. */
3469static int
3470store_with_one_insn_p (rtx mem)
3471{
3472 rtx addr = XEXP (mem, 0);
3473 if (GET_MODE (mem) == BLKmode)
3474 return 0;
3475 /* Only static objects. */
3476 if (GET_CODE (addr) == SYMBOL_REF)
3477 {
3478 /* We use the associated declaration to make sure the access is
fa7637bd 3479 referring to the whole object.
644459d0 3480 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3481 if it is necessary. Will there be cases where one exists, and
3482 the other does not? Will there be cases where both exist, but
3483 have different types? */
3484 tree decl = MEM_EXPR (mem);
3485 if (decl
3486 && TREE_CODE (decl) == VAR_DECL
3487 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3488 return 1;
3489 decl = SYMBOL_REF_DECL (addr);
3490 if (decl
3491 && TREE_CODE (decl) == VAR_DECL
3492 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3493 return 1;
3494 }
3495 return 0;
3496}
3497
3498int
3499spu_expand_mov (rtx * ops, enum machine_mode mode)
3500{
3501 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3502 abort ();
3503
3504 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3505 {
3506 rtx from = SUBREG_REG (ops[1]);
3507 enum machine_mode imode = GET_MODE (from);
3508
3509 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3510 && GET_MODE_CLASS (imode) == MODE_INT
3511 && subreg_lowpart_p (ops[1]));
3512
3513 if (GET_MODE_SIZE (imode) < 4)
3514 {
3515 from = gen_rtx_SUBREG (SImode, from, 0);
3516 imode = SImode;
3517 }
3518
3519 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3520 {
99bdde56 3521 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 3522 emit_insn (GEN_FCN (icode) (ops[0], from));
3523 }
3524 else
3525 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3526 return 1;
3527 }
3528
3529 /* At least one of the operands needs to be a register. */
3530 if ((reload_in_progress | reload_completed) == 0
3531 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3532 {
3533 rtx temp = force_reg (mode, ops[1]);
3534 emit_move_insn (ops[0], temp);
3535 return 1;
3536 }
3537 if (reload_in_progress || reload_completed)
3538 {
dea01258 3539 if (CONSTANT_P (ops[1]))
3540 return spu_split_immediate (ops);
644459d0 3541 return 0;
3542 }
3543 else
3544 {
3545 if (GET_CODE (ops[0]) == MEM)
3546 {
3547 if (!spu_valid_move (ops))
3548 {
3549 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3550 gen_reg_rtx (TImode)));
3551 return 1;
3552 }
3553 }
3554 else if (GET_CODE (ops[1]) == MEM)
3555 {
3556 if (!spu_valid_move (ops))
3557 {
3558 emit_insn (gen_load
3559 (ops[0], ops[1], gen_reg_rtx (TImode),
3560 gen_reg_rtx (SImode)));
3561 return 1;
3562 }
3563 }
3564 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3565 extend them. */
3566 if (GET_CODE (ops[1]) == CONST_INT)
3567 {
3568 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3569 if (val != INTVAL (ops[1]))
3570 {
3571 emit_move_insn (ops[0], GEN_INT (val));
3572 return 1;
3573 }
3574 }
3575 }
3576 return 0;
3577}
3578
644459d0 3579void
3580spu_split_load (rtx * ops)
3581{
3582 enum machine_mode mode = GET_MODE (ops[0]);
3583 rtx addr, load, rot, mem, p0, p1;
3584 int rot_amt;
3585
3586 addr = XEXP (ops[1], 0);
3587
3588 rot = 0;
3589 rot_amt = 0;
3590 if (GET_CODE (addr) == PLUS)
3591 {
3592 /* 8 cases:
3593 aligned reg + aligned reg => lqx
3594 aligned reg + unaligned reg => lqx, rotqby
3595 aligned reg + aligned const => lqd
3596 aligned reg + unaligned const => lqd, rotqbyi
3597 unaligned reg + aligned reg => lqx, rotqby
3598 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3599 unaligned reg + aligned const => lqd, rotqby
3600 unaligned reg + unaligned const -> not allowed by legitimate address
3601 */
3602 p0 = XEXP (addr, 0);
3603 p1 = XEXP (addr, 1);
aa71ecd4 3604 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 3605 {
aa71ecd4 3606 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 3607 {
3608 emit_insn (gen_addsi3 (ops[3], p0, p1));
3609 rot = ops[3];
3610 }
3611 else
3612 rot = p0;
3613 }
3614 else
3615 {
3616 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3617 {
3618 rot_amt = INTVAL (p1) & 15;
3619 p1 = GEN_INT (INTVAL (p1) & -16);
3620 addr = gen_rtx_PLUS (SImode, p0, p1);
3621 }
aa71ecd4 3622 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 3623 rot = p1;
3624 }
3625 }
3626 else if (GET_CODE (addr) == REG)
3627 {
aa71ecd4 3628 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 3629 rot = addr;
3630 }
3631 else if (GET_CODE (addr) == CONST)
3632 {
3633 if (GET_CODE (XEXP (addr, 0)) == PLUS
3634 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3635 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3636 {
3637 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3638 if (rot_amt & -16)
3639 addr = gen_rtx_CONST (Pmode,
3640 gen_rtx_PLUS (Pmode,
3641 XEXP (XEXP (addr, 0), 0),
3642 GEN_INT (rot_amt & -16)));
3643 else
3644 addr = XEXP (XEXP (addr, 0), 0);
3645 }
3646 else
3647 rot = addr;
3648 }
3649 else if (GET_CODE (addr) == CONST_INT)
3650 {
3651 rot_amt = INTVAL (addr);
3652 addr = GEN_INT (rot_amt & -16);
3653 }
3654 else if (!ALIGNED_SYMBOL_REF_P (addr))
3655 rot = addr;
3656
3657 if (GET_MODE_SIZE (mode) < 4)
3658 rot_amt += GET_MODE_SIZE (mode) - 4;
3659
3660 rot_amt &= 15;
3661
3662 if (rot && rot_amt)
3663 {
3664 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3665 rot = ops[3];
3666 rot_amt = 0;
3667 }
3668
3669 load = ops[2];
3670
3671 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3672 mem = change_address (ops[1], TImode, addr);
3673
e04cf423 3674 emit_insn (gen_movti (load, mem));
644459d0 3675
3676 if (rot)
3677 emit_insn (gen_rotqby_ti (load, load, rot));
3678 else if (rot_amt)
3679 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3680
3681 if (reload_completed)
3682 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3683 else
3684 emit_insn (gen_spu_convert (ops[0], load));
3685}
3686
3687void
3688spu_split_store (rtx * ops)
3689{
3690 enum machine_mode mode = GET_MODE (ops[0]);
3691 rtx pat = ops[2];
3692 rtx reg = ops[3];
3693 rtx addr, p0, p1, p1_lo, smem;
3694 int aform;
3695 int scalar;
3696
3697 addr = XEXP (ops[0], 0);
3698
3699 if (GET_CODE (addr) == PLUS)
3700 {
3701 /* 8 cases:
3702 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3703 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3704 aligned reg + aligned const => lqd, c?d, shuf, stqx
3705 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3706 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3707 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3708 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3709 unaligned reg + unaligned const -> not allowed by legitimate address
3710 */
3711 aform = 0;
3712 p0 = XEXP (addr, 0);
3713 p1 = p1_lo = XEXP (addr, 1);
3714 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3715 {
3716 p1_lo = GEN_INT (INTVAL (p1) & 15);
3717 p1 = GEN_INT (INTVAL (p1) & -16);
3718 addr = gen_rtx_PLUS (SImode, p0, p1);
3719 }
3720 }
3721 else if (GET_CODE (addr) == REG)
3722 {
3723 aform = 0;
3724 p0 = addr;
3725 p1 = p1_lo = const0_rtx;
3726 }
3727 else
3728 {
3729 aform = 1;
3730 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3731 p1 = 0; /* aform doesn't use p1 */
3732 p1_lo = addr;
3733 if (ALIGNED_SYMBOL_REF_P (addr))
3734 p1_lo = const0_rtx;
3735 else if (GET_CODE (addr) == CONST)
3736 {
3737 if (GET_CODE (XEXP (addr, 0)) == PLUS
3738 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3739 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3740 {
3741 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3742 if ((v & -16) != 0)
3743 addr = gen_rtx_CONST (Pmode,
3744 gen_rtx_PLUS (Pmode,
3745 XEXP (XEXP (addr, 0), 0),
3746 GEN_INT (v & -16)));
3747 else
3748 addr = XEXP (XEXP (addr, 0), 0);
3749 p1_lo = GEN_INT (v & 15);
3750 }
3751 }
3752 else if (GET_CODE (addr) == CONST_INT)
3753 {
3754 p1_lo = GEN_INT (INTVAL (addr) & 15);
3755 addr = GEN_INT (INTVAL (addr) & -16);
3756 }
3757 }
3758
e04cf423 3759 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3760
644459d0 3761 scalar = store_with_one_insn_p (ops[0]);
3762 if (!scalar)
3763 {
3764 /* We could copy the flags from the ops[0] MEM to mem here, but
 3765 we don't because we want this load to be optimized away if
3766 possible, and copying the flags will prevent that in certain
3767 cases, e.g. consider the volatile flag. */
3768
e04cf423 3769 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3770 set_mem_alias_set (lmem, 0);
3771 emit_insn (gen_movti (reg, lmem));
644459d0 3772
aa71ecd4 3773 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 3774 p0 = stack_pointer_rtx;
3775 if (!p1_lo)
3776 p1_lo = const0_rtx;
3777
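	  /* Generate the insertion mask for this store's size and alignment,
	     then merge the new value into the quadword that was just loaded;
	     the combined quadword is stored back below.  */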
3778 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3779 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3780 }
3781 else if (reload_completed)
3782 {
3783 if (GET_CODE (ops[1]) == REG)
3784 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3785 else if (GET_CODE (ops[1]) == SUBREG)
3786 emit_move_insn (reg,
3787 gen_rtx_REG (GET_MODE (reg),
3788 REGNO (SUBREG_REG (ops[1]))));
3789 else
3790 abort ();
3791 }
3792 else
3793 {
3794 if (GET_CODE (ops[1]) == REG)
3795 emit_insn (gen_spu_convert (reg, ops[1]));
3796 else if (GET_CODE (ops[1]) == SUBREG)
3797 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3798 else
3799 abort ();
3800 }
3801
3802 if (GET_MODE_SIZE (mode) < 4 && scalar)
3803 emit_insn (gen_shlqby_ti
3804 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3805
644459d0 3806 smem = change_address (ops[0], TImode, addr);
3807 /* We can't use the previous alias set because the memory has changed
3808 size and can potentially overlap objects of other types. */
3809 set_mem_alias_set (smem, 0);
3810
e04cf423 3811 emit_insn (gen_movti (smem, reg));
644459d0 3812}
3813
 3814/* Return TRUE if X is a MEM which is a struct member reference
3815 and the member can safely be loaded and stored with a single
3816 instruction because it is padded. */
3817static int
3818mem_is_padded_component_ref (rtx x)
3819{
3820 tree t = MEM_EXPR (x);
3821 tree r;
3822 if (!t || TREE_CODE (t) != COMPONENT_REF)
3823 return 0;
3824 t = TREE_OPERAND (t, 1);
3825 if (!t || TREE_CODE (t) != FIELD_DECL
3826 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3827 return 0;
3828 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3829 r = DECL_FIELD_CONTEXT (t);
3830 if (!r || TREE_CODE (r) != RECORD_TYPE)
3831 return 0;
 3832	 /* Make sure they are the same mode.  */
3833 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3834 return 0;
3835 /* If there are no following fields then the field alignment assures
fa7637bd 3836 the structure is padded to the alignment which means this field is
3837 padded too. */
644459d0 3838 if (TREE_CHAIN (t) == 0)
3839 return 1;
3840 /* If the following field is also aligned then this field will be
3841 padded. */
3842 t = TREE_CHAIN (t);
3843 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3844 return 1;
3845 return 0;
3846}
3847
c7b91b14 3848/* Parse the -mfixed-range= option string. */
3849static void
3850fix_range (const char *const_str)
3851{
3852 int i, first, last;
3853 char *str, *dash, *comma;
3854
 3855	 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3856 REG2 are either register names or register numbers. The effect
3857 of this option is to mark the registers in the range from REG1 to
3858 REG2 as ``fixed'' so they won't be used by the compiler. */
3859
3860 i = strlen (const_str);
3861 str = (char *) alloca (i + 1);
3862 memcpy (str, const_str, i + 1);
3863
3864 while (1)
3865 {
3866 dash = strchr (str, '-');
3867 if (!dash)
3868 {
3869 warning (0, "value of -mfixed-range must have form REG1-REG2");
3870 return;
3871 }
3872 *dash = '\0';
3873 comma = strchr (dash + 1, ',');
3874 if (comma)
3875 *comma = '\0';
3876
3877 first = decode_reg_name (str);
3878 if (first < 0)
3879 {
3880 warning (0, "unknown register name: %s", str);
3881 return;
3882 }
3883
3884 last = decode_reg_name (dash + 1);
3885 if (last < 0)
3886 {
3887 warning (0, "unknown register name: %s", dash + 1);
3888 return;
3889 }
3890
3891 *dash = '-';
3892
3893 if (first > last)
3894 {
3895 warning (0, "%s-%s is an empty range", str, dash + 1);
3896 return;
3897 }
3898
3899 for (i = first; i <= last; ++i)
3900 fixed_regs[i] = call_used_regs[i] = 1;
3901
3902 if (!comma)
3903 break;
3904
3905 *comma = ',';
3906 str = comma + 1;
3907 }
3908}
3909
644459d0 3910int
3911spu_valid_move (rtx * ops)
3912{
3913 enum machine_mode mode = GET_MODE (ops[0]);
3914 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3915 return 0;
3916
3917 /* init_expr_once tries to recog against load and store insns to set
3918 the direct_load[] and direct_store[] arrays. We always want to
3919 consider those loads and stores valid. init_expr_once is called in
3920 the context of a dummy function which does not have a decl. */
3921 if (cfun->decl == 0)
3922 return 1;
3923
 3924	 /* Don't allow loads/stores that would require more than 1 insn.
3925 During and after reload we assume loads and stores only take 1
3926 insn. */
3927 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3928 {
3929 if (GET_CODE (ops[0]) == MEM
3930 && (GET_MODE_SIZE (mode) < 4
3931 || !(store_with_one_insn_p (ops[0])
3932 || mem_is_padded_component_ref (ops[0]))))
3933 return 0;
3934 if (GET_CODE (ops[1]) == MEM
3935 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3936 return 0;
3937 }
3938 return 1;
3939}
3940
3941/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3942 can be generated using the fsmbi instruction. */
3943int
3944fsmbi_const_p (rtx x)
3945{
dea01258 3946 if (CONSTANT_P (x))
3947 {
5df189be 3948 /* We can always choose TImode for CONST_INT because the high bits
dea01258 3949 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 3950 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 3951 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 3952 }
3953 return 0;
3954}
3955
3956/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3957 can be generated using the cbd, chd, cwd or cdd instruction. */
3958int
3959cpat_const_p (rtx x, enum machine_mode mode)
3960{
3961 if (CONSTANT_P (x))
3962 {
3963 enum immediate_class c = classify_immediate (x, mode);
3964 return c == IC_CPAT;
3965 }
3966 return 0;
3967}
644459d0 3968
dea01258 3969rtx
3970gen_cpat_const (rtx * ops)
3971{
3972 unsigned char dst[16];
3973 int i, offset, shift, isize;
3974 if (GET_CODE (ops[3]) != CONST_INT
3975 || GET_CODE (ops[2]) != CONST_INT
3976 || (GET_CODE (ops[1]) != CONST_INT
3977 && GET_CODE (ops[1]) != REG))
3978 return 0;
3979 if (GET_CODE (ops[1]) == REG
3980 && (!REG_POINTER (ops[1])
3981 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3982 return 0;
644459d0 3983
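  /* Start with a pattern that selects every byte from the second shufb
     operand (selectors 16..31), then overwrite ISIZE bytes at OFFSET with
     selectors for the preferred-slot bytes of the first operand, mirroring
     the control word the cbd/chd/cwd/cdd instructions generate.  */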
3984 for (i = 0; i < 16; i++)
dea01258 3985 dst[i] = i + 16;
3986 isize = INTVAL (ops[3]);
3987 if (isize == 1)
3988 shift = 3;
3989 else if (isize == 2)
3990 shift = 2;
3991 else
3992 shift = 0;
3993 offset = (INTVAL (ops[2]) +
3994 (GET_CODE (ops[1]) ==
3995 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3996 for (i = 0; i < isize; i++)
3997 dst[offset + i] = i + shift;
3998 return array_to_constant (TImode, dst);
644459d0 3999}
4000
4001/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4002 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4003 than 16 bytes, the value is repeated across the rest of the array. */
4004void
4005constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4006{
4007 HOST_WIDE_INT val;
4008 int i, j, first;
4009
4010 memset (arr, 0, 16);
4011 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4012 if (GET_CODE (x) == CONST_INT
4013 || (GET_CODE (x) == CONST_DOUBLE
4014 && (mode == SFmode || mode == DFmode)))
4015 {
4016 gcc_assert (mode != VOIDmode && mode != BLKmode);
4017
4018 if (GET_CODE (x) == CONST_DOUBLE)
4019 val = const_double_to_hwint (x);
4020 else
4021 val = INTVAL (x);
4022 first = GET_MODE_SIZE (mode) - 1;
4023 for (i = first; i >= 0; i--)
4024 {
4025 arr[i] = val & 0xff;
4026 val >>= 8;
4027 }
4028 /* Splat the constant across the whole array. */
4029 for (j = 0, i = first + 1; i < 16; i++)
4030 {
4031 arr[i] = arr[j];
4032 j = (j == first) ? 0 : j + 1;
4033 }
4034 }
4035 else if (GET_CODE (x) == CONST_DOUBLE)
4036 {
4037 val = CONST_DOUBLE_LOW (x);
4038 for (i = 15; i >= 8; i--)
4039 {
4040 arr[i] = val & 0xff;
4041 val >>= 8;
4042 }
4043 val = CONST_DOUBLE_HIGH (x);
4044 for (i = 7; i >= 0; i--)
4045 {
4046 arr[i] = val & 0xff;
4047 val >>= 8;
4048 }
4049 }
4050 else if (GET_CODE (x) == CONST_VECTOR)
4051 {
4052 int units;
4053 rtx elt;
4054 mode = GET_MODE_INNER (mode);
4055 units = CONST_VECTOR_NUNITS (x);
4056 for (i = 0; i < units; i++)
4057 {
4058 elt = CONST_VECTOR_ELT (x, i);
4059 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4060 {
4061 if (GET_CODE (elt) == CONST_DOUBLE)
4062 val = const_double_to_hwint (elt);
4063 else
4064 val = INTVAL (elt);
4065 first = GET_MODE_SIZE (mode) - 1;
4066 if (first + i * GET_MODE_SIZE (mode) > 16)
4067 abort ();
4068 for (j = first; j >= 0; j--)
4069 {
4070 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4071 val >>= 8;
4072 }
4073 }
4074 }
4075 }
4076 else
4077 gcc_unreachable();
4078}
4079
4080/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4081 smaller than 16 bytes, use the bytes that would represent that value
4082 in a register, e.g., for QImode return the value of arr[3]. */
4083rtx
4084array_to_constant (enum machine_mode mode, unsigned char arr[16])
4085{
4086 enum machine_mode inner_mode;
4087 rtvec v;
4088 int units, size, i, j, k;
4089 HOST_WIDE_INT val;
4090
4091 if (GET_MODE_CLASS (mode) == MODE_INT
4092 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4093 {
4094 j = GET_MODE_SIZE (mode);
4095 i = j < 4 ? 4 - j : 0;
4096 for (val = 0; i < j; i++)
4097 val = (val << 8) | arr[i];
4098 val = trunc_int_for_mode (val, mode);
4099 return GEN_INT (val);
4100 }
4101
4102 if (mode == TImode)
4103 {
4104 HOST_WIDE_INT high;
4105 for (i = high = 0; i < 8; i++)
4106 high = (high << 8) | arr[i];
4107 for (i = 8, val = 0; i < 16; i++)
4108 val = (val << 8) | arr[i];
4109 return immed_double_const (val, high, TImode);
4110 }
4111 if (mode == SFmode)
4112 {
4113 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4114 val = trunc_int_for_mode (val, SImode);
171b6d22 4115 return hwint_to_const_double (SFmode, val);
644459d0 4116 }
4117 if (mode == DFmode)
4118 {
4119 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4120 val <<= 32;
4121 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 4122 return hwint_to_const_double (DFmode, val);
644459d0 4123 }
4124
4125 if (!VECTOR_MODE_P (mode))
4126 abort ();
4127
4128 units = GET_MODE_NUNITS (mode);
4129 size = GET_MODE_UNIT_SIZE (mode);
4130 inner_mode = GET_MODE_INNER (mode);
4131 v = rtvec_alloc (units);
4132
4133 for (k = i = 0; i < units; ++i)
4134 {
4135 val = 0;
4136 for (j = 0; j < size; j++, k++)
4137 val = (val << 8) | arr[k];
4138
4139 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4140 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4141 else
4142 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4143 }
4144 if (k > 16)
4145 abort ();
4146
4147 return gen_rtx_CONST_VECTOR (mode, v);
4148}
4149
4150static void
4151reloc_diagnostic (rtx x)
4152{
4153 tree loc_decl, decl = 0;
4154 const char *msg;
4155 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4156 return;
4157
4158 if (GET_CODE (x) == SYMBOL_REF)
4159 decl = SYMBOL_REF_DECL (x);
4160 else if (GET_CODE (x) == CONST
4161 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4162 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4163
4164 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4165 if (decl && !DECL_P (decl))
4166 decl = 0;
4167
4168 /* We use last_assemble_variable_decl to get line information. It's
4169 not always going to be right and might not even be close, but will
4170 be right for the more common cases. */
5df189be 4171 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4172 loc_decl = decl;
4173 else
4174 loc_decl = last_assemble_variable_decl;
4175
4176 /* The decl could be a string constant. */
4177 if (decl && DECL_P (decl))
4178 msg = "%Jcreating run-time relocation for %qD";
4179 else
4180 msg = "creating run-time relocation";
4181
99369027 4182 if (TARGET_WARN_RELOC)
644459d0 4183 warning (0, msg, loc_decl, decl);
99369027 4184 else
4185 error (msg, loc_decl, decl);
644459d0 4186}
4187
4188/* Hook into assemble_integer so we can generate an error for run-time
4189 relocations. The SPU ABI disallows them. */
4190static bool
4191spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4192{
4193 /* By default run-time relocations aren't supported, but we allow them
4194 in case users support it in their own run-time loader. And we provide
4195 a warning for those users that don't. */
4196 if ((GET_CODE (x) == SYMBOL_REF)
4197 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4198 reloc_diagnostic (x);
4199
4200 return default_assemble_integer (x, size, aligned_p);
4201}
4202
4203static void
4204spu_asm_globalize_label (FILE * file, const char *name)
4205{
4206 fputs ("\t.global\t", file);
4207 assemble_name (file, name);
4208 fputs ("\n", file);
4209}
4210
4211static bool
f529eb25 4212spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4213 bool speed ATTRIBUTE_UNUSED)
644459d0 4214{
4215 enum machine_mode mode = GET_MODE (x);
4216 int cost = COSTS_N_INSNS (2);
4217
4218 /* Folding to a CONST_VECTOR will use extra space but there might
4219 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4220 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4221 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4222 because this cost will only be compared against a single insn.
4223 if (code == CONST_VECTOR)
4224 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4225 */
4226
4227 /* Use defaults for float operations. Not accurate but good enough. */
4228 if (mode == DFmode)
4229 {
4230 *total = COSTS_N_INSNS (13);
4231 return true;
4232 }
4233 if (mode == SFmode)
4234 {
4235 *total = COSTS_N_INSNS (6);
4236 return true;
4237 }
4238 switch (code)
4239 {
4240 case CONST_INT:
4241 if (satisfies_constraint_K (x))
4242 *total = 0;
4243 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4244 *total = COSTS_N_INSNS (1);
4245 else
4246 *total = COSTS_N_INSNS (3);
4247 return true;
4248
4249 case CONST:
4250 *total = COSTS_N_INSNS (3);
4251 return true;
4252
4253 case LABEL_REF:
4254 case SYMBOL_REF:
4255 *total = COSTS_N_INSNS (0);
4256 return true;
4257
4258 case CONST_DOUBLE:
4259 *total = COSTS_N_INSNS (5);
4260 return true;
4261
4262 case FLOAT_EXTEND:
4263 case FLOAT_TRUNCATE:
4264 case FLOAT:
4265 case UNSIGNED_FLOAT:
4266 case FIX:
4267 case UNSIGNED_FIX:
4268 *total = COSTS_N_INSNS (7);
4269 return true;
4270
4271 case PLUS:
4272 if (mode == TImode)
4273 {
4274 *total = COSTS_N_INSNS (9);
4275 return true;
4276 }
4277 break;
4278
4279 case MULT:
4280 cost =
4281 GET_CODE (XEXP (x, 0)) ==
4282 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4283 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4284 {
4285 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4286 {
4287 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4288 cost = COSTS_N_INSNS (14);
4289 if ((val & 0xffff) == 0)
4290 cost = COSTS_N_INSNS (9);
4291 else if (val > 0 && val < 0x10000)
4292 cost = COSTS_N_INSNS (11);
4293 }
4294 }
4295 *total = cost;
4296 return true;
4297 case DIV:
4298 case UDIV:
4299 case MOD:
4300 case UMOD:
4301 *total = COSTS_N_INSNS (20);
4302 return true;
4303 case ROTATE:
4304 case ROTATERT:
4305 case ASHIFT:
4306 case ASHIFTRT:
4307 case LSHIFTRT:
4308 *total = COSTS_N_INSNS (4);
4309 return true;
4310 case UNSPEC:
4311 if (XINT (x, 1) == UNSPEC_CONVERT)
4312 *total = COSTS_N_INSNS (0);
4313 else
4314 *total = COSTS_N_INSNS (4);
4315 return true;
4316 }
4317 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4318 if (GET_MODE_CLASS (mode) == MODE_INT
4319 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4320 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4321 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4322 *total = cost;
4323 return true;
4324}
4325
1bd43494 4326static enum machine_mode
4327spu_unwind_word_mode (void)
644459d0 4328{
1bd43494 4329 return SImode;
644459d0 4330}
4331
4332/* Decide whether we can make a sibling call to a function. DECL is the
4333 declaration of the function being targeted by the call and EXP is the
4334 CALL_EXPR representing the call. */
4335static bool
4336spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4337{
4338 return decl && !TARGET_LARGE_MEM;
4339}
4340
4341/* We need to correctly update the back chain pointer and the Available
 4342	 Stack Size (which is in the second slot of the sp register). */
4343void
4344spu_allocate_stack (rtx op0, rtx op1)
4345{
4346 HOST_WIDE_INT v;
4347 rtx chain = gen_reg_rtx (V4SImode);
4348 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4349 rtx sp = gen_reg_rtx (V4SImode);
4350 rtx splatted = gen_reg_rtx (V4SImode);
4351 rtx pat = gen_reg_rtx (TImode);
4352
4353 /* copy the back chain so we can save it back again. */
4354 emit_move_insn (chain, stack_bot);
4355
4356 op1 = force_reg (SImode, op1);
4357
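	  /* This shuffle pattern (0x00010203 in every word) splats the
	     preferred-slot word of op1 across all four elements so it can
	     be subtracted from each word of the stack pointer register.  */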
4358 v = 0x1020300010203ll;
4359 emit_move_insn (pat, immed_double_const (v, v, TImode));
4360 emit_insn (gen_shufb (splatted, op1, op1, pat));
4361
4362 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4363 emit_insn (gen_subv4si3 (sp, sp, splatted));
4364
4365 if (flag_stack_check)
4366 {
4367 rtx avail = gen_reg_rtx(SImode);
4368 rtx result = gen_reg_rtx(SImode);
4369 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4370 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4371 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4372 }
4373
4374 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4375
4376 emit_move_insn (stack_bot, chain);
4377
4378 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4379}
4380
4381void
4382spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4383{
4384 static unsigned char arr[16] =
4385 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4386 rtx temp = gen_reg_rtx (SImode);
4387 rtx temp2 = gen_reg_rtx (SImode);
4388 rtx temp3 = gen_reg_rtx (V4SImode);
4389 rtx temp4 = gen_reg_rtx (V4SImode);
4390 rtx pat = gen_reg_rtx (TImode);
4391 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4392
4393 /* Restore the backchain from the first word, sp from the second. */
4394 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4395 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4396
4397 emit_move_insn (pat, array_to_constant (TImode, arr));
4398
4399 /* Compute Available Stack Size for sp */
4400 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4401 emit_insn (gen_shufb (temp3, temp, temp, pat));
4402
4403 /* Compute Available Stack Size for back chain */
4404 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4405 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4406 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4407
4408 emit_insn (gen_addv4si3 (sp, sp, temp3));
4409 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4410}
4411
4412static void
4413spu_init_libfuncs (void)
4414{
4415 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4416 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4417 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4418 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4419 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4420 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4421 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4422 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4423 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4424 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4425 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4426
4427 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4428 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 4429
4430 set_optab_libfunc (smul_optab, TImode, "__multi3");
4431 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
4432 set_optab_libfunc (smod_optab, TImode, "__modti3");
4433 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
4434 set_optab_libfunc (umod_optab, TImode, "__umodti3");
4435 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 4436}
4437
4438/* Make a subreg, stripping any existing subreg. We could possibly just
4439 call simplify_subreg, but in this case we know what we want. */
4440rtx
4441spu_gen_subreg (enum machine_mode mode, rtx x)
4442{
4443 if (GET_CODE (x) == SUBREG)
4444 x = SUBREG_REG (x);
4445 if (GET_MODE (x) == mode)
4446 return x;
4447 return gen_rtx_SUBREG (mode, x, 0);
4448}
4449
4450static bool
fb80456a 4451spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 4452{
4453 return (TYPE_MODE (type) == BLKmode
4454 && ((type) == 0
4455 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4456 || int_size_in_bytes (type) >
4457 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4458}
4459\f
4460/* Create the built-in types and functions */
4461
4462struct spu_builtin_description spu_builtins[] = {
4463#define DEF_BUILTIN(fcode, icode, name, type, params) \
4464 {fcode, icode, name, type, params, NULL_TREE},
4465#include "spu-builtins.def"
4466#undef DEF_BUILTIN
4467};
4468
4469static void
4470spu_init_builtins (void)
4471{
4472 struct spu_builtin_description *d;
4473 unsigned int i;
4474
4475 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4476 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4477 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4478 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4479 V4SF_type_node = build_vector_type (float_type_node, 4);
4480 V2DF_type_node = build_vector_type (double_type_node, 2);
4481
4482 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4483 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4484 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4485 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4486
c4ecce0c 4487 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 4488
4489 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4490 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4491 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4492 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4493 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4494 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4495 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4496 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4497 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4498 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4499 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4500 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4501
4502 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4503 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4504 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4505 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4506 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4507 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4508 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4509 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4510
4511 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4512 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4513
4514 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4515
4516 spu_builtin_types[SPU_BTI_PTR] =
4517 build_pointer_type (build_qualified_type
4518 (void_type_node,
4519 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4520
4521 /* For each builtin we build a new prototype. The tree code will make
4522 sure nodes are shared. */
4523 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4524 {
4525 tree p;
4526 char name[64]; /* build_function will make a copy. */
4527 int parm;
4528
4529 if (d->name == 0)
4530 continue;
4531
5dfbd18f 4532 /* Find last parm. */
644459d0 4533 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 4534 ;
644459d0 4535
4536 p = void_list_node;
4537 while (parm > 1)
4538 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4539
4540 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4541
4542 sprintf (name, "__builtin_%s", d->name);
4543 d->fndecl =
4544 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4545 NULL, NULL_TREE);
a76866d3 4546 if (d->fcode == SPU_MASK_FOR_LOAD)
4547 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 4548
4549 /* These builtins don't throw. */
4550 TREE_NOTHROW (d->fndecl) = 1;
644459d0 4551 }
4552}
4553
cf31d486 4554void
4555spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4556{
4557 static unsigned char arr[16] =
4558 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4559
4560 rtx temp = gen_reg_rtx (Pmode);
4561 rtx temp2 = gen_reg_rtx (V4SImode);
4562 rtx temp3 = gen_reg_rtx (V4SImode);
4563 rtx pat = gen_reg_rtx (TImode);
4564 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4565
4566 emit_move_insn (pat, array_to_constant (TImode, arr));
4567
4568 /* Restore the sp. */
4569 emit_move_insn (temp, op1);
4570 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4571
4572 /* Compute available stack size for sp. */
4573 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4574 emit_insn (gen_shufb (temp3, temp, temp, pat));
4575
4576 emit_insn (gen_addv4si3 (sp, sp, temp3));
4577 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4578}
4579
644459d0 4580int
4581spu_safe_dma (HOST_WIDE_INT channel)
4582{
006e4b96 4583 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 4584}
4585
4586void
4587spu_builtin_splats (rtx ops[])
4588{
4589 enum machine_mode mode = GET_MODE (ops[0]);
4590 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4591 {
4592 unsigned char arr[16];
4593 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4594 emit_move_insn (ops[0], array_to_constant (mode, arr));
4595 }
644459d0 4596 else
4597 {
4598 rtx reg = gen_reg_rtx (TImode);
4599 rtx shuf;
4600 if (GET_CODE (ops[1]) != REG
4601 && GET_CODE (ops[1]) != SUBREG)
4602 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
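	  /* Each shuffle constant below replicates the preferred-slot
	     element of ops[1] into every element of the result.  */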
4603 switch (mode)
4604 {
4605 case V2DImode:
4606 case V2DFmode:
4607 shuf =
4608 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4609 TImode);
4610 break;
4611 case V4SImode:
4612 case V4SFmode:
4613 shuf =
4614 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4615 TImode);
4616 break;
4617 case V8HImode:
4618 shuf =
4619 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4620 TImode);
4621 break;
4622 case V16QImode:
4623 shuf =
4624 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4625 TImode);
4626 break;
4627 default:
4628 abort ();
4629 }
4630 emit_move_insn (reg, shuf);
4631 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4632 }
4633}
4634
4635void
4636spu_builtin_extract (rtx ops[])
4637{
4638 enum machine_mode mode;
4639 rtx rot, from, tmp;
4640
4641 mode = GET_MODE (ops[1]);
4642
4643 if (GET_CODE (ops[2]) == CONST_INT)
4644 {
4645 switch (mode)
4646 {
4647 case V16QImode:
4648 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4649 break;
4650 case V8HImode:
4651 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4652 break;
4653 case V4SFmode:
4654 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4655 break;
4656 case V4SImode:
4657 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4658 break;
4659 case V2DImode:
4660 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4661 break;
4662 case V2DFmode:
4663 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4664 break;
4665 default:
4666 abort ();
4667 }
4668 return;
4669 }
4670
4671 from = spu_gen_subreg (TImode, ops[1]);
4672 rot = gen_reg_rtx (TImode);
4673 tmp = gen_reg_rtx (SImode);
4674
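	  /* Compute the byte rotation that brings element ops[2] into the
	     preferred scalar slot: byte 3 for V16QI, bytes 2-3 for V8HI,
	     and bytes 0-3 (or 0-7) for word and doubleword elements.  */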
4675 switch (mode)
4676 {
4677 case V16QImode:
4678 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4679 break;
4680 case V8HImode:
4681 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4682 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4683 break;
4684 case V4SFmode:
4685 case V4SImode:
4686 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4687 break;
4688 case V2DImode:
4689 case V2DFmode:
4690 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4691 break;
4692 default:
4693 abort ();
4694 }
4695 emit_insn (gen_rotqby_ti (rot, from, tmp));
4696
4697 emit_insn (gen_spu_convert (ops[0], rot));
4698}
4699
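 /* Insert the scalar ops[1] at element ops[3] of the vector ops[2], leaving
    the result in ops[0].  The cpat mask selects the scalar's preferred-slot
    bytes at the element's byte offset and the original vector bytes
    elsewhere; since the stack pointer is 16-byte aligned, only the offset
    affects the generated mask.  */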
4700void
4701spu_builtin_insert (rtx ops[])
4702{
4703 enum machine_mode mode = GET_MODE (ops[0]);
4704 enum machine_mode imode = GET_MODE_INNER (mode);
4705 rtx mask = gen_reg_rtx (TImode);
4706 rtx offset;
4707
4708 if (GET_CODE (ops[3]) == CONST_INT)
4709 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4710 else
4711 {
4712 offset = gen_reg_rtx (SImode);
4713 emit_insn (gen_mulsi3
4714 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4715 }
4716 emit_insn (gen_cpat
4717 (mask, stack_pointer_rtx, offset,
4718 GEN_INT (GET_MODE_SIZE (imode))));
4719 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4720}
4721
4722void
4723spu_builtin_promote (rtx ops[])
4724{
4725 enum machine_mode mode, imode;
4726 rtx rot, from, offset;
4727 HOST_WIDE_INT pos;
4728
4729 mode = GET_MODE (ops[0]);
4730 imode = GET_MODE_INNER (mode);
4731
4732 from = gen_reg_rtx (TImode);
4733 rot = spu_gen_subreg (TImode, ops[0]);
4734
4735 emit_insn (gen_spu_convert (from, ops[1]));
4736
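	  /* Compute the left rotation (in bytes) that moves the scalar from
	     its preferred slot to element ops[2] of the result vector.  */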
4737 if (GET_CODE (ops[2]) == CONST_INT)
4738 {
4739 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4740 if (GET_MODE_SIZE (imode) < 4)
4741 pos += 4 - GET_MODE_SIZE (imode);
4742 offset = GEN_INT (pos & 15);
4743 }
4744 else
4745 {
4746 offset = gen_reg_rtx (SImode);
4747 switch (mode)
4748 {
4749 case V16QImode:
4750 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4751 break;
4752 case V8HImode:
4753 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4754 emit_insn (gen_addsi3 (offset, offset, offset));
4755 break;
4756 case V4SFmode:
4757 case V4SImode:
4758 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4759 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4760 break;
4761 case V2DImode:
4762 case V2DFmode:
4763 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4764 break;
4765 default:
4766 abort ();
4767 }
4768 }
4769 emit_insn (gen_rotqby_ti (rot, from, offset));
4770}
4771
4772void
4773spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4774{
4775 rtx shuf = gen_reg_rtx (V4SImode);
4776 rtx insn = gen_reg_rtx (V4SImode);
4777 rtx shufc;
4778 rtx insnc;
4779 rtx mem;
4780
4781 fnaddr = force_reg (SImode, fnaddr);
4782 cxt = force_reg (SImode, cxt);
4783
4784 if (TARGET_LARGE_MEM)
4785 {
4786 rtx rotl = gen_reg_rtx (V4SImode);
4787 rtx mask = gen_reg_rtx (V4SImode);
4788 rtx bi = gen_reg_rtx (SImode);
4789 unsigned char shufa[16] = {
4790 2, 3, 0, 1, 18, 19, 16, 17,
4791 0, 1, 2, 3, 16, 17, 18, 19
4792 };
4793 unsigned char insna[16] = {
4794 0x41, 0, 0, 79,
4795 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4796 0x60, 0x80, 0, 79,
4797 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4798 };
4799
4800 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4801 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4802
4803 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 4804 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 4805 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4806 emit_insn (gen_selb (insn, insnc, rotl, mask));
4807
4808 mem = memory_address (Pmode, tramp);
4809 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4810
4811 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4812 mem = memory_address (Pmode, plus_constant (tramp, 16));
4813 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4814 }
4815 else
4816 {
4817 rtx scxt = gen_reg_rtx (SImode);
4818 rtx sfnaddr = gen_reg_rtx (SImode);
4819 unsigned char insna[16] = {
4820 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4821 0x30, 0, 0, 0,
4822 0, 0, 0, 0,
4823 0, 0, 0, 0
4824 };
4825
4826 shufc = gen_reg_rtx (TImode);
4827 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4828
4829 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4830 fits 18 bits and the last 4 are zeros. This will be true if
4831 the stack pointer is initialized to 0x3fff0 at program start,
4832 otherwise the ila instruction will be garbage. */
4833
4834 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4835 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4836 emit_insn (gen_cpat
4837 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4838 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4839 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4840
4841 mem = memory_address (Pmode, tramp);
4842 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4843
4844 }
4845 emit_insn (gen_sync ());
4846}
4847
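 /* Expand a sign extension of ops[1] into the wider mode of ops[0] (DImode
    or TImode).  Build a shufb pattern that fills the upper bytes of the
    result with copies of the sign and places the (sign-extended) source
    value in the low bytes.  */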
4848void
4849spu_expand_sign_extend (rtx ops[])
4850{
4851 unsigned char arr[16];
4852 rtx pat = gen_reg_rtx (TImode);
4853 rtx sign, c;
4854 int i, last;
4855 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4856 if (GET_MODE (ops[1]) == QImode)
4857 {
4858 sign = gen_reg_rtx (HImode);
4859 emit_insn (gen_extendqihi2 (sign, ops[1]));
4860 for (i = 0; i < 16; i++)
4861 arr[i] = 0x12;
4862 arr[last] = 0x13;
4863 }
4864 else
4865 {
4866 for (i = 0; i < 16; i++)
4867 arr[i] = 0x10;
4868 switch (GET_MODE (ops[1]))
4869 {
4870 case HImode:
4871 sign = gen_reg_rtx (SImode);
4872 emit_insn (gen_extendhisi2 (sign, ops[1]));
4873 arr[last] = 0x03;
4874 arr[last - 1] = 0x02;
4875 break;
4876 case SImode:
4877 sign = gen_reg_rtx (SImode);
4878 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4879 for (i = 0; i < 4; i++)
4880 arr[last - i] = 3 - i;
4881 break;
4882 case DImode:
4883 sign = gen_reg_rtx (SImode);
4884 c = gen_reg_rtx (SImode);
4885 emit_insn (gen_spu_convert (c, ops[1]));
4886 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4887 for (i = 0; i < 8; i++)
4888 arr[last - i] = 7 - i;
4889 break;
4890 default:
4891 abort ();
4892 }
4893 }
4894 emit_move_insn (pat, array_to_constant (TImode, arr));
4895 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4896}
4897
 4898/* Expand vector initialization. If there are any constant parts,
 4899	 load the constant parts first, then load any non-constant parts. */
4900void
4901spu_expand_vector_init (rtx target, rtx vals)
4902{
4903 enum machine_mode mode = GET_MODE (target);
4904 int n_elts = GET_MODE_NUNITS (mode);
4905 int n_var = 0;
4906 bool all_same = true;
790c536c 4907 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 4908 int i;
4909
4910 first = XVECEXP (vals, 0, 0);
4911 for (i = 0; i < n_elts; ++i)
4912 {
4913 x = XVECEXP (vals, 0, i);
e442af0b 4914 if (!(CONST_INT_P (x)
4915 || GET_CODE (x) == CONST_DOUBLE
4916 || GET_CODE (x) == CONST_FIXED))
644459d0 4917 ++n_var;
4918 else
4919 {
4920 if (first_constant == NULL_RTX)
4921 first_constant = x;
4922 }
4923 if (i > 0 && !rtx_equal_p (x, first))
4924 all_same = false;
4925 }
4926
4927 /* if all elements are the same, use splats to repeat elements */
4928 if (all_same)
4929 {
4930 if (!CONSTANT_P (first)
4931 && !register_operand (first, GET_MODE (x)))
4932 first = force_reg (GET_MODE (first), first);
4933 emit_insn (gen_spu_splats (target, first));
4934 return;
4935 }
4936
4937 /* load constant parts */
4938 if (n_var != n_elts)
4939 {
4940 if (n_var == 0)
4941 {
4942 emit_move_insn (target,
4943 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4944 }
4945 else
4946 {
4947 rtx constant_parts_rtx = copy_rtx (vals);
4948
4949 gcc_assert (first_constant != NULL_RTX);
 4950	 /* Fill empty slots with the first constant; this increases
4951 our chance of using splats in the recursive call below. */
4952 for (i = 0; i < n_elts; ++i)
e442af0b 4953 {
4954 x = XVECEXP (constant_parts_rtx, 0, i);
4955 if (!(CONST_INT_P (x)
4956 || GET_CODE (x) == CONST_DOUBLE
4957 || GET_CODE (x) == CONST_FIXED))
4958 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4959 }
644459d0 4960
4961 spu_expand_vector_init (target, constant_parts_rtx);
4962 }
4963 }
4964
4965 /* load variable parts */
4966 if (n_var != 0)
4967 {
4968 rtx insert_operands[4];
4969
4970 insert_operands[0] = target;
4971 insert_operands[2] = target;
4972 for (i = 0; i < n_elts; ++i)
4973 {
4974 x = XVECEXP (vals, 0, i);
e442af0b 4975 if (!(CONST_INT_P (x)
4976 || GET_CODE (x) == CONST_DOUBLE
4977 || GET_CODE (x) == CONST_FIXED))
644459d0 4978 {
4979 if (!register_operand (x, GET_MODE (x)))
4980 x = force_reg (GET_MODE (x), x);
4981 insert_operands[1] = x;
4982 insert_operands[3] = GEN_INT (i);
4983 spu_builtin_insert (insert_operands);
4984 }
4985 }
4986 }
4987}
6352eedf 4988
 5474166e 4989/* Return the insn index of the vector compare instruction for the given
 4990	 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
4991
4992static int
4993get_vec_cmp_insn (enum rtx_code code,
4994 enum machine_mode dest_mode,
4995 enum machine_mode op_mode)
4996
4997{
4998 switch (code)
4999 {
5000 case EQ:
5001 if (dest_mode == V16QImode && op_mode == V16QImode)
5002 return CODE_FOR_ceq_v16qi;
5003 if (dest_mode == V8HImode && op_mode == V8HImode)
5004 return CODE_FOR_ceq_v8hi;
5005 if (dest_mode == V4SImode && op_mode == V4SImode)
5006 return CODE_FOR_ceq_v4si;
5007 if (dest_mode == V4SImode && op_mode == V4SFmode)
5008 return CODE_FOR_ceq_v4sf;
5009 if (dest_mode == V2DImode && op_mode == V2DFmode)
5010 return CODE_FOR_ceq_v2df;
5011 break;
5012 case GT:
5013 if (dest_mode == V16QImode && op_mode == V16QImode)
5014 return CODE_FOR_cgt_v16qi;
5015 if (dest_mode == V8HImode && op_mode == V8HImode)
5016 return CODE_FOR_cgt_v8hi;
5017 if (dest_mode == V4SImode && op_mode == V4SImode)
5018 return CODE_FOR_cgt_v4si;
5019 if (dest_mode == V4SImode && op_mode == V4SFmode)
5020 return CODE_FOR_cgt_v4sf;
5021 if (dest_mode == V2DImode && op_mode == V2DFmode)
5022 return CODE_FOR_cgt_v2df;
5023 break;
5024 case GTU:
5025 if (dest_mode == V16QImode && op_mode == V16QImode)
5026 return CODE_FOR_clgt_v16qi;
5027 if (dest_mode == V8HImode && op_mode == V8HImode)
5028 return CODE_FOR_clgt_v8hi;
5029 if (dest_mode == V4SImode && op_mode == V4SImode)
5030 return CODE_FOR_clgt_v4si;
5031 break;
5032 default:
5033 break;
5034 }
5035 return -1;
5036}
5037
5038/* Emit vector compare for operands OP0 and OP1 using code RCODE.
 5039	 DMODE is the expected destination mode. This is a recursive function. */
5040
5041static rtx
5042spu_emit_vector_compare (enum rtx_code rcode,
5043 rtx op0, rtx op1,
5044 enum machine_mode dmode)
5045{
5046 int vec_cmp_insn;
5047 rtx mask;
5048 enum machine_mode dest_mode;
5049 enum machine_mode op_mode = GET_MODE (op1);
5050
5051 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5052
 5053	 /* Floating point vector compare instructions use V4SImode as the
 5054	 destination; double floating point compares use V2DImode. Move the
 5055	 result to the appropriate mode later. */
5056 if (dmode == V4SFmode)
5057 dest_mode = V4SImode;
5058 else if (dmode == V2DFmode)
5059 dest_mode = V2DImode;
5060 else
5061 dest_mode = dmode;
5062
5063 mask = gen_reg_rtx (dest_mode);
5064 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5065
5066 if (vec_cmp_insn == -1)
5067 {
5068 bool swap_operands = false;
5069 bool try_again = false;
5070 switch (rcode)
5071 {
5072 case LT:
5073 rcode = GT;
5074 swap_operands = true;
5075 try_again = true;
5076 break;
5077 case LTU:
5078 rcode = GTU;
5079 swap_operands = true;
5080 try_again = true;
5081 break;
5082 case NE:
5083 /* Treat A != B as ~(A==B). */
5084 {
5085 enum insn_code nor_code;
5086 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5087 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5088 gcc_assert (nor_code != CODE_FOR_nothing);
5089 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5090 if (dmode != dest_mode)
5091 {
5092 rtx temp = gen_reg_rtx (dest_mode);
5093 convert_move (temp, mask, 0);
5094 return temp;
5095 }
5096 return mask;
5097 }
5098 break;
5099 case GE:
5100 case GEU:
5101 case LE:
5102 case LEU:
5103 /* Try GT/GTU/LT/LTU OR EQ */
5104 {
5105 rtx c_rtx, eq_rtx;
5106 enum insn_code ior_code;
5107 enum rtx_code new_code;
5108
5109 switch (rcode)
5110 {
5111 case GE: new_code = GT; break;
5112 case GEU: new_code = GTU; break;
5113 case LE: new_code = LT; break;
5114 case LEU: new_code = LTU; break;
5115 default:
5116 gcc_unreachable ();
5117 }
5118
5119 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5120 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5121
99bdde56 5122 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5123 gcc_assert (ior_code != CODE_FOR_nothing);
5124 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5125 if (dmode != dest_mode)
5126 {
5127 rtx temp = gen_reg_rtx (dest_mode);
5128 convert_move (temp, mask, 0);
5129 return temp;
5130 }
5131 return mask;
5132 }
5133 break;
5134 default:
5135 gcc_unreachable ();
5136 }
5137
5138 /* You only get two chances. */
5139 if (try_again)
5140 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5141
5142 gcc_assert (vec_cmp_insn != -1);
5143
5144 if (swap_operands)
5145 {
5146 rtx tmp;
5147 tmp = op0;
5148 op0 = op1;
5149 op1 = tmp;
5150 }
5151 }
5152
5153 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5154 if (dmode != dest_mode)
5155 {
5156 rtx temp = gen_reg_rtx (dest_mode);
5157 convert_move (temp, mask, 0);
5158 return temp;
5159 }
5160 return mask;
5161}
5162
5163
5164/* Emit vector conditional expression.
5165 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
 5166	 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
5167
5168int
5169spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5170 rtx cond, rtx cc_op0, rtx cc_op1)
5171{
5172 enum machine_mode dest_mode = GET_MODE (dest);
5173 enum rtx_code rcode = GET_CODE (cond);
5174 rtx mask;
5175
5176 /* Get the vector mask for the given relational operations. */
5177 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5178
5179 emit_insn(gen_selb (dest, op2, op1, mask));
5180
5181 return 1;
5182}
5183
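 /* Like force_reg, but make sure the result has mode MODE: if OP has a
    different mode of the same size take a subreg, otherwise move it through
    a spu_convert.  Constants without a mode are first converted to MODE.  */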
6352eedf 5184static rtx
5185spu_force_reg (enum machine_mode mode, rtx op)
5186{
5187 rtx x, r;
5188 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5189 {
5190 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5191 || GET_MODE (op) == BLKmode)
5192 return force_reg (mode, convert_to_mode (mode, op, 0));
5193 abort ();
5194 }
5195
5196 r = force_reg (GET_MODE (op), op);
5197 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5198 {
5199 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5200 if (x)
5201 return x;
5202 }
5203
5204 x = gen_reg_rtx (mode);
5205 emit_insn (gen_spu_convert (x, r));
5206 return x;
5207}
5208
5209static void
5210spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5211{
5212 HOST_WIDE_INT v = 0;
5213 int lsbits;
5214 /* Check the range of immediate operands. */
5215 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5216 {
5217 int range = p - SPU_BTI_7;
5df189be 5218
5219 if (!CONSTANT_P (op))
6352eedf 5220 error ("%s expects an integer literal in the range [%d, %d].",
5221 d->name,
5222 spu_builtin_range[range].low, spu_builtin_range[range].high);
5223
5224 if (GET_CODE (op) == CONST
5225 && (GET_CODE (XEXP (op, 0)) == PLUS
5226 || GET_CODE (XEXP (op, 0)) == MINUS))
5227 {
5228 v = INTVAL (XEXP (XEXP (op, 0), 1));
5229 op = XEXP (XEXP (op, 0), 0);
5230 }
5231 else if (GET_CODE (op) == CONST_INT)
5232 v = INTVAL (op);
5df189be 5233 else if (GET_CODE (op) == CONST_VECTOR
5234 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5235 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5236
5237 /* The default for v is 0 which is valid in every range. */
5238 if (v < spu_builtin_range[range].low
5239 || v > spu_builtin_range[range].high)
5240 error ("%s expects an integer literal in the range [%d, %d]. ("
5241 HOST_WIDE_INT_PRINT_DEC ")",
5242 d->name,
5243 spu_builtin_range[range].low, spu_builtin_range[range].high,
5244 v);
6352eedf 5245
5246 switch (p)
5247 {
5248 case SPU_BTI_S10_4:
5249 lsbits = 4;
5250 break;
5251 case SPU_BTI_U16_2:
 5252	 /* This is only used in lqa and stqa. Even though the insns
5253 encode 16 bits of the address (all but the 2 least
5254 significant), only 14 bits are used because it is masked to
5255 be 16 byte aligned. */
5256 lsbits = 4;
5257 break;
5258 case SPU_BTI_S16_2:
5259 /* This is used for lqr and stqr. */
5260 lsbits = 2;
5261 break;
5262 default:
5263 lsbits = 0;
5264 }
5265
5266 if (GET_CODE (op) == LABEL_REF
5267 || (GET_CODE (op) == SYMBOL_REF
5268 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5269 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5270 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5271 d->name);
5272 }
5273}
5274
5275
5276static void
5df189be 5277expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5278 rtx target, rtx ops[])
5279{
5280 enum insn_code icode = d->icode;
5df189be 5281 int i = 0, a;
6352eedf 5282
5283 /* Expand the arguments into rtl. */
5284
5285 if (d->parm[0] != SPU_BTI_VOID)
5286 ops[i++] = target;
5287
5df189be 5288 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6352eedf 5289 {
5df189be 5290 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5291 if (arg == 0)
5292 abort ();
5293 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5294 }
5295}
5296
5297static rtx
5298spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5299 tree exp, rtx target)
6352eedf 5300{
5301 rtx pat;
5302 rtx ops[8];
5303 enum insn_code icode = d->icode;
5304 enum machine_mode mode, tmode;
5305 int i, p;
5306 tree return_type;
5307
5308 /* Set up ops[] with values from arglist. */
5df189be 5309 expand_builtin_args (d, exp, target, ops);
6352eedf 5310
5311 /* Handle the target operand which must be operand 0. */
5312 i = 0;
5313 if (d->parm[0] != SPU_BTI_VOID)
5314 {
5315
5316 /* We prefer the mode specified for the match_operand otherwise
5317 use the mode from the builtin function prototype. */
5318 tmode = insn_data[d->icode].operand[0].mode;
5319 if (tmode == VOIDmode)
5320 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5321
 5322	 /* Try to use TARGET, because not using it can lead to extra copies,
 5323	 and when all of the registers are in use, extra copies lead
 5324	 to extra spills. */
5325 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5326 ops[0] = target;
5327 else
5328 target = ops[0] = gen_reg_rtx (tmode);
5329
5330 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5331 abort ();
5332
5333 i++;
5334 }
5335
a76866d3 5336 if (d->fcode == SPU_MASK_FOR_LOAD)
5337 {
5338 enum machine_mode mode = insn_data[icode].operand[1].mode;
5339 tree arg;
5340 rtx addr, op, pat;
5341
5342 /* get addr */
5df189be 5343 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5344 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5345 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5346 addr = memory_address (mode, op);
5347
5348 /* negate addr */
5349 op = gen_reg_rtx (GET_MODE (addr));
5350 emit_insn (gen_rtx_SET (VOIDmode, op,
5351 gen_rtx_NEG (GET_MODE (addr), addr)));
5352 op = gen_rtx_MEM (mode, op);
5353
5354 pat = GEN_FCN (icode) (target, op);
5355 if (!pat)
5356 return 0;
5357 emit_insn (pat);
5358 return target;
5359 }
5360
 6352eedf 5361	 /* Ignore align_hint, but still expand its args in case they have
5362 side effects. */
5363 if (icode == CODE_FOR_spu_align_hint)
5364 return 0;
5365
5366 /* Handle the rest of the operands. */
5367 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5368 {
5369 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5370 mode = insn_data[d->icode].operand[i].mode;
5371 else
5372 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5373
5374 /* mode can be VOIDmode here for labels */
5375
5376 /* For specific intrinsics with an immediate operand, e.g.,
5377 si_ai(), we sometimes need to convert the scalar argument to a
5378 vector argument by splatting the scalar. */
5379 if (VECTOR_MODE_P (mode)
5380 && (GET_CODE (ops[i]) == CONST_INT
5381 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5382 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5383 {
5384 if (GET_CODE (ops[i]) == CONST_INT)
5385 ops[i] = spu_const (mode, INTVAL (ops[i]));
5386 else
5387 {
5388 rtx reg = gen_reg_rtx (mode);
5389 enum machine_mode imode = GET_MODE_INNER (mode);
5390 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5391 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5392 if (imode != GET_MODE (ops[i]))
5393 ops[i] = convert_to_mode (imode, ops[i],
5394 TYPE_UNSIGNED (spu_builtin_types
5395 [d->parm[i]]));
5396 emit_insn (gen_spu_splats (reg, ops[i]));
5397 ops[i] = reg;
5398 }
5399 }
5400
5df189be 5401 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5402
6352eedf 5403 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5404 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 5405 }
5406
5407 switch (insn_data[icode].n_operands)
5408 {
5409 case 0:
5410 pat = GEN_FCN (icode) (0);
5411 break;
5412 case 1:
5413 pat = GEN_FCN (icode) (ops[0]);
5414 break;
5415 case 2:
5416 pat = GEN_FCN (icode) (ops[0], ops[1]);
5417 break;
5418 case 3:
5419 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5420 break;
5421 case 4:
5422 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5423 break;
5424 case 5:
5425 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5426 break;
5427 case 6:
5428 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5429 break;
5430 default:
5431 abort ();
5432 }
5433
5434 if (!pat)
5435 abort ();
5436
5437 if (d->type == B_CALL || d->type == B_BISLED)
5438 emit_call_insn (pat);
5439 else if (d->type == B_JUMP)
5440 {
5441 emit_jump_insn (pat);
5442 emit_barrier ();
5443 }
5444 else
5445 emit_insn (pat);
5446
5447 return_type = spu_builtin_types[d->parm[0]];
5448 if (d->parm[0] != SPU_BTI_VOID
5449 && GET_MODE (target) != TYPE_MODE (return_type))
5450 {
5451 /* target is the return value. It should always be the mode of
5452 the builtin function prototype. */
5453 target = spu_force_reg (TYPE_MODE (return_type), target);
5454 }
5455
5456 return target;
5457}
5458
5459rtx
5460spu_expand_builtin (tree exp,
5461 rtx target,
5462 rtx subtarget ATTRIBUTE_UNUSED,
5463 enum machine_mode mode ATTRIBUTE_UNUSED,
5464 int ignore ATTRIBUTE_UNUSED)
5465{
5df189be 5466 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 5467 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 5468 struct spu_builtin_description *d;
5469
5470 if (fcode < NUM_SPU_BUILTINS)
5471 {
5472 d = &spu_builtins[fcode];
5473
5df189be 5474 return spu_expand_builtin_1 (d, exp, target);
6352eedf 5475 }
5476 abort ();
5477}
5478
e99f512d 5479/* Implement targetm.vectorize.builtin_mul_widen_even. */
5480static tree
5481spu_builtin_mul_widen_even (tree type)
5482{
e99f512d 5483 switch (TYPE_MODE (type))
5484 {
5485 case V8HImode:
5486 if (TYPE_UNSIGNED (type))
5487 return spu_builtins[SPU_MULE_0].fndecl;
5488 else
5489 return spu_builtins[SPU_MULE_1].fndecl;
5490 break;
5491 default:
5492 return NULL_TREE;
5493 }
5494}
5495
5496/* Implement targetm.vectorize.builtin_mul_widen_odd. */
5497static tree
5498spu_builtin_mul_widen_odd (tree type)
5499{
5500 switch (TYPE_MODE (type))
5501 {
5502 case V8HImode:
5503 if (TYPE_UNSIGNED (type))
5504 return spu_builtins[SPU_MULO_1].fndecl;
5505 else
5506 return spu_builtins[SPU_MULO_0].fndecl;
5507 break;
5508 default:
5509 return NULL_TREE;
5510 }
5511}
5512
a76866d3 5513/* Implement targetm.vectorize.builtin_mask_for_load. */
5514static tree
5515spu_builtin_mask_for_load (void)
5516{
5517 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5518 gcc_assert (d);
5519 return d->fndecl;
5520}
5df189be 5521
a28df51d 5522/* Implement targetm.vectorize.builtin_vectorization_cost. */
5523static int
5524spu_builtin_vectorization_cost (bool runtime_test)
5525{
 5526	 /* If the branch of the runtime test is taken, i.e., the vectorized
 5527	 version is skipped, this incurs a misprediction cost (because the
5528 vectorized version is expected to be the fall-through). So we subtract
becfaa62 5529 the latency of a mispredicted branch from the costs that are incurred
a28df51d 5530 when the vectorized version is executed. */
5531 if (runtime_test)
5532 return -19;
5533 else
5534 return 0;
5535}
5536
 0e87db76 5537/* Return true iff a data reference of TYPE can reach vector alignment (16)
 5538	 after applying N iterations. This routine does not determine
 5539	 how many iterations are required to reach the desired alignment. */
5540
5541static bool
a9f1838b 5542spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 5543{
5544 if (is_packed)
5545 return false;
5546
5547 /* All other types are naturally aligned. */
5548 return true;
5549}
5550
a0515226 5551/* Implement targetm.vectorize.builtin_vec_perm. */
5552tree
5553spu_builtin_vec_perm (tree type, tree *mask_element_type)
5554{
5555 struct spu_builtin_description *d;
5556
5557 *mask_element_type = unsigned_char_type_node;
5558
5559 switch (TYPE_MODE (type))
5560 {
5561 case V16QImode:
5562 if (TYPE_UNSIGNED (type))
5563 d = &spu_builtins[SPU_SHUFFLE_0];
5564 else
5565 d = &spu_builtins[SPU_SHUFFLE_1];
5566 break;
5567
5568 case V8HImode:
5569 if (TYPE_UNSIGNED (type))
5570 d = &spu_builtins[SPU_SHUFFLE_2];
5571 else
5572 d = &spu_builtins[SPU_SHUFFLE_3];
5573 break;
5574
5575 case V4SImode:
5576 if (TYPE_UNSIGNED (type))
5577 d = &spu_builtins[SPU_SHUFFLE_4];
5578 else
5579 d = &spu_builtins[SPU_SHUFFLE_5];
5580 break;
5581
5582 case V2DImode:
5583 if (TYPE_UNSIGNED (type))
5584 d = &spu_builtins[SPU_SHUFFLE_6];
5585 else
5586 d = &spu_builtins[SPU_SHUFFLE_7];
5587 break;
5588
5589 case V4SFmode:
5590 d = &spu_builtins[SPU_SHUFFLE_8];
5591 break;
5592
5593 case V2DFmode:
5594 d = &spu_builtins[SPU_SHUFFLE_9];
5595 break;
5596
5597 default:
5598 return NULL_TREE;
5599 }
5600
5601 gcc_assert (d);
5602 return d->fndecl;
5603}
5604
d52fd16a 5605/* Count the total number of instructions in each pipe and return the
5606 maximum, which is used as the Minimum Iteration Interval (MII)
 5607	 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
 5608	 -2 means the instruction can go in either pipe0 or pipe1. */
5609static int
5610spu_sms_res_mii (struct ddg *g)
5611{
5612 int i;
5613 unsigned t[4] = {0, 0, 0, 0};
5614
5615 for (i = 0; i < g->num_nodes; i++)
5616 {
5617 rtx insn = g->nodes[i].insn;
5618 int p = get_pipe (insn) + 2;
5619
5620 assert (p >= 0);
5621 assert (p < 4);
5622
5623 t[p]++;
5624 if (dump_file && INSN_P (insn))
5625 fprintf (dump_file, "i%d %s %d %d\n",
5626 INSN_UID (insn),
5627 insn_data[INSN_CODE(insn)].name,
5628 p, t[p]);
5629 }
5630 if (dump_file)
5631 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5632
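	  /* Each pipe can issue one instruction per cycle, so the MII is at
	     least the count for each specific pipe and at least half
	     (rounded up) of everything that could be dual-issued.  */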
5633 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
5634}
5635
5636
5df189be 5637void
5638spu_init_expanders (void)
5639{
 5640	 /* The hard frame pointer register is only 128 bit aligned when
5641 * frame_pointer_needed is true. We don't know that until we're
5642 * expanding the prologue. */
5643 if (cfun)
5644 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 5645}
5646
5647static enum machine_mode
5648spu_libgcc_cmp_return_mode (void)
5649{
5650
 5651/* For the SPU, word mode is TImode, so it is better to use SImode
5652 for compare returns. */
5653 return SImode;
5654}
5655
5656static enum machine_mode
5657spu_libgcc_shift_count_mode (void)
5658{
 5659/* For the SPU, word mode is TImode, so it is better to use SImode
5660 for shift counts. */
5661 return SImode;
5662}