[thirdparty/gcc.git] / gcc / config / spu / spu.c
cf31d486 1/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
53#include "tree-gimple.h"
54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
59struct spu_builtin_range
60{
61 int low, high;
62};
63
64static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77};
78
644459d0 79\f
80/* Target specific attribute specifications. */
81char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83/* Prototypes and external defs. */
84static void spu_init_builtins (void);
85static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88static rtx get_pic_reg (void);
89static int need_to_save_reg (int regno, int saving);
90static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94static void emit_nop_for_insn (rtx insn);
95static bool insn_clobbers_hbr (rtx insn);
96static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
5474166e 98static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
644459d0 100static rtx get_branch_target (rtx branch);
101static void insert_branch_hints (void);
102static void insert_nops (void);
103static void spu_machine_dependent_reorg (void);
104static int spu_sched_issue_rate (void);
105static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107static int get_pipe (rtx insn);
108static int spu_sched_adjust_priority (rtx insn, int pri);
109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static int spu_naked_function_p (tree func);
fb80456a 117static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
644459d0 119static tree spu_build_builtin_va_list (void);
120static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
121 tree * post_p);
122static int regno_aligned_for_load (int regno);
123static int store_with_one_insn_p (rtx mem);
124static int reg_align (rtx reg);
125static int mem_is_padded_component_ref (rtx x);
126static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
127static void spu_asm_globalize_label (FILE * file, const char *name);
128static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
129 int *total);
130static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
131static void spu_init_libfuncs (void);
fb80456a 132static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 133static void fix_range (const char *);
69ced2d6 134static void spu_encode_section_info (tree, rtx, int);
e99f512d 135static tree spu_builtin_mul_widen_even (tree);
136static tree spu_builtin_mul_widen_odd (tree);
a76866d3 137static tree spu_builtin_mask_for_load (void);
a28df51d 138static int spu_builtin_vectorization_cost (bool);
a9f1838b 139static bool spu_vector_alignment_reachable (const_tree, bool);
d52fd16a 140static int spu_sms_res_mii (struct ddg *g);
644459d0 141
142extern const char *reg_names[];
143rtx spu_compare_op0, spu_compare_op1;
144
5474166e 145/* Which instruction set architecture to use. */
146int spu_arch;
147/* Which cpu are we tuning for. */
148int spu_tune;
149
644459d0 150enum spu_immediate {
151 SPU_NONE,
152 SPU_IL,
153 SPU_ILA,
154 SPU_ILH,
155 SPU_ILHU,
156 SPU_ORI,
157 SPU_ORHI,
158 SPU_ORBI,
99369027 159 SPU_IOHL
644459d0 160};
dea01258 161enum immediate_class
162{
163 IC_POOL, /* constant pool */
164 IC_IL1, /* one il* instruction */
165 IC_IL2, /* both ilhu and iohl instructions */
166 IC_IL1s, /* one il* instruction */
167 IC_IL2s, /* both ilhu and iohl instructions */
168 IC_FSMBI, /* the fsmbi instruction */
169 IC_CPAT, /* one of the c*d instructions */
5df189be 170 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 171};
644459d0 172
173static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
174static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 175static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
176static enum immediate_class classify_immediate (rtx op,
177 enum machine_mode mode);
644459d0 178
ea32e033 179static enum machine_mode
180spu_libgcc_cmp_return_mode (void);
181
182static enum machine_mode
183spu_libgcc_shift_count_mode (void);
184
644459d0 185/* Built in types. */
186tree spu_builtin_types[SPU_BTI_MAX];
187\f
188/* TARGET overrides. */
189
190#undef TARGET_INIT_BUILTINS
191#define TARGET_INIT_BUILTINS spu_init_builtins
192
644459d0 193#undef TARGET_EXPAND_BUILTIN
194#define TARGET_EXPAND_BUILTIN spu_expand_builtin
195
196#undef TARGET_EH_RETURN_FILTER_MODE
197#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
198
199/* The .8byte directive doesn't seem to work well for a 32 bit
200 architecture. */
201#undef TARGET_ASM_UNALIGNED_DI_OP
202#define TARGET_ASM_UNALIGNED_DI_OP NULL
203
204#undef TARGET_RTX_COSTS
205#define TARGET_RTX_COSTS spu_rtx_costs
206
207#undef TARGET_ADDRESS_COST
208#define TARGET_ADDRESS_COST hook_int_rtx_0
209
210#undef TARGET_SCHED_ISSUE_RATE
211#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
212
213#undef TARGET_SCHED_VARIABLE_ISSUE
214#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
215
216#undef TARGET_SCHED_ADJUST_PRIORITY
217#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
218
219#undef TARGET_SCHED_ADJUST_COST
220#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
221
222const struct attribute_spec spu_attribute_table[];
223#undef TARGET_ATTRIBUTE_TABLE
224#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
225
226#undef TARGET_ASM_INTEGER
227#define TARGET_ASM_INTEGER spu_assemble_integer
228
229#undef TARGET_SCALAR_MODE_SUPPORTED_P
230#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
231
232#undef TARGET_VECTOR_MODE_SUPPORTED_P
233#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
234
235#undef TARGET_FUNCTION_OK_FOR_SIBCALL
236#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
237
238#undef TARGET_ASM_GLOBALIZE_LABEL
239#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
240
241#undef TARGET_PASS_BY_REFERENCE
242#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
243
244#undef TARGET_MUST_PASS_IN_STACK
245#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
246
247#undef TARGET_BUILD_BUILTIN_VA_LIST
248#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
249
250#undef TARGET_SETUP_INCOMING_VARARGS
251#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
252
253#undef TARGET_MACHINE_DEPENDENT_REORG
254#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
255
256#undef TARGET_GIMPLIFY_VA_ARG_EXPR
257#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
258
259#undef TARGET_DEFAULT_TARGET_FLAGS
260#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
261
262#undef TARGET_INIT_LIBFUNCS
263#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
264
265#undef TARGET_RETURN_IN_MEMORY
266#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
267
69ced2d6 268#undef TARGET_ENCODE_SECTION_INFO
269#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
270
e99f512d 271#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
272#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
273
274#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
275#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
276
a76866d3 277#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
278#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
279
a28df51d 280#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
281#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
282
0e87db76 283#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
284#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
285
ea32e033 286#undef TARGET_LIBGCC_CMP_RETURN_MODE
287#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
288
289#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
290#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
291
d52fd16a 292#undef TARGET_SCHED_SMS_RES_MII
293#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
294
644459d0 295struct gcc_target targetm = TARGET_INITIALIZER;
296
5df189be 297void
298spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
299{
5df189be 300 /* Override some of the default param values. With so many registers
301 larger values are better for these params. */
302 MAX_PENDING_LIST_LENGTH = 128;
303
304 /* With so many registers this is better on by default. */
305 flag_rename_registers = 1;
306}
307
644459d0 308/* Sometimes certain combinations of command options do not make sense
309 on a particular target machine. You can define a macro
310 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
311 executed once just after all the command options have been parsed. */
312void
313spu_override_options (void)
314{
14d408d9 315 /* Small loops will be completely unrolled at -O3. For SPU it is more important
316 to keep code small by default. */
317 if (!flag_unroll_loops && !flag_peel_loops
318 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
319 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
320
644459d0 321 flag_omit_frame_pointer = 1;
322
323 if (align_functions < 8)
324 align_functions = 8;
c7b91b14 325
326 if (spu_fixed_range_string)
327 fix_range (spu_fixed_range_string);
5474166e 328
329 /* Determine processor architectural level. */
330 if (spu_arch_string)
331 {
332 if (strcmp (&spu_arch_string[0], "cell") == 0)
333 spu_arch = PROCESSOR_CELL;
334 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
335 spu_arch = PROCESSOR_CELLEDP;
336 else
337 error ("Unknown architecture '%s'", &spu_arch_string[0]);
338 }
339
340 /* Determine processor to tune for. */
341 if (spu_tune_string)
342 {
343 if (strcmp (&spu_tune_string[0], "cell") == 0)
344 spu_tune = PROCESSOR_CELL;
345 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
346 spu_tune = PROCESSOR_CELLEDP;
347 else
348 error ("Unknown architecture '%s'", &spu_tune_string[0]);
349 }
644459d0 350}
351\f
352/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
353 struct attribute_spec.handler. */
354
355/* Table of machine attributes. */
356const struct attribute_spec spu_attribute_table[] =
357{
358 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
359 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
360 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
361 { NULL, 0, 0, false, false, false, NULL }
362};
363
364/* True if MODE is valid for the target. By "valid", we mean able to
365 be manipulated in non-trivial ways. In particular, this means all
366 the arithmetic is supported. */
367static bool
368spu_scalar_mode_supported_p (enum machine_mode mode)
369{
370 switch (mode)
371 {
372 case QImode:
373 case HImode:
374 case SImode:
375 case SFmode:
376 case DImode:
377 case TImode:
378 case DFmode:
379 return true;
380
381 default:
382 return false;
383 }
384}
385
386/* Similarly for vector modes. "Supported" here is less strict. At
387 least some operations are supported; need to check optabs or builtins
388 for further details. */
389static bool
390spu_vector_mode_supported_p (enum machine_mode mode)
391{
392 switch (mode)
393 {
394 case V16QImode:
395 case V8HImode:
396 case V4SImode:
397 case V2DImode:
398 case V4SFmode:
399 case V2DFmode:
400 return true;
401
402 default:
403 return false;
404 }
405}
406
407/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
408 least significant bytes of the outer mode. This function returns
 409 TRUE for the SUBREGs where this is correct. */
410int
411valid_subreg (rtx op)
412{
413 enum machine_mode om = GET_MODE (op);
414 enum machine_mode im = GET_MODE (SUBREG_REG (op));
415 return om != VOIDmode && im != VOIDmode
416 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
417 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
418}
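/* For illustration only (not from the original sources): under the test
   above, (subreg:SI (reg:QI r) 0) and (subreg:HI (reg:SI r) 0) are accepted
   because both modes are 4 bytes or smaller, and any SUBREG whose inner and
   outer modes have the same size is accepted, while something like
   (subreg:TI (reg:SI r) 0) is rejected because the sizes differ and the
   outer mode is wider than 4 bytes.  */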
419
 420/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 421 and adjust the start offset. */
644459d0 422static rtx
423adjust_operand (rtx op, HOST_WIDE_INT * start)
424{
425 enum machine_mode mode;
426 int op_size;
427 /* Strip any SUBREG */
428 if (GET_CODE (op) == SUBREG)
429 {
430 if (start)
431 *start -=
432 GET_MODE_BITSIZE (GET_MODE (op)) -
433 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
434 op = SUBREG_REG (op);
435 }
436 /* If it is smaller than SI, assure a SUBREG */
437 op_size = GET_MODE_BITSIZE (GET_MODE (op));
438 if (op_size < 32)
439 {
440 if (start)
441 *start += 32 - op_size;
442 op_size = 32;
443 }
444 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
445 mode = mode_for_size (op_size, MODE_INT, 0);
446 if (mode != GET_MODE (op))
447 op = gen_rtx_SUBREG (mode, op, 0);
448 return op;
449}
450
451void
452spu_expand_extv (rtx ops[], int unsignedp)
453{
454 HOST_WIDE_INT width = INTVAL (ops[2]);
455 HOST_WIDE_INT start = INTVAL (ops[3]);
456 HOST_WIDE_INT src_size, dst_size;
457 enum machine_mode src_mode, dst_mode;
458 rtx dst = ops[0], src = ops[1];
459 rtx s;
460
461 dst = adjust_operand (ops[0], 0);
462 dst_mode = GET_MODE (dst);
463 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
464
644459d0 465 src = adjust_operand (src, &start);
466 src_mode = GET_MODE (src);
467 src_size = GET_MODE_BITSIZE (GET_MODE (src));
468
469 if (start > 0)
470 {
471 s = gen_reg_rtx (src_mode);
472 switch (src_mode)
473 {
474 case SImode:
475 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
476 break;
477 case DImode:
478 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
479 break;
480 case TImode:
481 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
482 break;
483 default:
484 abort ();
485 }
486 src = s;
487 }
488
489 if (width < src_size)
490 {
491 rtx pat;
492 int icode;
493 switch (src_mode)
494 {
495 case SImode:
496 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
497 break;
498 case DImode:
499 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
500 break;
501 case TImode:
502 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
503 break;
504 default:
505 abort ();
506 }
507 s = gen_reg_rtx (src_mode);
508 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
509 emit_insn (pat);
510 src = s;
511 }
512
513 convert_move (dst, src, unsignedp);
514}
515
516void
517spu_expand_insv (rtx ops[])
518{
519 HOST_WIDE_INT width = INTVAL (ops[1]);
520 HOST_WIDE_INT start = INTVAL (ops[2]);
521 HOST_WIDE_INT maskbits;
522 enum machine_mode dst_mode, src_mode;
523 rtx dst = ops[0], src = ops[3];
524 int dst_size, src_size;
525 rtx mask;
526 rtx shift_reg;
527 int shift;
528
529
530 if (GET_CODE (ops[0]) == MEM)
531 dst = gen_reg_rtx (TImode);
532 else
533 dst = adjust_operand (dst, &start);
534 dst_mode = GET_MODE (dst);
535 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
536
537 if (CONSTANT_P (src))
538 {
539 enum machine_mode m =
540 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
541 src = force_reg (m, convert_to_mode (m, src, 0));
542 }
543 src = adjust_operand (src, 0);
544 src_mode = GET_MODE (src);
545 src_size = GET_MODE_BITSIZE (GET_MODE (src));
546
547 mask = gen_reg_rtx (dst_mode);
548 shift_reg = gen_reg_rtx (dst_mode);
549 shift = dst_size - start - width;
550
551 /* It's not safe to use subreg here because the compiler assumes
552 that the SUBREG_REG is right justified in the SUBREG. */
553 convert_move (shift_reg, src, 1);
554
555 if (shift > 0)
556 {
557 switch (dst_mode)
558 {
559 case SImode:
560 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
561 break;
562 case DImode:
563 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
564 break;
565 case TImode:
566 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
567 break;
568 default:
569 abort ();
570 }
571 }
572 else if (shift < 0)
573 abort ();
574
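  /* Worked example (illustrative values): for dst_size == 32, start == 8
     and width == 4, maskbits is (-1 << 20) + (1 << 24), which evaluates to
     0x00f00000, i.e. a mask covering a 4-bit field that begins 8 bits from
     the most significant end of the word.  */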
575 switch (dst_size)
576 {
577 case 32:
578 maskbits = (-1ll << (32 - width - start));
579 if (start)
580 maskbits += (1ll << (32 - start));
581 emit_move_insn (mask, GEN_INT (maskbits));
582 break;
583 case 64:
584 maskbits = (-1ll << (64 - width - start));
585 if (start)
586 maskbits += (1ll << (64 - start));
587 emit_move_insn (mask, GEN_INT (maskbits));
588 break;
589 case 128:
590 {
591 unsigned char arr[16];
592 int i = start / 8;
593 memset (arr, 0, sizeof (arr));
594 arr[i] = 0xff >> (start & 7);
595 for (i++; i <= (start + width - 1) / 8; i++)
596 arr[i] = 0xff;
597 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
598 emit_move_insn (mask, array_to_constant (TImode, arr));
599 }
600 break;
601 default:
602 abort ();
603 }
604 if (GET_CODE (ops[0]) == MEM)
605 {
606 rtx aligned = gen_reg_rtx (SImode);
607 rtx low = gen_reg_rtx (SImode);
608 rtx addr = gen_reg_rtx (SImode);
609 rtx rotl = gen_reg_rtx (SImode);
610 rtx mask0 = gen_reg_rtx (TImode);
611 rtx mem;
612
613 emit_move_insn (addr, XEXP (ops[0], 0));
614 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
615 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
616 emit_insn (gen_negsi2 (rotl, low));
617 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
618 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
619 mem = change_address (ops[0], TImode, aligned);
620 set_mem_alias_set (mem, 0);
621 emit_move_insn (dst, mem);
622 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
623 emit_move_insn (mem, dst);
624 if (start + width > MEM_ALIGN (ops[0]))
625 {
626 rtx shl = gen_reg_rtx (SImode);
627 rtx mask1 = gen_reg_rtx (TImode);
628 rtx dst1 = gen_reg_rtx (TImode);
629 rtx mem1;
630 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
631 emit_insn (gen_shlqby_ti (mask1, mask, shl));
632 mem1 = adjust_address (mem, TImode, 16);
633 set_mem_alias_set (mem1, 0);
634 emit_move_insn (dst1, mem1);
635 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
636 emit_move_insn (mem1, dst1);
637 }
638 }
639 else
71cd778d 640 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 641}
642
643
644int
645spu_expand_block_move (rtx ops[])
646{
647 HOST_WIDE_INT bytes, align, offset;
648 rtx src, dst, sreg, dreg, target;
649 int i;
650 if (GET_CODE (ops[2]) != CONST_INT
651 || GET_CODE (ops[3]) != CONST_INT
652 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
653 return 0;
654
655 bytes = INTVAL (ops[2]);
656 align = INTVAL (ops[3]);
657
658 if (bytes <= 0)
659 return 1;
660
661 dst = ops[0];
662 src = ops[1];
663
664 if (align == 16)
665 {
666 for (offset = 0; offset + 16 <= bytes; offset += 16)
667 {
668 dst = adjust_address (ops[0], V16QImode, offset);
669 src = adjust_address (ops[1], V16QImode, offset);
670 emit_move_insn (dst, src);
671 }
672 if (offset < bytes)
673 {
674 rtx mask;
675 unsigned char arr[16] = { 0 };
676 for (i = 0; i < bytes - offset; i++)
677 arr[i] = 0xff;
678 dst = adjust_address (ops[0], V16QImode, offset);
679 src = adjust_address (ops[1], V16QImode, offset);
680 mask = gen_reg_rtx (V16QImode);
681 sreg = gen_reg_rtx (V16QImode);
682 dreg = gen_reg_rtx (V16QImode);
683 target = gen_reg_rtx (V16QImode);
684 emit_move_insn (mask, array_to_constant (V16QImode, arr));
685 emit_move_insn (dreg, dst);
686 emit_move_insn (sreg, src);
687 emit_insn (gen_selb (target, dreg, sreg, mask));
688 emit_move_insn (dst, target);
689 }
690 return 1;
691 }
692 return 0;
693}
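/* Illustrative case (made-up sizes): for bytes == 20 and align == 16 the
   loop above copies one full quadword, and the remaining 4 bytes are merged
   with a selb whose mask has 0xff in its first four bytes, so the other 12
   bytes of the destination quadword are left untouched.  */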
694
695enum spu_comp_code
696{ SPU_EQ, SPU_GT, SPU_GTU };
697
5474166e 698int spu_comp_icode[12][3] = {
699 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
700 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
701 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
702 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
703 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
704 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
705 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
706 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
707 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
708 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
709 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
710 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 711};
712
713/* Generate a compare for CODE. Return a brand-new rtx that represents
714 the result of the compare. GCC can figure this out too if we don't
 715 provide all variations of compares, but because GCC always wants to use
 716 WORD_MODE, we can generate better code in most cases if we do it
717 ourselves. */
718void
719spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
720{
721 int reverse_compare = 0;
722 int reverse_test = 0;
5d70b918 723 rtx compare_result, eq_result;
724 rtx comp_rtx, eq_rtx;
644459d0 725 rtx target = operands[0];
726 enum machine_mode comp_mode;
727 enum machine_mode op_mode;
5d70b918 728 enum spu_comp_code scode, eq_code, ior_code;
644459d0 729 int index;
5d70b918 730 int eq_test = 0;
644459d0 731
732 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
733 and so on, to keep the constant in operand 1. */
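 /* For example, (x >= 5) is rewritten as (x > 4) and (x <u 5) as (x <=u 4),
 provided the decremented constant still fits in the mode of x. */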
734 if (GET_CODE (spu_compare_op1) == CONST_INT)
735 {
736 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
737 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
738 switch (code)
739 {
740 case GE:
741 spu_compare_op1 = GEN_INT (val);
742 code = GT;
743 break;
744 case LT:
745 spu_compare_op1 = GEN_INT (val);
746 code = LE;
747 break;
748 case GEU:
749 spu_compare_op1 = GEN_INT (val);
750 code = GTU;
751 break;
752 case LTU:
753 spu_compare_op1 = GEN_INT (val);
754 code = LEU;
755 break;
756 default:
757 break;
758 }
759 }
760
5d70b918 761 comp_mode = SImode;
762 op_mode = GET_MODE (spu_compare_op0);
763
644459d0 764 switch (code)
765 {
766 case GE:
644459d0 767 scode = SPU_GT;
07027691 768 if (HONOR_NANS (op_mode))
5d70b918 769 {
770 reverse_compare = 0;
771 reverse_test = 0;
772 eq_test = 1;
773 eq_code = SPU_EQ;
774 }
775 else
776 {
777 reverse_compare = 1;
778 reverse_test = 1;
779 }
644459d0 780 break;
781 case LE:
644459d0 782 scode = SPU_GT;
07027691 783 if (HONOR_NANS (op_mode))
5d70b918 784 {
785 reverse_compare = 1;
786 reverse_test = 0;
787 eq_test = 1;
788 eq_code = SPU_EQ;
789 }
790 else
791 {
792 reverse_compare = 0;
793 reverse_test = 1;
794 }
644459d0 795 break;
796 case LT:
797 reverse_compare = 1;
798 reverse_test = 0;
799 scode = SPU_GT;
800 break;
801 case GEU:
802 reverse_compare = 1;
803 reverse_test = 1;
804 scode = SPU_GTU;
805 break;
806 case LEU:
807 reverse_compare = 0;
808 reverse_test = 1;
809 scode = SPU_GTU;
810 break;
811 case LTU:
812 reverse_compare = 1;
813 reverse_test = 0;
814 scode = SPU_GTU;
815 break;
816 case NE:
817 reverse_compare = 0;
818 reverse_test = 1;
819 scode = SPU_EQ;
820 break;
821
822 case EQ:
823 scode = SPU_EQ;
824 break;
825 case GT:
826 scode = SPU_GT;
827 break;
828 case GTU:
829 scode = SPU_GTU;
830 break;
831 default:
832 scode = SPU_EQ;
833 break;
834 }
835
644459d0 836 switch (op_mode)
837 {
838 case QImode:
839 index = 0;
840 comp_mode = QImode;
841 break;
842 case HImode:
843 index = 1;
844 comp_mode = HImode;
845 break;
846 case SImode:
847 index = 2;
848 break;
849 case DImode:
850 index = 3;
851 break;
852 case TImode:
853 index = 4;
854 break;
855 case SFmode:
856 index = 5;
857 break;
858 case DFmode:
859 index = 6;
860 break;
861 case V16QImode:
5474166e 862 index = 7;
863 comp_mode = op_mode;
864 break;
644459d0 865 case V8HImode:
5474166e 866 index = 8;
867 comp_mode = op_mode;
868 break;
644459d0 869 case V4SImode:
5474166e 870 index = 9;
871 comp_mode = op_mode;
872 break;
644459d0 873 case V4SFmode:
5474166e 874 index = 10;
875 comp_mode = V4SImode;
876 break;
644459d0 877 case V2DFmode:
5474166e 878 index = 11;
879 comp_mode = V2DImode;
644459d0 880 break;
5474166e 881 case V2DImode:
644459d0 882 default:
883 abort ();
884 }
885
07027691 886 if (GET_MODE (spu_compare_op1) == DFmode
887 && (scode != SPU_GT && scode != SPU_EQ))
888 abort ();
644459d0 889
890 if (is_set == 0 && spu_compare_op1 == const0_rtx
891 && (GET_MODE (spu_compare_op0) == SImode
892 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
893 {
894 /* Don't need to set a register with the result when we are
895 comparing against zero and branching. */
896 reverse_test = !reverse_test;
897 compare_result = spu_compare_op0;
898 }
899 else
900 {
901 compare_result = gen_reg_rtx (comp_mode);
902
903 if (reverse_compare)
904 {
905 rtx t = spu_compare_op1;
906 spu_compare_op1 = spu_compare_op0;
907 spu_compare_op0 = t;
908 }
909
910 if (spu_comp_icode[index][scode] == 0)
911 abort ();
912
913 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
914 (spu_compare_op0, op_mode))
915 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
916 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
917 (spu_compare_op1, op_mode))
918 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
919 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
920 spu_compare_op0,
921 spu_compare_op1);
922 if (comp_rtx == 0)
923 abort ();
924 emit_insn (comp_rtx);
925
5d70b918 926 if (eq_test)
927 {
928 eq_result = gen_reg_rtx (comp_mode);
929 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
930 spu_compare_op0,
931 spu_compare_op1);
932 if (eq_rtx == 0)
933 abort ();
934 emit_insn (eq_rtx);
935 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
936 gcc_assert (ior_code != CODE_FOR_nothing);
937 emit_insn (GEN_FCN (ior_code)
938 (compare_result, compare_result, eq_result));
939 }
644459d0 940 }
941
942 if (is_set == 0)
943 {
944 rtx bcomp;
945 rtx loc_ref;
946
947 /* We don't have branch on QI compare insns, so we convert the
948 QI compare result to a HI result. */
949 if (comp_mode == QImode)
950 {
951 rtx old_res = compare_result;
952 compare_result = gen_reg_rtx (HImode);
953 comp_mode = HImode;
954 emit_insn (gen_extendqihi2 (compare_result, old_res));
955 }
956
957 if (reverse_test)
958 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
959 else
960 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
961
962 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
963 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
964 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
965 loc_ref, pc_rtx)));
966 }
967 else if (is_set == 2)
968 {
969 int compare_size = GET_MODE_BITSIZE (comp_mode);
970 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
971 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
972 rtx select_mask;
973 rtx op_t = operands[2];
974 rtx op_f = operands[3];
975
976 /* The result of the comparison can be SI, HI or QI mode. Create a
977 mask based on that result. */
978 if (target_size > compare_size)
979 {
980 select_mask = gen_reg_rtx (mode);
981 emit_insn (gen_extend_compare (select_mask, compare_result));
982 }
983 else if (target_size < compare_size)
984 select_mask =
985 gen_rtx_SUBREG (mode, compare_result,
986 (compare_size - target_size) / BITS_PER_UNIT);
987 else if (comp_mode != mode)
988 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
989 else
990 select_mask = compare_result;
991
992 if (GET_MODE (target) != GET_MODE (op_t)
993 || GET_MODE (target) != GET_MODE (op_f))
994 abort ();
995
996 if (reverse_test)
997 emit_insn (gen_selb (target, op_t, op_f, select_mask));
998 else
999 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1000 }
1001 else
1002 {
1003 if (reverse_test)
1004 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1005 gen_rtx_NOT (comp_mode, compare_result)));
1006 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1007 emit_insn (gen_extendhisi2 (target, compare_result));
1008 else if (GET_MODE (target) == SImode
1009 && GET_MODE (compare_result) == QImode)
1010 emit_insn (gen_extend_compare (target, compare_result));
1011 else
1012 emit_move_insn (target, compare_result);
1013 }
1014}
1015
1016HOST_WIDE_INT
1017const_double_to_hwint (rtx x)
1018{
1019 HOST_WIDE_INT val;
1020 REAL_VALUE_TYPE rv;
1021 if (GET_MODE (x) == SFmode)
1022 {
1023 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1024 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1025 }
1026 else if (GET_MODE (x) == DFmode)
1027 {
1028 long l[2];
1029 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1030 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1031 val = l[0];
1032 val = (val << 32) | (l[1] & 0xffffffff);
1033 }
1034 else
1035 abort ();
1036 return val;
1037}
1038
1039rtx
1040hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1041{
1042 long tv[2];
1043 REAL_VALUE_TYPE rv;
1044 gcc_assert (mode == SFmode || mode == DFmode);
1045
1046 if (mode == SFmode)
1047 tv[0] = (v << 32) >> 32;
1048 else if (mode == DFmode)
1049 {
1050 tv[1] = (v << 32) >> 32;
1051 tv[0] = v >> 32;
1052 }
1053 real_from_target (&rv, tv, mode);
1054 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1055}
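/* Illustrative round trip: for SFmode, the IEEE single-precision value 1.0f
   has the bit pattern 0x3f800000, so const_double_to_hwint returns
   0x3f800000 for that constant and hwint_to_const_double (SFmode,
   0x3f800000) rebuilds the same CONST_DOUBLE.  */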
1056
1057void
1058print_operand_address (FILE * file, register rtx addr)
1059{
1060 rtx reg;
1061 rtx offset;
1062
e04cf423 1063 if (GET_CODE (addr) == AND
1064 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1065 && INTVAL (XEXP (addr, 1)) == -16)
1066 addr = XEXP (addr, 0);
1067
644459d0 1068 switch (GET_CODE (addr))
1069 {
1070 case REG:
1071 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1072 break;
1073
1074 case PLUS:
1075 reg = XEXP (addr, 0);
1076 offset = XEXP (addr, 1);
1077 if (GET_CODE (offset) == REG)
1078 {
1079 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1080 reg_names[REGNO (offset)]);
1081 }
1082 else if (GET_CODE (offset) == CONST_INT)
1083 {
1084 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1085 INTVAL (offset), reg_names[REGNO (reg)]);
1086 }
1087 else
1088 abort ();
1089 break;
1090
1091 case CONST:
1092 case LABEL_REF:
1093 case SYMBOL_REF:
1094 case CONST_INT:
1095 output_addr_const (file, addr);
1096 break;
1097
1098 default:
1099 debug_rtx (addr);
1100 abort ();
1101 }
1102}
1103
1104void
1105print_operand (FILE * file, rtx x, int code)
1106{
1107 enum machine_mode mode = GET_MODE (x);
1108 HOST_WIDE_INT val;
1109 unsigned char arr[16];
1110 int xcode = GET_CODE (x);
dea01258 1111 int i, info;
644459d0 1112 if (GET_MODE (x) == VOIDmode)
1113 switch (code)
1114 {
644459d0 1115 case 'L': /* 128 bits, signed */
1116 case 'm': /* 128 bits, signed */
1117 case 'T': /* 128 bits, signed */
1118 case 't': /* 128 bits, signed */
1119 mode = TImode;
1120 break;
644459d0 1121 case 'K': /* 64 bits, signed */
1122 case 'k': /* 64 bits, signed */
1123 case 'D': /* 64 bits, signed */
1124 case 'd': /* 64 bits, signed */
1125 mode = DImode;
1126 break;
644459d0 1127 case 'J': /* 32 bits, signed */
1128 case 'j': /* 32 bits, signed */
1129 case 's': /* 32 bits, signed */
1130 case 'S': /* 32 bits, signed */
1131 mode = SImode;
1132 break;
1133 }
1134 switch (code)
1135 {
1136
1137 case 'j': /* 32 bits, signed */
1138 case 'k': /* 64 bits, signed */
1139 case 'm': /* 128 bits, signed */
1140 if (xcode == CONST_INT
1141 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1142 {
1143 gcc_assert (logical_immediate_p (x, mode));
1144 constant_to_array (mode, x, arr);
1145 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1146 val = trunc_int_for_mode (val, SImode);
1147 switch (which_logical_immediate (val))
1148 {
1149 case SPU_ORI:
1150 break;
1151 case SPU_ORHI:
1152 fprintf (file, "h");
1153 break;
1154 case SPU_ORBI:
1155 fprintf (file, "b");
1156 break;
1157 default:
1158 gcc_unreachable();
1159 }
1160 }
1161 else
1162 gcc_unreachable();
1163 return;
1164
1165 case 'J': /* 32 bits, signed */
1166 case 'K': /* 64 bits, signed */
1167 case 'L': /* 128 bits, signed */
1168 if (xcode == CONST_INT
1169 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1170 {
1171 gcc_assert (logical_immediate_p (x, mode)
1172 || iohl_immediate_p (x, mode));
1173 constant_to_array (mode, x, arr);
1174 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1175 val = trunc_int_for_mode (val, SImode);
1176 switch (which_logical_immediate (val))
1177 {
1178 case SPU_ORI:
1179 case SPU_IOHL:
1180 break;
1181 case SPU_ORHI:
1182 val = trunc_int_for_mode (val, HImode);
1183 break;
1184 case SPU_ORBI:
1185 val = trunc_int_for_mode (val, QImode);
1186 break;
1187 default:
1188 gcc_unreachable();
1189 }
1190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1191 }
1192 else
1193 gcc_unreachable();
1194 return;
1195
1196 case 't': /* 128 bits, signed */
1197 case 'd': /* 64 bits, signed */
1198 case 's': /* 32 bits, signed */
dea01258 1199 if (CONSTANT_P (x))
644459d0 1200 {
dea01258 1201 enum immediate_class c = classify_immediate (x, mode);
1202 switch (c)
1203 {
1204 case IC_IL1:
1205 constant_to_array (mode, x, arr);
1206 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1207 val = trunc_int_for_mode (val, SImode);
1208 switch (which_immediate_load (val))
1209 {
1210 case SPU_IL:
1211 break;
1212 case SPU_ILA:
1213 fprintf (file, "a");
1214 break;
1215 case SPU_ILH:
1216 fprintf (file, "h");
1217 break;
1218 case SPU_ILHU:
1219 fprintf (file, "hu");
1220 break;
1221 default:
1222 gcc_unreachable ();
1223 }
1224 break;
1225 case IC_CPAT:
1226 constant_to_array (mode, x, arr);
1227 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1228 if (info == 1)
1229 fprintf (file, "b");
1230 else if (info == 2)
1231 fprintf (file, "h");
1232 else if (info == 4)
1233 fprintf (file, "w");
1234 else if (info == 8)
1235 fprintf (file, "d");
1236 break;
1237 case IC_IL1s:
1238 if (xcode == CONST_VECTOR)
1239 {
1240 x = CONST_VECTOR_ELT (x, 0);
1241 xcode = GET_CODE (x);
1242 }
1243 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1244 fprintf (file, "a");
1245 else if (xcode == HIGH)
1246 fprintf (file, "hu");
1247 break;
1248 case IC_FSMBI:
5df189be 1249 case IC_FSMBI2:
dea01258 1250 case IC_IL2:
1251 case IC_IL2s:
1252 case IC_POOL:
1253 abort ();
1254 }
644459d0 1255 }
644459d0 1256 else
1257 gcc_unreachable ();
1258 return;
1259
1260 case 'T': /* 128 bits, signed */
1261 case 'D': /* 64 bits, signed */
1262 case 'S': /* 32 bits, signed */
dea01258 1263 if (CONSTANT_P (x))
644459d0 1264 {
dea01258 1265 enum immediate_class c = classify_immediate (x, mode);
1266 switch (c)
644459d0 1267 {
dea01258 1268 case IC_IL1:
1269 constant_to_array (mode, x, arr);
1270 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1271 val = trunc_int_for_mode (val, SImode);
1272 switch (which_immediate_load (val))
1273 {
1274 case SPU_IL:
1275 case SPU_ILA:
1276 break;
1277 case SPU_ILH:
1278 case SPU_ILHU:
1279 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1280 break;
1281 default:
1282 gcc_unreachable ();
1283 }
1284 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1285 break;
1286 case IC_FSMBI:
1287 constant_to_array (mode, x, arr);
1288 val = 0;
1289 for (i = 0; i < 16; i++)
1290 {
1291 val <<= 1;
1292 val |= arr[i] & 1;
1293 }
1294 print_operand (file, GEN_INT (val), 0);
1295 break;
1296 case IC_CPAT:
1297 constant_to_array (mode, x, arr);
1298 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1300 break;
dea01258 1301 case IC_IL1s:
dea01258 1302 if (xcode == HIGH)
5df189be 1303 x = XEXP (x, 0);
1304 if (GET_CODE (x) == CONST_VECTOR)
1305 x = CONST_VECTOR_ELT (x, 0);
1306 output_addr_const (file, x);
1307 if (xcode == HIGH)
1308 fprintf (file, "@h");
644459d0 1309 break;
dea01258 1310 case IC_IL2:
1311 case IC_IL2s:
5df189be 1312 case IC_FSMBI2:
dea01258 1313 case IC_POOL:
1314 abort ();
644459d0 1315 }
c8befdb9 1316 }
644459d0 1317 else
1318 gcc_unreachable ();
1319 return;
1320
644459d0 1321 case 'C':
1322 if (xcode == CONST_INT)
1323 {
 1324 /* Only the 4 least significant bits are relevant for the generate
 1325 control word instructions. */
1326 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1327 return;
1328 }
1329 break;
1330
1331 case 'M': /* print code for c*d */
1332 if (GET_CODE (x) == CONST_INT)
1333 switch (INTVAL (x))
1334 {
1335 case 1:
1336 fprintf (file, "b");
1337 break;
1338 case 2:
1339 fprintf (file, "h");
1340 break;
1341 case 4:
1342 fprintf (file, "w");
1343 break;
1344 case 8:
1345 fprintf (file, "d");
1346 break;
1347 default:
1348 gcc_unreachable();
1349 }
1350 else
1351 gcc_unreachable();
1352 return;
1353
1354 case 'N': /* Negate the operand */
1355 if (xcode == CONST_INT)
1356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1357 else if (xcode == CONST_VECTOR)
1358 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1359 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1360 return;
1361
1362 case 'I': /* enable/disable interrupts */
1363 if (xcode == CONST_INT)
1364 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1365 return;
1366
1367 case 'b': /* branch modifiers */
1368 if (xcode == REG)
1369 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1370 else if (COMPARISON_P (x))
1371 fprintf (file, "%s", xcode == NE ? "n" : "");
1372 return;
1373
1374 case 'i': /* indirect call */
1375 if (xcode == MEM)
1376 {
1377 if (GET_CODE (XEXP (x, 0)) == REG)
1378 /* Used in indirect function calls. */
1379 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1380 else
1381 output_address (XEXP (x, 0));
1382 }
1383 return;
1384
1385 case 'p': /* load/store */
1386 if (xcode == MEM)
1387 {
1388 x = XEXP (x, 0);
1389 xcode = GET_CODE (x);
1390 }
e04cf423 1391 if (xcode == AND)
1392 {
1393 x = XEXP (x, 0);
1394 xcode = GET_CODE (x);
1395 }
644459d0 1396 if (xcode == REG)
1397 fprintf (file, "d");
1398 else if (xcode == CONST_INT)
1399 fprintf (file, "a");
1400 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1401 fprintf (file, "r");
1402 else if (xcode == PLUS || xcode == LO_SUM)
1403 {
1404 if (GET_CODE (XEXP (x, 1)) == REG)
1405 fprintf (file, "x");
1406 else
1407 fprintf (file, "d");
1408 }
1409 return;
1410
5df189be 1411 case 'e':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val &= 0x7;
1414 output_addr_const (file, GEN_INT (val));
1415 return;
1416
1417 case 'f':
1418 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1419 val &= 0x1f;
1420 output_addr_const (file, GEN_INT (val));
1421 return;
1422
1423 case 'g':
1424 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1425 val &= 0x3f;
1426 output_addr_const (file, GEN_INT (val));
1427 return;
1428
1429 case 'h':
1430 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431 val = (val >> 3) & 0x1f;
1432 output_addr_const (file, GEN_INT (val));
1433 return;
1434
1435 case 'E':
1436 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1437 val = -val;
1438 val &= 0x7;
1439 output_addr_const (file, GEN_INT (val));
1440 return;
1441
1442 case 'F':
1443 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1444 val = -val;
1445 val &= 0x1f;
1446 output_addr_const (file, GEN_INT (val));
1447 return;
1448
1449 case 'G':
1450 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1451 val = -val;
1452 val &= 0x3f;
1453 output_addr_const (file, GEN_INT (val));
1454 return;
1455
1456 case 'H':
1457 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1458 val = -(val & -8ll);
1459 val = (val >> 3) & 0x1f;
1460 output_addr_const (file, GEN_INT (val));
1461 return;
1462
644459d0 1463 case 0:
1464 if (xcode == REG)
1465 fprintf (file, "%s", reg_names[REGNO (x)]);
1466 else if (xcode == MEM)
1467 output_address (XEXP (x, 0));
1468 else if (xcode == CONST_VECTOR)
dea01258 1469 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1470 else
1471 output_addr_const (file, x);
1472 return;
1473
f6a0d06f 1474 /* unused letters
5df189be 1475 o qr uvw yz
1476 AB OPQR UVWXYZ */
644459d0 1477 default:
1478 output_operand_lossage ("invalid %%xn code");
1479 }
1480 gcc_unreachable ();
1481}
1482
1483extern char call_used_regs[];
644459d0 1484
1485/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1486 caller saved register. For leaf functions it is more efficient to
1487 use a volatile register because we won't need to save and restore the
1488 pic register. This routine is only valid after register allocation
1489 is completed, so we can pick an unused register. */
1490static rtx
1491get_pic_reg (void)
1492{
1493 rtx pic_reg = pic_offset_table_rtx;
1494 if (!reload_completed && !reload_in_progress)
1495 abort ();
1496 return pic_reg;
1497}
1498
5df189be 1499/* Split constant addresses to handle cases that are too large.
1500 Add in the pic register when in PIC mode.
1501 Split immediates that require more than 1 instruction. */
dea01258 1502int
1503spu_split_immediate (rtx * ops)
c8befdb9 1504{
dea01258 1505 enum machine_mode mode = GET_MODE (ops[0]);
1506 enum immediate_class c = classify_immediate (ops[1], mode);
1507
1508 switch (c)
c8befdb9 1509 {
dea01258 1510 case IC_IL2:
1511 {
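 /* Split each 32-bit word into its high and low halfwords so the constant
 can be built with an ilhu/iohl pair; e.g. (illustrative value) a word of
 0x12345678 becomes a move of 0x12340000 followed by an IOR with
 0x00005678. */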
1512 unsigned char arrhi[16];
1513 unsigned char arrlo[16];
1514 rtx to, hi, lo;
1515 int i;
1516 constant_to_array (mode, ops[1], arrhi);
e1ba4a27 1517 to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
dea01258 1518 for (i = 0; i < 16; i += 4)
1519 {
1520 arrlo[i + 2] = arrhi[i + 2];
1521 arrlo[i + 3] = arrhi[i + 3];
1522 arrlo[i + 0] = arrlo[i + 1] = 0;
1523 arrhi[i + 2] = arrhi[i + 3] = 0;
1524 }
1525 hi = array_to_constant (mode, arrhi);
1526 lo = array_to_constant (mode, arrlo);
1527 emit_move_insn (to, hi);
1528 emit_insn (gen_rtx_SET
1529 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1530 return 1;
1531 }
5df189be 1532 case IC_FSMBI2:
1533 {
1534 unsigned char arr_fsmbi[16];
1535 unsigned char arr_andbi[16];
1536 rtx to, reg_fsmbi, reg_and;
1537 int i;
1538 enum machine_mode imode = mode;
1539 /* We need to do reals as ints because the constant used in the
1540 * AND might not be a legitimate real constant. */
1541 imode = int_mode_for_mode (mode);
1542 constant_to_array (mode, ops[1], arr_fsmbi);
1543 if (imode != mode)
1544 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1545 else
1546 to = ops[0];
1547 for (i = 0; i < 16; i++)
1548 if (arr_fsmbi[i] != 0)
1549 {
1550 arr_andbi[0] = arr_fsmbi[i];
1551 arr_fsmbi[i] = 0xff;
1552 }
1553 for (i = 1; i < 16; i++)
1554 arr_andbi[i] = arr_andbi[0];
1555 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1556 reg_and = array_to_constant (imode, arr_andbi);
1557 emit_move_insn (to, reg_fsmbi);
1558 emit_insn (gen_rtx_SET
1559 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1560 return 1;
1561 }
dea01258 1562 case IC_POOL:
1563 if (reload_in_progress || reload_completed)
1564 {
1565 rtx mem = force_const_mem (mode, ops[1]);
1566 if (TARGET_LARGE_MEM)
1567 {
1568 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1569 emit_move_insn (addr, XEXP (mem, 0));
1570 mem = replace_equiv_address (mem, addr);
1571 }
1572 emit_move_insn (ops[0], mem);
1573 return 1;
1574 }
1575 break;
1576 case IC_IL1s:
1577 case IC_IL2s:
1578 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1579 {
1580 if (c == IC_IL2s)
1581 {
5df189be 1582 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1583 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1584 }
1585 else if (flag_pic)
1586 emit_insn (gen_pic (ops[0], ops[1]));
1587 if (flag_pic)
1588 {
1589 rtx pic_reg = get_pic_reg ();
1590 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1591 current_function_uses_pic_offset_table = 1;
1592 }
1593 return flag_pic || c == IC_IL2s;
1594 }
1595 break;
1596 case IC_IL1:
1597 case IC_FSMBI:
1598 case IC_CPAT:
1599 break;
c8befdb9 1600 }
dea01258 1601 return 0;
c8befdb9 1602}
1603
644459d0 1604/* SAVING is TRUE when we are generating the actual load and store
1605 instructions for REGNO. When determining the size of the stack
 1606 needed for saving registers we must allocate enough space for the
1607 worst case, because we don't always have the information early enough
1608 to not allocate it. But we can at least eliminate the actual loads
1609 and stores during the prologue/epilogue. */
1610static int
1611need_to_save_reg (int regno, int saving)
1612{
3072d30e 1613 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1614 return 1;
1615 if (flag_pic
1616 && regno == PIC_OFFSET_TABLE_REGNUM
1617 && (!saving || current_function_uses_pic_offset_table)
1618 && (!saving
3072d30e 1619 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1620 return 1;
1621 return 0;
1622}
1623
1624/* This function is only correct starting with local register
1625 allocation */
1626int
1627spu_saved_regs_size (void)
1628{
1629 int reg_save_size = 0;
1630 int regno;
1631
1632 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1633 if (need_to_save_reg (regno, 0))
1634 reg_save_size += 0x10;
1635 return reg_save_size;
1636}
1637
1638static rtx
1639frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1640{
1641 rtx reg = gen_rtx_REG (V4SImode, regno);
1642 rtx mem =
1643 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1644 return emit_insn (gen_movv4si (mem, reg));
1645}
1646
1647static rtx
1648frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1649{
1650 rtx reg = gen_rtx_REG (V4SImode, regno);
1651 rtx mem =
1652 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1653 return emit_insn (gen_movv4si (reg, mem));
1654}
1655
1656/* This happens after reload, so we need to expand it. */
1657static rtx
1658frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1659{
1660 rtx insn;
1661 if (satisfies_constraint_K (GEN_INT (imm)))
1662 {
1663 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1664 }
1665 else
1666 {
3072d30e 1667 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1668 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1669 if (REGNO (src) == REGNO (scratch))
1670 abort ();
1671 }
644459d0 1672 return insn;
1673}
1674
1675/* Return nonzero if this function is known to have a null epilogue. */
1676
1677int
1678direct_return (void)
1679{
1680 if (reload_completed)
1681 {
1682 if (cfun->static_chain_decl == 0
1683 && (spu_saved_regs_size ()
1684 + get_frame_size ()
1685 + current_function_outgoing_args_size
1686 + current_function_pretend_args_size == 0)
1687 && current_function_is_leaf)
1688 return 1;
1689 }
1690 return 0;
1691}
1692
1693/*
1694 The stack frame looks like this:
1695 +-------------+
1696 | incoming |
1697 AP | args |
1698 +-------------+
1699 | $lr save |
1700 +-------------+
1701 prev SP | back chain |
1702 +-------------+
1703 | var args |
1704 | reg save | current_function_pretend_args_size bytes
1705 +-------------+
1706 | ... |
1707 | saved regs | spu_saved_regs_size() bytes
1708 +-------------+
1709 | ... |
1710 FP | vars | get_frame_size() bytes
1711 +-------------+
1712 | ... |
1713 | outgoing |
1714 | args | current_function_outgoing_args_size bytes
1715 +-------------+
1716 | $lr of next |
1717 | frame |
1718 +-------------+
1719 SP | back chain |
1720 +-------------+
1721
1722*/
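/* Illustrative sizing (figures are made up): a non-leaf function with 32
   bytes of locals and two call-saved registers gets
   total_size = 32 + 2*16 + current_function_outgoing_args_size
   + current_function_pretend_args_size + STACK_POINTER_OFFSET,
   exactly as computed in spu_expand_prologue below.  */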
1723void
1724spu_expand_prologue (void)
1725{
1726 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1727 HOST_WIDE_INT total_size;
1728 HOST_WIDE_INT saved_regs_size;
1729 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1730 rtx scratch_reg_0, scratch_reg_1;
1731 rtx insn, real;
1732
1733 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1734 the "toplevel" insn chain. */
1735 emit_note (NOTE_INSN_DELETED);
1736
1737 if (flag_pic && optimize == 0)
1738 current_function_uses_pic_offset_table = 1;
1739
1740 if (spu_naked_function_p (current_function_decl))
1741 return;
1742
1743 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1744 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1745
1746 saved_regs_size = spu_saved_regs_size ();
1747 total_size = size + saved_regs_size
1748 + current_function_outgoing_args_size
1749 + current_function_pretend_args_size;
1750
1751 if (!current_function_is_leaf
1752 || current_function_calls_alloca || total_size > 0)
1753 total_size += STACK_POINTER_OFFSET;
1754
1755 /* Save this first because code after this might use the link
1756 register as a scratch register. */
1757 if (!current_function_is_leaf)
1758 {
1759 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1760 RTX_FRAME_RELATED_P (insn) = 1;
1761 }
1762
1763 if (total_size > 0)
1764 {
1765 offset = -current_function_pretend_args_size;
1766 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1767 if (need_to_save_reg (regno, 1))
1768 {
1769 offset -= 16;
1770 insn = frame_emit_store (regno, sp_reg, offset);
1771 RTX_FRAME_RELATED_P (insn) = 1;
1772 }
1773 }
1774
1775 if (flag_pic && current_function_uses_pic_offset_table)
1776 {
1777 rtx pic_reg = get_pic_reg ();
1778 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1779 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1780 }
1781
1782 if (total_size > 0)
1783 {
1784 if (flag_stack_check)
1785 {
d819917f 1786 /* We compare against total_size-1 because
644459d0 1787 ($sp >= total_size) <=> ($sp > total_size-1) */
1788 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1789 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1790 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1791 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1792 {
1793 emit_move_insn (scratch_v4si, size_v4si);
1794 size_v4si = scratch_v4si;
1795 }
1796 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1797 emit_insn (gen_vec_extractv4si
1798 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1799 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1800 }
1801
1802 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1803 the value of the previous $sp because we save it as the back
1804 chain. */
1805 if (total_size <= 2000)
1806 {
1807 /* In this case we save the back chain first. */
1808 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1809 insn =
1810 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1811 }
1812 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1813 {
1814 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1815 insn =
1816 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1817 }
1818 else
1819 {
1820 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1821 insn =
1822 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1823 }
1824 RTX_FRAME_RELATED_P (insn) = 1;
1825 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1826 REG_NOTES (insn) =
1827 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1828
1829 if (total_size > 2000)
1830 {
1831 /* Save the back chain ptr */
1832 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1833 }
1834
1835 if (frame_pointer_needed)
1836 {
1837 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1838 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1839 + current_function_outgoing_args_size;
1840 /* Set the new frame_pointer */
d8dfeb55 1841 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1842 RTX_FRAME_RELATED_P (insn) = 1;
1843 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1844 REG_NOTES (insn) =
1845 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1846 real, REG_NOTES (insn));
5df189be 1847 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1848 }
1849 }
1850
1851 emit_note (NOTE_INSN_DELETED);
1852}
1853
1854void
1855spu_expand_epilogue (bool sibcall_p)
1856{
1857 int size = get_frame_size (), offset, regno;
1858 HOST_WIDE_INT saved_regs_size, total_size;
1859 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1860 rtx jump, scratch_reg_0;
1861
1862 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1863 the "toplevel" insn chain. */
1864 emit_note (NOTE_INSN_DELETED);
1865
1866 if (spu_naked_function_p (current_function_decl))
1867 return;
1868
1869 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1870
1871 saved_regs_size = spu_saved_regs_size ();
1872 total_size = size + saved_regs_size
1873 + current_function_outgoing_args_size
1874 + current_function_pretend_args_size;
1875
1876 if (!current_function_is_leaf
1877 || current_function_calls_alloca || total_size > 0)
1878 total_size += STACK_POINTER_OFFSET;
1879
1880 if (total_size > 0)
1881 {
1882 if (current_function_calls_alloca)
644459d0 1883 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1884 else
1885 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1886
1887
1888 if (saved_regs_size > 0)
1889 {
1890 offset = -current_function_pretend_args_size;
1891 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1892 if (need_to_save_reg (regno, 1))
1893 {
1894 offset -= 0x10;
1895 frame_emit_load (regno, sp_reg, offset);
1896 }
1897 }
1898 }
1899
1900 if (!current_function_is_leaf)
1901 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1902
1903 if (!sibcall_p)
1904 {
1905 emit_insn (gen_rtx_USE
1906 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1907 jump = emit_jump_insn (gen__return ());
1908 emit_barrier_after (jump);
1909 }
1910
1911 emit_note (NOTE_INSN_DELETED);
1912}
1913
1914rtx
1915spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1916{
1917 if (count != 0)
1918 return 0;
1919 /* This is inefficient because it ends up copying to a save-register
1920 which then gets saved even though $lr has already been saved. But
1921 it does generate better code for leaf functions and we don't need
1922 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1923 used for __builtin_return_address anyway, so maybe we don't care if
1924 it's inefficient. */
1925 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1926}
1927\f
1928
1929/* Given VAL, generate a constant appropriate for MODE.
1930 If MODE is a vector mode, every element will be VAL.
1931 For TImode, VAL will be zero extended to 128 bits. */
1932rtx
1933spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1934{
1935 rtx inner;
1936 rtvec v;
1937 int units, i;
1938
1939 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1940 || GET_MODE_CLASS (mode) == MODE_FLOAT
1941 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1942 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1943
1944 if (GET_MODE_CLASS (mode) == MODE_INT)
1945 return immed_double_const (val, 0, mode);
1946
1947 /* val is the bit representation of the float */
1948 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1949 return hwint_to_const_double (mode, val);
1950
1951 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1952 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1953 else
1954 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1955
1956 units = GET_MODE_NUNITS (mode);
1957
1958 v = rtvec_alloc (units);
1959
1960 for (i = 0; i < units; ++i)
1961 RTVEC_ELT (v, i) = inner;
1962
1963 return gen_rtx_CONST_VECTOR (mode, v);
1964}
1965\f
1966/* branch hint stuff */
1967
1968/* The hardware requires 8 insns between a hint and the branch it
1969   affects.  This variable describes how many rtl instructions the
1970   compiler needs to see before inserting a hint.  (FIXME: We should
1971   accept fewer and insert nops to enforce it because hinting is always
1972   profitable for performance, but we do need to be careful of code
1973   size.) */
1974int spu_hint_dist = (8 * 4);
1975
5474166e 1976/* Create a MODE vector constant from 4 ints. */
1977rtx
1978spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1979{
1980 unsigned char arr[16];
1981 arr[0] = (a >> 24) & 0xff;
1982 arr[1] = (a >> 16) & 0xff;
1983 arr[2] = (a >> 8) & 0xff;
1984 arr[3] = (a >> 0) & 0xff;
1985 arr[4] = (b >> 24) & 0xff;
1986 arr[5] = (b >> 16) & 0xff;
1987 arr[6] = (b >> 8) & 0xff;
1988 arr[7] = (b >> 0) & 0xff;
1989 arr[8] = (c >> 24) & 0xff;
1990 arr[9] = (c >> 16) & 0xff;
1991 arr[10] = (c >> 8) & 0xff;
1992 arr[11] = (c >> 0) & 0xff;
1993 arr[12] = (d >> 24) & 0xff;
1994 arr[13] = (d >> 16) & 0xff;
1995 arr[14] = (d >> 8) & 0xff;
1996 arr[15] = (d >> 0) & 0xff;
1997 return array_to_constant(mode, arr);
1998}
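/* Illustrative example (not part of the original source): the four ints are
   split big-endian, four bytes each, so

     spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f)

   fills the 16-byte array with 0x00, 0x01, ..., 0x0f before handing it to
   array_to_constant.  */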
1999
644459d0 2000/* An array of these is used to propagate hints to predecessor blocks. */
2001struct spu_bb_info
2002{
fa7637bd 2003 rtx prop_jump; /* propagated from another block */
2004 basic_block bb; /* the original block. */
644459d0 2005};
2006
2007/* The special $hbr register is used to prevent the insn scheduler from
2008 moving hbr insns across instructions which invalidate them. It
2009 should only be used in a clobber, and this function searches for
2010 insns which clobber it. */
2011static bool
2012insn_clobbers_hbr (rtx insn)
2013{
2014 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2015 {
2016 rtx parallel = PATTERN (insn);
2017 rtx clobber;
2018 int j;
2019 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2020 {
2021 clobber = XVECEXP (parallel, 0, j);
2022 if (GET_CODE (clobber) == CLOBBER
2023 && GET_CODE (XEXP (clobber, 0)) == REG
2024 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2025 return 1;
2026 }
2027 }
2028 return 0;
2029}
2030
2031static void
2032spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2033{
2034 rtx branch_label;
2035 rtx hint, insn, prev, next;
2036
2037 if (before == 0 || branch == 0 || target == 0)
2038 return;
2039
2040 if (distance > 600)
2041 return;
2042
2043
2044 branch_label = gen_label_rtx ();
2045 LABEL_NUSES (branch_label)++;
2046 LABEL_PRESERVE_P (branch_label) = 1;
2047 insn = emit_label_before (branch_label, branch);
2048 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2049
2050 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2051 the current insn is pipe0, dual issue with it. */
2052 prev = prev_active_insn (before);
2053 if (prev && get_pipe (prev) == 0)
2054 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2055 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2056 {
2057 next = next_active_insn (before);
2058 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2059 if (next)
2060 PUT_MODE (next, TImode);
2061 }
2062 else
2063 {
2064 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2065 PUT_MODE (hint, TImode);
2066 }
2067 recog_memoized (hint);
2068}
2069
2070/* Return 0 if we don't want a hint for this branch.  Otherwise return
2071   the rtx for the branch target. */
2072static rtx
2073get_branch_target (rtx branch)
2074{
2075 if (GET_CODE (branch) == JUMP_INSN)
2076 {
2077 rtx set, src;
2078
2079 /* Return statements */
2080 if (GET_CODE (PATTERN (branch)) == RETURN)
2081 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2082
2083 /* jump table */
2084 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2085 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2086 return 0;
2087
2088 set = single_set (branch);
2089 src = SET_SRC (set);
2090 if (GET_CODE (SET_DEST (set)) != PC)
2091 abort ();
2092
2093 if (GET_CODE (src) == IF_THEN_ELSE)
2094 {
2095 rtx lab = 0;
2096 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2097 if (note)
2098 {
2099 /* If the more probable case is not a fall through, then
2100 try a branch hint. */
2101 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2102 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2103 && GET_CODE (XEXP (src, 1)) != PC)
2104 lab = XEXP (src, 1);
2105 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2106 && GET_CODE (XEXP (src, 2)) != PC)
2107 lab = XEXP (src, 2);
2108 }
2109 if (lab)
2110 {
2111 if (GET_CODE (lab) == RETURN)
2112 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2113 return lab;
2114 }
2115 return 0;
2116 }
2117
2118 return src;
2119 }
2120 else if (GET_CODE (branch) == CALL_INSN)
2121 {
2122 rtx call;
2123 /* All of our call patterns are in a PARALLEL and the CALL is
2124 the first pattern in the PARALLEL. */
2125 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2126 abort ();
2127 call = XVECEXP (PATTERN (branch), 0, 0);
2128 if (GET_CODE (call) == SET)
2129 call = SET_SRC (call);
2130 if (GET_CODE (call) != CALL)
2131 abort ();
2132 return XEXP (XEXP (call, 0), 0);
2133 }
2134 return 0;
2135}
2136
2137static void
2138insert_branch_hints (void)
2139{
2140 struct spu_bb_info *spu_bb_info;
2141 rtx branch, insn, next;
2142 rtx branch_target = 0;
2143 int branch_addr = 0, insn_addr, head_addr;
2144 basic_block bb;
2145 unsigned int j;
2146
2147 spu_bb_info =
2148 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2149 sizeof (struct spu_bb_info));
2150
2151 /* We need exact insn addresses and lengths. */
2152 shorten_branches (get_insns ());
2153
2154 FOR_EACH_BB_REVERSE (bb)
2155 {
2156 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2157 branch = 0;
2158 if (spu_bb_info[bb->index].prop_jump)
2159 {
2160 branch = spu_bb_info[bb->index].prop_jump;
2161 branch_target = get_branch_target (branch);
2162 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2163 }
2164      /* Search from the end of a block to the beginning.  In this loop, find
2165         jumps which need a hint and emit the hint only when:
2166         - it's an indirect branch and we're at the insn which sets
2167           the register
2168         - we're at an insn that will invalidate the hint, e.g., a
2169           call, another hint insn, inline asm that clobbers $hbr, and
2170           some inlined operations (divmodsi4).  Don't consider jumps
2171           because they are only at the end of a block and are
2172           considered when we are deciding whether to propagate
2173         - we're getting too far away from the branch.  The hbr insns
5b865faf 2174           only have a signed 10-bit offset.
644459d0 2175         We go back as far as possible so the branch will be considered
2176         for propagation when we get to the beginning of the block.  */
2177 next = 0;
2178 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2179 {
2180 if (INSN_P (insn))
2181 {
2182 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2183 if (branch && next
2184 && ((GET_CODE (branch_target) == REG
2185 && set_of (branch_target, insn) != NULL_RTX)
2186 || insn_clobbers_hbr (insn)
2187 || branch_addr - insn_addr > 600))
2188 {
2189 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2190 if (insn != BB_END (bb)
2191 && branch_addr - next_addr >= spu_hint_dist)
2192 {
2193 if (dump_file)
2194 fprintf (dump_file,
2195 "hint for %i in block %i before %i\n",
2196 INSN_UID (branch), bb->index, INSN_UID (next));
2197 spu_emit_branch_hint (next, branch, branch_target,
2198 branch_addr - next_addr);
2199 }
2200 branch = 0;
2201 }
2202
2203 /* JUMP_P will only be true at the end of a block. When
2204 branch is already set it means we've previously decided
2205 to propagate a hint for that branch into this block. */
2206 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2207 {
2208 branch = 0;
2209 if ((branch_target = get_branch_target (insn)))
2210 {
2211 branch = insn;
2212 branch_addr = insn_addr;
2213 }
2214 }
2215
2216 /* When a branch hint is emitted it will be inserted
2217 before "next". Make sure next is the beginning of a
2218 cycle to minimize impact on the scheduled insns. */
2219 if (GET_MODE (insn) == TImode)
2220 next = insn;
2221 }
2222 if (insn == BB_HEAD (bb))
2223 break;
2224 }
2225
2226 if (branch)
2227 {
2228 /* If we haven't emitted a hint for this branch yet, it might
2229 be profitable to emit it in one of the predecessor blocks,
2230 especially for loops. */
2231 rtx bbend;
2232 basic_block prev = 0, prop = 0, prev2 = 0;
2233 int loop_exit = 0, simple_loop = 0;
2234 int next_addr = 0;
2235 if (next)
2236 next_addr = INSN_ADDRESSES (INSN_UID (next));
2237
2238 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2239 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2240 prev = EDGE_PRED (bb, j)->src;
2241 else
2242 prev2 = EDGE_PRED (bb, j)->src;
2243
2244 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2245 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2246 loop_exit = 1;
2247 else if (EDGE_SUCC (bb, j)->dest == bb)
2248 simple_loop = 1;
2249
2250 /* If this branch is a loop exit then propagate to previous
2251 fallthru block. This catches the cases when it is a simple
2252 loop or when there is an initial branch into the loop. */
2253 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2254 prop = prev;
2255
2256          /* If there is only one adjacent predecessor, don't propagate
2257             outside this loop.  This loop_depth test isn't perfect, but
2258             I'm not sure the loop_father member is valid at this point.  */
2259 else if (prev && single_pred_p (bb)
2260 && prev->loop_depth == bb->loop_depth)
2261 prop = prev;
2262
2263 /* If this is the JOIN block of a simple IF-THEN then
80777cd8 2264 propagate the hint to the HEADER block. */
644459d0 2265 else if (prev && prev2
2266 && EDGE_COUNT (bb->preds) == 2
2267 && EDGE_COUNT (prev->preds) == 1
2268 && EDGE_PRED (prev, 0)->src == prev2
2269 && prev2->loop_depth == bb->loop_depth
2270 && GET_CODE (branch_target) != REG)
2271 prop = prev;
2272
2273 /* Don't propagate when:
2274 - this is a simple loop and the hint would be too far
2275 - this is not a simple loop and there are 16 insns in
2276 this block already
2277 - the predecessor block ends in a branch that will be
2278 hinted
2279 - the predecessor block ends in an insn that invalidates
2280 the hint */
2281 if (prop
2282 && prop->index >= 0
2283 && (bbend = BB_END (prop))
2284 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2285 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2286 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2287 {
2288 if (dump_file)
2289 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2290 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2291 bb->index, prop->index, bb->loop_depth,
2292 INSN_UID (branch), loop_exit, simple_loop,
2293 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2294
2295 spu_bb_info[prop->index].prop_jump = branch;
2296 spu_bb_info[prop->index].bb = bb;
2297 }
2298 else if (next && branch_addr - next_addr >= spu_hint_dist)
2299 {
2300 if (dump_file)
2301 fprintf (dump_file, "hint for %i in block %i before %i\n",
2302 INSN_UID (branch), bb->index, INSN_UID (next));
2303 spu_emit_branch_hint (next, branch, branch_target,
2304 branch_addr - next_addr);
2305 }
2306 branch = 0;
2307 }
2308 }
2309 free (spu_bb_info);
2310}
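/* Illustrative note (not part of the original source): taken together, the
   checks in spu_emit_branch_hint and insert_branch_hints mean a hint is only
   emitted when the hinted branch is at least spu_hint_dist bytes (8 insns of
   4 bytes each) and at most 600 bytes away from the insertion point.  */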
2311\f
2312/* Emit a nop for INSN such that the two will dual issue. This assumes
2313 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2314 We check for TImode to handle a MULTI1 insn which has dual issued its
2315 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2316 ADDR_VEC insns. */
2317static void
2318emit_nop_for_insn (rtx insn)
2319{
2320 int p;
2321 rtx new_insn;
2322 p = get_pipe (insn);
2323 if (p == 1 && GET_MODE (insn) == TImode)
2324 {
2325 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2326 PUT_MODE (new_insn, TImode);
2327 PUT_MODE (insn, VOIDmode);
2328 }
2329 else
2330 new_insn = emit_insn_after (gen_lnop (), insn);
2331}
2332
2333/* Insert nops in basic blocks to meet dual issue alignment
2334 requirements. */
2335static void
2336insert_nops (void)
2337{
2338 rtx insn, next_insn, prev_insn;
2339 int length;
2340 int addr;
2341
2342 /* This sets up INSN_ADDRESSES. */
2343 shorten_branches (get_insns ());
2344
2345 /* Keep track of length added by nops. */
2346 length = 0;
2347
2348 prev_insn = 0;
2349 for (insn = get_insns (); insn; insn = next_insn)
2350 {
2351 next_insn = next_active_insn (insn);
2352 addr = INSN_ADDRESSES (INSN_UID (insn));
2353 if (GET_MODE (insn) == TImode
2354 && next_insn
2355 && GET_MODE (next_insn) != TImode
2356 && ((addr + length) & 7) != 0)
2357 {
2358 /* prev_insn will always be set because the first insn is
2359 always 8-byte aligned. */
2360 emit_nop_for_insn (prev_insn);
2361 length += 4;
2362 }
2363 prev_insn = insn;
2364 }
2365}
2366
2367static void
2368spu_machine_dependent_reorg (void)
2369{
2370 if (optimize > 0)
2371 {
2372 if (TARGET_BRANCH_HINTS)
2373 insert_branch_hints ();
2374 insert_nops ();
2375 }
2376}
2377\f
2378
2379/* Insn scheduling routines, primarily for dual issue. */
2380static int
2381spu_sched_issue_rate (void)
2382{
2383 return 2;
2384}
2385
2386static int
2387spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2388 int verbose ATTRIBUTE_UNUSED, rtx insn,
2389 int can_issue_more)
2390{
2391 if (GET_CODE (PATTERN (insn)) != USE
2392 && GET_CODE (PATTERN (insn)) != CLOBBER
2393 && get_pipe (insn) != -2)
2394 can_issue_more--;
2395 return can_issue_more;
2396}
2397
2398static int
2399get_pipe (rtx insn)
2400{
2401 enum attr_type t;
2402 /* Handle inline asm */
2403 if (INSN_CODE (insn) == -1)
2404 return -1;
2405 t = get_attr_type (insn);
2406 switch (t)
2407 {
2408 case TYPE_CONVERT:
2409 return -2;
2410 case TYPE_MULTI0:
2411 return -1;
2412
2413 case TYPE_FX2:
2414 case TYPE_FX3:
2415 case TYPE_SPR:
2416 case TYPE_NOP:
2417 case TYPE_FXB:
2418 case TYPE_FPD:
2419 case TYPE_FP6:
2420 case TYPE_FP7:
2421 case TYPE_IPREFETCH:
2422 return 0;
2423
2424 case TYPE_LNOP:
2425 case TYPE_SHUF:
2426 case TYPE_LOAD:
2427 case TYPE_STORE:
2428 case TYPE_BR:
2429 case TYPE_MULTI1:
2430 case TYPE_HBR:
2431 return 1;
2432 default:
2433 abort ();
2434 }
2435}
2436
2437static int
2438spu_sched_adjust_priority (rtx insn, int pri)
2439{
2440 int p = get_pipe (insn);
2441 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2442 * scheduling. */
2443 if (GET_CODE (PATTERN (insn)) == USE
2444 || GET_CODE (PATTERN (insn)) == CLOBBER
2445 || p == -2)
2446 return pri + 100;
2447 /* Schedule pipe0 insns early for greedier dual issue. */
2448 if (p != 1)
2449 return pri + 50;
2450 return pri;
2451}
2452
2453/* INSN is dependent on DEP_INSN. */
2454static int
2455spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2456 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2457{
2458 if (GET_CODE (insn) == CALL_INSN)
2459 return cost - 2;
2460 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2461 scheduler makes every insn in a block anti-dependent on the final
2462 jump_insn. We adjust here so higher cost insns will get scheduled
2463 earlier. */
2464 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 2465 return insn_cost (dep_insn) - 3;
644459d0 2466 return cost;
2467}
2468\f
2469/* Create a CONST_DOUBLE from a string. */
2470struct rtx_def *
2471spu_float_const (const char *string, enum machine_mode mode)
2472{
2473 REAL_VALUE_TYPE value;
2474 value = REAL_VALUE_ATOF (string, mode);
2475 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2476}
2477
644459d0 2478int
2479spu_constant_address_p (rtx x)
2480{
2481 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2482 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2483 || GET_CODE (x) == HIGH);
2484}
2485
2486static enum spu_immediate
2487which_immediate_load (HOST_WIDE_INT val)
2488{
2489 gcc_assert (val == trunc_int_for_mode (val, SImode));
2490
2491 if (val >= -0x8000 && val <= 0x7fff)
2492 return SPU_IL;
2493 if (val >= 0 && val <= 0x3ffff)
2494 return SPU_ILA;
2495 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2496 return SPU_ILH;
2497 if ((val & 0xffff) == 0)
2498 return SPU_ILHU;
2499
2500 return SPU_NONE;
2501}
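/* Illustrative examples (not part of the original source), following the
   checks above:
     which_immediate_load (0x1234)     == SPU_IL    (fits a signed 16-bit field)
     which_immediate_load (0x25000)    == SPU_ILA   (fits an unsigned 18-bit field)
     which_immediate_load (0x12341234) == SPU_ILH   (both halfwords identical)
     which_immediate_load (0x12340000) == SPU_ILHU  (low halfword is zero)
   Anything else takes two instructions (ilhu followed by iohl).  */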
2502
dea01258 2503/* Return true when OP can be loaded by one of the il instructions, or
2504 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 2505int
2506immediate_load_p (rtx op, enum machine_mode mode)
dea01258 2507{
2508 if (CONSTANT_P (op))
2509 {
2510 enum immediate_class c = classify_immediate (op, mode);
5df189be 2511 return c == IC_IL1 || c == IC_IL1s
3072d30e 2512 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 2513 }
2514 return 0;
2515}
2516
2517/* Return true if the first SIZE bytes of ARR form a constant that can be
2518 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2519 represent the size and offset of the instruction to use. */
2520static int
2521cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2522{
2523 int cpat, run, i, start;
2524 cpat = 1;
2525 run = 0;
2526 start = -1;
2527 for (i = 0; i < size && cpat; i++)
2528 if (arr[i] != i+16)
2529 {
2530 if (!run)
2531 {
2532 start = i;
2533 if (arr[i] == 3)
2534 run = 1;
2535 else if (arr[i] == 2 && arr[i+1] == 3)
2536 run = 2;
2537 else if (arr[i] == 0)
2538 {
2539 while (arr[i+run] == run && i+run < 16)
2540 run++;
2541 if (run != 4 && run != 8)
2542 cpat = 0;
2543 }
2544 else
2545 cpat = 0;
2546 if ((i & (run-1)) != 0)
2547 cpat = 0;
2548 i += run;
2549 }
2550 else
2551 cpat = 0;
2552 }
b01a6dc3 2553 if (cpat && (run || size < 16))
dea01258 2554 {
2555 if (run == 0)
2556 run = 1;
2557 if (prun)
2558 *prun = run;
2559 if (pstart)
2560 *pstart = start == -1 ? 16-run : start;
2561 return 1;
2562 }
2563 return 0;
2564}
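/* Illustrative example (not part of the original source): a generate-controls
   pattern defaults to byte value 0x10 + i at position i, with a run of
   0x00, 0x01, ... at the insertion point.  So the array

     { 0x00, 0x01, 0x02, 0x03, 0x14, 0x15, ..., 0x1f }

   is recognized with run == 4 and start == 0, which is the pattern cwd
   would generate for inserting a word at offset 0.  */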
2565
2566/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 2567 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 2568static enum immediate_class
2569classify_immediate (rtx op, enum machine_mode mode)
644459d0 2570{
2571 HOST_WIDE_INT val;
2572 unsigned char arr[16];
5df189be 2573 int i, j, repeated, fsmbi, repeat;
dea01258 2574
2575 gcc_assert (CONSTANT_P (op));
2576
644459d0 2577 if (GET_MODE (op) != VOIDmode)
2578 mode = GET_MODE (op);
2579
dea01258 2580 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 2581 if (!flag_pic
2582 && mode == V4SImode
dea01258 2583 && GET_CODE (op) == CONST_VECTOR
2584 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2585 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2586 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2587 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2588 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2589 op = CONST_VECTOR_ELT (op, 0);
644459d0 2590
dea01258 2591 switch (GET_CODE (op))
2592 {
2593 case SYMBOL_REF:
2594 case LABEL_REF:
2595 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 2596
dea01258 2597 case CONST:
0cfc65d4 2598 /* We can never know if the resulting address fits in 18 bits and can be
2599 loaded with ila. For now, assume the address will not overflow if
2600 the displacement is "small" (fits 'K' constraint). */
2601 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2602 {
2603 rtx sym = XEXP (XEXP (op, 0), 0);
2604 rtx cst = XEXP (XEXP (op, 0), 1);
2605
2606 if (GET_CODE (sym) == SYMBOL_REF
2607 && GET_CODE (cst) == CONST_INT
2608 && satisfies_constraint_K (cst))
2609 return IC_IL1s;
2610 }
2611 return IC_IL2s;
644459d0 2612
dea01258 2613 case HIGH:
2614 return IC_IL1s;
2615
2616 case CONST_VECTOR:
2617 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2618 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2619 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2620 return IC_POOL;
2621 /* Fall through. */
2622
2623 case CONST_INT:
2624 case CONST_DOUBLE:
2625 constant_to_array (mode, op, arr);
644459d0 2626
dea01258 2627 /* Check that each 4-byte slot is identical. */
2628 repeated = 1;
2629 for (i = 4; i < 16; i += 4)
2630 for (j = 0; j < 4; j++)
2631 if (arr[j] != arr[i + j])
2632 repeated = 0;
2633
2634 if (repeated)
2635 {
2636 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2637 val = trunc_int_for_mode (val, SImode);
2638
2639 if (which_immediate_load (val) != SPU_NONE)
2640 return IC_IL1;
2641 }
2642
2643 /* Any mode of 2 bytes or smaller can be loaded with an il
2644 instruction. */
2645 gcc_assert (GET_MODE_SIZE (mode) > 2);
2646
2647 fsmbi = 1;
5df189be 2648 repeat = 0;
dea01258 2649 for (i = 0; i < 16 && fsmbi; i++)
5df189be 2650 if (arr[i] != 0 && repeat == 0)
2651 repeat = arr[i];
2652 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 2653 fsmbi = 0;
2654 if (fsmbi)
5df189be 2655 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 2656
2657 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2658 return IC_CPAT;
2659
2660 if (repeated)
2661 return IC_IL2;
2662
2663 return IC_POOL;
2664 default:
2665 break;
2666 }
2667 gcc_unreachable ();
644459d0 2668}
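/* Illustrative classifications (not part of the original source), following
   the logic above:
     - V4SImode { -1, 0, 0, 0 }: every nonzero byte is 0xff, so it is
       IC_FSMBI (a single fsmbi).
     - V4SImode with 0x12345678 in every slot: the 4-byte slots repeat but no
       single il-class insn can load the value, so it is IC_IL2 (ilhu + iohl).
     - V4SImode { 1, 2, 3, 4 }: neither repeated, fsmbi-able nor a
       generate-controls pattern, so it falls back to IC_POOL.  */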
2669
2670static enum spu_immediate
2671which_logical_immediate (HOST_WIDE_INT val)
2672{
2673 gcc_assert (val == trunc_int_for_mode (val, SImode));
2674
2675 if (val >= -0x200 && val <= 0x1ff)
2676 return SPU_ORI;
2677 if (val >= 0 && val <= 0xffff)
2678 return SPU_IOHL;
2679 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2680 {
2681 val = trunc_int_for_mode (val, HImode);
2682 if (val >= -0x200 && val <= 0x1ff)
2683 return SPU_ORHI;
2684 if ((val & 0xff) == ((val >> 8) & 0xff))
2685 {
2686 val = trunc_int_for_mode (val, QImode);
2687 if (val >= -0x200 && val <= 0x1ff)
2688 return SPU_ORBI;
2689 }
2690 }
2691 return SPU_NONE;
2692}
2693
5df189be 2694/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2695 CONST_DOUBLEs. */
2696static int
2697const_vector_immediate_p (rtx x)
2698{
2699 int i;
2700 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2701 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2702 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2703 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2704 return 0;
2705 return 1;
2706}
2707
644459d0 2708int
2709logical_immediate_p (rtx op, enum machine_mode mode)
2710{
2711 HOST_WIDE_INT val;
2712 unsigned char arr[16];
2713 int i, j;
2714
2715 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2716 || GET_CODE (op) == CONST_VECTOR);
2717
5df189be 2718 if (GET_CODE (op) == CONST_VECTOR
2719 && !const_vector_immediate_p (op))
2720 return 0;
2721
644459d0 2722 if (GET_MODE (op) != VOIDmode)
2723 mode = GET_MODE (op);
2724
2725 constant_to_array (mode, op, arr);
2726
2727 /* Check that bytes are repeated. */
2728 for (i = 4; i < 16; i += 4)
2729 for (j = 0; j < 4; j++)
2730 if (arr[j] != arr[i + j])
2731 return 0;
2732
2733 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2734 val = trunc_int_for_mode (val, SImode);
2735
2736 i = which_logical_immediate (val);
2737 return i != SPU_NONE && i != SPU_IOHL;
2738}
2739
2740int
2741iohl_immediate_p (rtx op, enum machine_mode mode)
2742{
2743 HOST_WIDE_INT val;
2744 unsigned char arr[16];
2745 int i, j;
2746
2747 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2748 || GET_CODE (op) == CONST_VECTOR);
2749
5df189be 2750 if (GET_CODE (op) == CONST_VECTOR
2751 && !const_vector_immediate_p (op))
2752 return 0;
2753
644459d0 2754 if (GET_MODE (op) != VOIDmode)
2755 mode = GET_MODE (op);
2756
2757 constant_to_array (mode, op, arr);
2758
2759 /* Check that bytes are repeated. */
2760 for (i = 4; i < 16; i += 4)
2761 for (j = 0; j < 4; j++)
2762 if (arr[j] != arr[i + j])
2763 return 0;
2764
2765 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2766 val = trunc_int_for_mode (val, SImode);
2767
2768 return val >= 0 && val <= 0xffff;
2769}
2770
2771int
2772arith_immediate_p (rtx op, enum machine_mode mode,
2773 HOST_WIDE_INT low, HOST_WIDE_INT high)
2774{
2775 HOST_WIDE_INT val;
2776 unsigned char arr[16];
2777 int bytes, i, j;
2778
2779 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2780 || GET_CODE (op) == CONST_VECTOR);
2781
5df189be 2782 if (GET_CODE (op) == CONST_VECTOR
2783 && !const_vector_immediate_p (op))
2784 return 0;
2785
644459d0 2786 if (GET_MODE (op) != VOIDmode)
2787 mode = GET_MODE (op);
2788
2789 constant_to_array (mode, op, arr);
2790
2791 if (VECTOR_MODE_P (mode))
2792 mode = GET_MODE_INNER (mode);
2793
2794 bytes = GET_MODE_SIZE (mode);
2795 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2796
2797 /* Check that bytes are repeated. */
2798 for (i = bytes; i < 16; i += bytes)
2799 for (j = 0; j < bytes; j++)
2800 if (arr[j] != arr[i + j])
2801 return 0;
2802
2803 val = arr[0];
2804 for (j = 1; j < bytes; j++)
2805 val = (val << 8) | arr[j];
2806
2807 val = trunc_int_for_mode (val, mode);
2808
2809 return val >= low && val <= high;
2810}
2811
2812/* We accept:
5b865faf 2813 - any 32-bit constant (SImode, SFmode)
644459d0 2814 - any constant that can be generated with fsmbi (any mode)
5b865faf 2815   - a 64-bit constant where the high and low 32 bits are identical
644459d0 2816 (DImode, DFmode)
5b865faf 2817 - a 128-bit constant where the four 32-bit words match. */
644459d0 2818int
2819spu_legitimate_constant_p (rtx x)
2820{
5df189be 2821 if (GET_CODE (x) == HIGH)
2822 x = XEXP (x, 0);
644459d0 2823 /* V4SI with all identical symbols is valid. */
5df189be 2824 if (!flag_pic
2825 && GET_MODE (x) == V4SImode
644459d0 2826 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2827 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 2828 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 2829 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2830 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2831 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2832
5df189be 2833 if (GET_CODE (x) == CONST_VECTOR
2834 && !const_vector_immediate_p (x))
2835 return 0;
644459d0 2836 return 1;
2837}
2838
2839/* Valid addresses are:
2840   - symbol_ref, label_ref, const
2841   - reg
2842   - reg + const, where either reg or const is 16-byte aligned
2843 - reg + reg, alignment doesn't matter
2844 The alignment matters in the reg+const case because lqd and stqd
2845 ignore the 4 least significant bits of the const. (TODO: It might be
2846 preferable to allow any alignment and fix it up when splitting.) */
2847int
2848spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2849 rtx x, int reg_ok_strict)
2850{
2851 if (mode == TImode && GET_CODE (x) == AND
2852 && GET_CODE (XEXP (x, 1)) == CONST_INT
2853 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2854 x = XEXP (x, 0);
2855 switch (GET_CODE (x))
2856 {
2857 case SYMBOL_REF:
2858 case LABEL_REF:
2859 return !TARGET_LARGE_MEM;
2860
2861 case CONST:
0cfc65d4 2862 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2863 {
2864 rtx sym = XEXP (XEXP (x, 0), 0);
2865 rtx cst = XEXP (XEXP (x, 0), 1);
2866
2867 /* Accept any symbol_ref + constant, assuming it does not
2868 wrap around the local store addressability limit. */
2869 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2870 return 1;
2871 }
2872 return 0;
644459d0 2873
2874 case CONST_INT:
2875 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2876
2877 case SUBREG:
2878 x = XEXP (x, 0);
2879 gcc_assert (GET_CODE (x) == REG);
2880
2881 case REG:
2882 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2883
2884 case PLUS:
2885 case LO_SUM:
2886 {
2887 rtx op0 = XEXP (x, 0);
2888 rtx op1 = XEXP (x, 1);
2889 if (GET_CODE (op0) == SUBREG)
2890 op0 = XEXP (op0, 0);
2891 if (GET_CODE (op1) == SUBREG)
2892 op1 = XEXP (op1, 0);
2893 /* We can't just accept any aligned register because CSE can
2894 change it to a register that is not marked aligned and then
2895 recog will fail. So we only accept frame registers because
2896 they will only be changed to other frame registers. */
2897 if (GET_CODE (op0) == REG
2898 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2899 && GET_CODE (op1) == CONST_INT
2900 && INTVAL (op1) >= -0x2000
2901 && INTVAL (op1) <= 0x1fff
5df189be 2902 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 2903 return 1;
2904 if (GET_CODE (op0) == REG
2905 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2906 && GET_CODE (op1) == REG
2907 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2908 return 1;
2909 }
2910 break;
2911
2912 default:
2913 break;
2914 }
2915 return 0;
2916}
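/* Illustrative examples (not part of the original source) of what the
   function above accepts, assuming TARGET_LARGE_MEM is not set:
     (symbol_ref)                        accepted
     (plus (reg sp) (const_int 32))      accepted, 16-byte aligned offset
     (plus (reg sp) (const_int 3))       accepted only because the stack
                                         pointer is known to be aligned
     (plus (reg r) (const_int 0x4000))   rejected, offset outside the
                                         signed 14-bit range
     (plus (reg r1) (reg r2))            accepted, reg + reg  */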
2917
2918/* When the address is reg + const_int, force the const_int into a
fa7637bd 2919 register. */
644459d0 2920rtx
2921spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2922 enum machine_mode mode)
2923{
2924 rtx op0, op1;
2925 /* Make sure both operands are registers. */
2926 if (GET_CODE (x) == PLUS)
2927 {
2928 op0 = XEXP (x, 0);
2929 op1 = XEXP (x, 1);
2930 if (ALIGNED_SYMBOL_REF_P (op0))
2931 {
2932 op0 = force_reg (Pmode, op0);
2933 mark_reg_pointer (op0, 128);
2934 }
2935 else if (GET_CODE (op0) != REG)
2936 op0 = force_reg (Pmode, op0);
2937 if (ALIGNED_SYMBOL_REF_P (op1))
2938 {
2939 op1 = force_reg (Pmode, op1);
2940 mark_reg_pointer (op1, 128);
2941 }
2942 else if (GET_CODE (op1) != REG)
2943 op1 = force_reg (Pmode, op1);
2944 x = gen_rtx_PLUS (Pmode, op0, op1);
2945 if (spu_legitimate_address (mode, x, 0))
2946 return x;
2947 }
2948 return NULL_RTX;
2949}
2950
2951/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2952 struct attribute_spec.handler. */
2953static tree
2954spu_handle_fndecl_attribute (tree * node,
2955 tree name,
2956 tree args ATTRIBUTE_UNUSED,
2957 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2958{
2959 if (TREE_CODE (*node) != FUNCTION_DECL)
2960 {
2961 warning (0, "`%s' attribute only applies to functions",
2962 IDENTIFIER_POINTER (name));
2963 *no_add_attrs = true;
2964 }
2965
2966 return NULL_TREE;
2967}
2968
2969/* Handle the "vector" attribute. */
2970static tree
2971spu_handle_vector_attribute (tree * node, tree name,
2972 tree args ATTRIBUTE_UNUSED,
2973 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2974{
2975 tree type = *node, result = NULL_TREE;
2976 enum machine_mode mode;
2977 int unsigned_p;
2978
2979 while (POINTER_TYPE_P (type)
2980 || TREE_CODE (type) == FUNCTION_TYPE
2981 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2982 type = TREE_TYPE (type);
2983
2984 mode = TYPE_MODE (type);
2985
2986 unsigned_p = TYPE_UNSIGNED (type);
2987 switch (mode)
2988 {
2989 case DImode:
2990 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2991 break;
2992 case SImode:
2993 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2994 break;
2995 case HImode:
2996 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2997 break;
2998 case QImode:
2999 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3000 break;
3001 case SFmode:
3002 result = V4SF_type_node;
3003 break;
3004 case DFmode:
3005 result = V2DF_type_node;
3006 break;
3007 default:
3008 break;
3009 }
3010
3011 /* Propagate qualifiers attached to the element type
3012 onto the vector type. */
3013 if (result && result != type && TYPE_QUALS (type))
3014 result = build_qualified_type (result, TYPE_QUALS (type));
3015
3016 *no_add_attrs = true; /* No need to hang on to the attribute. */
3017
3018 if (!result)
3019 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3020 else
3021 *node = reconstruct_complex_type (*node, result);
3022
3023 return NULL_TREE;
3024}
3025
f2b32076 3026/* Return nonzero if FUNC is a naked function. */
644459d0 3027static int
3028spu_naked_function_p (tree func)
3029{
3030 tree a;
3031
3032 if (TREE_CODE (func) != FUNCTION_DECL)
3033 abort ();
3034
3035 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3036 return a != NULL_TREE;
3037}
3038
3039int
3040spu_initial_elimination_offset (int from, int to)
3041{
3042 int saved_regs_size = spu_saved_regs_size ();
3043 int sp_offset = 0;
3044 if (!current_function_is_leaf || current_function_outgoing_args_size
3045 || get_frame_size () || saved_regs_size)
3046 sp_offset = STACK_POINTER_OFFSET;
3047 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3048 return (sp_offset + current_function_outgoing_args_size);
3049 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3050 return 0;
3051 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3052 return sp_offset + current_function_outgoing_args_size
3053 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3054 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3055 return get_frame_size () + saved_regs_size + sp_offset;
3056 return 0;
3057}
3058
3059rtx
fb80456a 3060spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3061{
3062 enum machine_mode mode = TYPE_MODE (type);
3063 int byte_size = ((mode == BLKmode)
3064 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3065
3066 /* Make sure small structs are left justified in a register. */
3067 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3068 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3069 {
3070 enum machine_mode smode;
3071 rtvec v;
3072 int i;
3073 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3074 int n = byte_size / UNITS_PER_WORD;
3075 v = rtvec_alloc (nregs);
3076 for (i = 0; i < n; i++)
3077 {
3078 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3079 gen_rtx_REG (TImode,
3080 FIRST_RETURN_REGNUM
3081 + i),
3082 GEN_INT (UNITS_PER_WORD * i));
3083 byte_size -= UNITS_PER_WORD;
3084 }
3085
3086 if (n < nregs)
3087 {
3088 if (byte_size < 4)
3089 byte_size = 4;
3090 smode =
3091 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3092 RTVEC_ELT (v, n) =
3093 gen_rtx_EXPR_LIST (VOIDmode,
3094 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3095 GEN_INT (UNITS_PER_WORD * n));
3096 }
3097 return gen_rtx_PARALLEL (mode, v);
3098 }
3099 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3100}
3101
3102rtx
3103spu_function_arg (CUMULATIVE_ARGS cum,
3104 enum machine_mode mode,
3105 tree type, int named ATTRIBUTE_UNUSED)
3106{
3107 int byte_size;
3108
3109 if (cum >= MAX_REGISTER_ARGS)
3110 return 0;
3111
3112 byte_size = ((mode == BLKmode)
3113 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3114
3115  /* The ABI does not allow parameters to be passed partially in
3116     a register and partially on the stack. */
3117 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3118 return 0;
3119
3120 /* Make sure small structs are left justified in a register. */
3121 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3122 && byte_size < UNITS_PER_WORD && byte_size > 0)
3123 {
3124 enum machine_mode smode;
3125 rtx gr_reg;
3126 if (byte_size < 4)
3127 byte_size = 4;
3128 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3129 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3130 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3131 const0_rtx);
3132 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3133 }
3134 else
3135 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3136}
3137
3138/* Variable sized types are passed by reference. */
3139static bool
3140spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3141 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3142 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3143{
3144 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3145}
3146\f
3147
3148/* Var args. */
3149
3150/* Create and return the va_list datatype.
3151
3152 On SPU, va_list is an array type equivalent to
3153
3154 typedef struct __va_list_tag
3155 {
3156 void *__args __attribute__((__aligned(16)));
3157 void *__skip __attribute__((__aligned(16)));
3158
3159 } va_list[1];
3160
fa7637bd 3161 where __args points to the arg that will be returned by the next
644459d0 3162 va_arg(), and __skip points to the previous stack frame such that
3163 when __args == __skip we should advance __args by 32 bytes. */
3164static tree
3165spu_build_builtin_va_list (void)
3166{
3167 tree f_args, f_skip, record, type_decl;
3168 bool owp;
3169
3170 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3171
3172 type_decl =
3173 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3174
3175 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3176 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3177
3178 DECL_FIELD_CONTEXT (f_args) = record;
3179 DECL_ALIGN (f_args) = 128;
3180 DECL_USER_ALIGN (f_args) = 1;
3181
3182 DECL_FIELD_CONTEXT (f_skip) = record;
3183 DECL_ALIGN (f_skip) = 128;
3184 DECL_USER_ALIGN (f_skip) = 1;
3185
3186 TREE_CHAIN (record) = type_decl;
3187 TYPE_NAME (record) = type_decl;
3188 TYPE_FIELDS (record) = f_args;
3189 TREE_CHAIN (f_args) = f_skip;
3190
3191  /* We know this is being padded and we want it that way.  It is an
3192     internal type, so hide the warnings from the user. */
3193 owp = warn_padded;
3194 warn_padded = false;
3195
3196 layout_type (record);
3197
3198 warn_padded = owp;
3199
3200 /* The correct type is an array type of one element. */
3201 return build_array_type (record, build_index_type (size_zero_node));
3202}
3203
3204/* Implement va_start by filling the va_list structure VALIST.
3205 NEXTARG points to the first anonymous stack argument.
3206
3207 The following global variables are used to initialize
3208 the va_list structure:
3209
3210 current_function_args_info;
3211 the CUMULATIVE_ARGS for this function
3212
3213 current_function_arg_offset_rtx:
3214 holds the offset of the first anonymous stack argument
3215 (relative to the virtual arg pointer). */
3216
3217void
3218spu_va_start (tree valist, rtx nextarg)
3219{
3220 tree f_args, f_skip;
3221 tree args, skip, t;
3222
3223 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3224 f_skip = TREE_CHAIN (f_args);
3225
3226 valist = build_va_arg_indirect_ref (valist);
3227 args =
3228 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3229 skip =
3230 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3231
3232 /* Find the __args area. */
3233 t = make_tree (TREE_TYPE (args), nextarg);
3234 if (current_function_pretend_args_size > 0)
0de36bdb 3235 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3236 size_int (-STACK_POINTER_OFFSET));
35cc02b5 3237 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
644459d0 3238 TREE_SIDE_EFFECTS (t) = 1;
3239 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3240
3241 /* Find the __skip area. */
3242 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3243 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3244 size_int (current_function_pretend_args_size
3245 - STACK_POINTER_OFFSET));
35cc02b5 3246 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
644459d0 3247 TREE_SIDE_EFFECTS (t) = 1;
3248 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3249}
3250
3251/* Gimplify va_arg by updating the va_list structure
3252 VALIST as required to retrieve an argument of type
3253 TYPE, and returning that argument.
3254
3255 ret = va_arg(VALIST, TYPE);
3256
3257 generates code equivalent to:
3258
3259 paddedsize = (sizeof(TYPE) + 15) & -16;
3260 if (VALIST.__args + paddedsize > VALIST.__skip
3261 && VALIST.__args <= VALIST.__skip)
3262 addr = VALIST.__skip + 32;
3263 else
3264 addr = VALIST.__args;
3265 VALIST.__args = addr + paddedsize;
3266 ret = *(TYPE *)addr;
3267 */
3268static tree
3269spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3270 tree * post_p ATTRIBUTE_UNUSED)
3271{
3272 tree f_args, f_skip;
3273 tree args, skip;
3274 HOST_WIDE_INT size, rsize;
3275 tree paddedsize, addr, tmp;
3276 bool pass_by_reference_p;
3277
3278 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3279 f_skip = TREE_CHAIN (f_args);
3280
3281 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3282 args =
3283 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3284 skip =
3285 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3286
3287 addr = create_tmp_var (ptr_type_node, "va_arg");
3288 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3289
3290  /* If an object is dynamically sized, a pointer to it is passed
3291 instead of the object itself. */
3292 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3293 false);
3294 if (pass_by_reference_p)
3295 type = build_pointer_type (type);
3296 size = int_size_in_bytes (type);
3297 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3298
3299  /* Build a conditional expression to calculate ADDR.  The expression
3300 will be gimplified later. */
0de36bdb 3301 paddedsize = size_int (rsize);
3302 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
644459d0 3303 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3304 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3305 build2 (LE_EXPR, boolean_type_node, args, skip));
3306
3307 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
0de36bdb 3308 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
3309 size_int (32)), args);
644459d0 3310
35cc02b5 3311 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
644459d0 3312 gimplify_and_add (tmp, pre_p);
3313
3314 /* update VALIST.__args */
0de36bdb 3315 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
35cc02b5 3316 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
644459d0 3317 gimplify_and_add (tmp, pre_p);
3318
3319 addr = fold_convert (build_pointer_type (type), addr);
3320
3321 if (pass_by_reference_p)
3322 addr = build_va_arg_indirect_ref (addr);
3323
3324 return build_va_arg_indirect_ref (addr);
3325}
3326
3327/* Save parameter registers starting with the register that corresponds
3328   to the first unnamed parameter.  If the first unnamed parameter is
3329   on the stack then save no registers.  Set pretend_args_size to the
3330 amount of space needed to save the registers. */
3331void
3332spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3333 tree type, int *pretend_size, int no_rtl)
3334{
3335 if (!no_rtl)
3336 {
3337 rtx tmp;
3338 int regno;
3339 int offset;
3340 int ncum = *cum;
3341
3342      /* cum currently points to the last named argument; we want to
3343         start at the next argument. */
3344 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3345
3346 offset = -STACK_POINTER_OFFSET;
3347 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3348 {
3349 tmp = gen_frame_mem (V4SImode,
3350 plus_constant (virtual_incoming_args_rtx,
3351 offset));
3352 emit_move_insn (tmp,
3353 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3354 offset += 16;
3355 }
3356 *pretend_size = offset + STACK_POINTER_OFFSET;
3357 }
3358}
3359\f
3360void
3361spu_conditional_register_usage (void)
3362{
3363 if (flag_pic)
3364 {
3365 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3366 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3367 }
644459d0 3368}
3369
3370/* This is called to decide when we can simplify a load instruction. We
3371 must only return true for registers which we know will always be
3372   aligned, taking into account that CSE might replace this reg with
3373   another one that has not been marked aligned.
3374   So this is really only true for frame, stack and virtual registers,
fa7637bd 3375   which we know are always aligned and should not be adversely affected
3376 by CSE. */
644459d0 3377static int
3378regno_aligned_for_load (int regno)
3379{
3380 return regno == FRAME_POINTER_REGNUM
5df189be 3381 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
644459d0 3382 || regno == STACK_POINTER_REGNUM
5df189be 3383 || (regno >= FIRST_VIRTUAL_REGISTER
3384 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 3385}
3386
3387/* Return TRUE when mem is known to be 16-byte aligned. */
3388int
3389aligned_mem_p (rtx mem)
3390{
3391 if (MEM_ALIGN (mem) >= 128)
3392 return 1;
3393 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3394 return 1;
3395 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3396 {
3397 rtx p0 = XEXP (XEXP (mem, 0), 0);
3398 rtx p1 = XEXP (XEXP (mem, 0), 1);
3399 if (regno_aligned_for_load (REGNO (p0)))
3400 {
3401 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3402 return 1;
3403 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3404 return 1;
3405 }
3406 }
3407 else if (GET_CODE (XEXP (mem, 0)) == REG)
3408 {
3409 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3410 return 1;
3411 }
3412 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3413 return 1;
3414 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3415 {
3416 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3417 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3418 if (GET_CODE (p0) == SYMBOL_REF
3419 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3420 return 1;
3421 }
3422 return 0;
3423}
3424
69ced2d6 3425/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3426 into its SYMBOL_REF_FLAGS. */
3427static void
3428spu_encode_section_info (tree decl, rtx rtl, int first)
3429{
3430 default_encode_section_info (decl, rtl, first);
3431
3432 /* If a variable has a forced alignment to < 16 bytes, mark it with
3433 SYMBOL_FLAG_ALIGN1. */
3434 if (TREE_CODE (decl) == VAR_DECL
3435 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3436 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3437}
3438
644459d0 3439/* Return TRUE if we are certain the mem refers to a complete object
3440 which is both 16-byte aligned and padded to a 16-byte boundary. This
3441 would make it safe to store with a single instruction.
3442 We guarantee the alignment and padding for static objects by aligning
3443 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3444 FIXME: We currently cannot guarantee this for objects on the stack
3445 because assign_parm_setup_stack calls assign_stack_local with the
3446 alignment of the parameter mode and in that case the alignment never
3447 gets adjusted by LOCAL_ALIGNMENT. */
3448static int
3449store_with_one_insn_p (rtx mem)
3450{
3451 rtx addr = XEXP (mem, 0);
3452 if (GET_MODE (mem) == BLKmode)
3453 return 0;
3454 /* Only static objects. */
3455 if (GET_CODE (addr) == SYMBOL_REF)
3456 {
3457 /* We use the associated declaration to make sure the access is
fa7637bd 3458 referring to the whole object.
644459d0 3459         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
3460 if it is necessary. Will there be cases where one exists, and
3461 the other does not? Will there be cases where both exist, but
3462 have different types? */
3463 tree decl = MEM_EXPR (mem);
3464 if (decl
3465 && TREE_CODE (decl) == VAR_DECL
3466 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3467 return 1;
3468 decl = SYMBOL_REF_DECL (addr);
3469 if (decl
3470 && TREE_CODE (decl) == VAR_DECL
3471 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3472 return 1;
3473 }
3474 return 0;
3475}
3476
3477int
3478spu_expand_mov (rtx * ops, enum machine_mode mode)
3479{
3480 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3481 abort ();
3482
3483 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3484 {
3485 rtx from = SUBREG_REG (ops[1]);
3486 enum machine_mode imode = GET_MODE (from);
3487
3488 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3489 && GET_MODE_CLASS (imode) == MODE_INT
3490 && subreg_lowpart_p (ops[1]));
3491
3492 if (GET_MODE_SIZE (imode) < 4)
3493 {
3494 from = gen_rtx_SUBREG (SImode, from, 0);
3495 imode = SImode;
3496 }
3497
3498 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3499 {
99bdde56 3500 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 3501 emit_insn (GEN_FCN (icode) (ops[0], from));
3502 }
3503 else
3504 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3505 return 1;
3506 }
3507
3508 /* At least one of the operands needs to be a register. */
3509 if ((reload_in_progress | reload_completed) == 0
3510 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3511 {
3512 rtx temp = force_reg (mode, ops[1]);
3513 emit_move_insn (ops[0], temp);
3514 return 1;
3515 }
3516 if (reload_in_progress || reload_completed)
3517 {
dea01258 3518 if (CONSTANT_P (ops[1]))
3519 return spu_split_immediate (ops);
644459d0 3520 return 0;
3521 }
3522 else
3523 {
3524 if (GET_CODE (ops[0]) == MEM)
3525 {
3526 if (!spu_valid_move (ops))
3527 {
3528 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3529 gen_reg_rtx (TImode)));
3530 return 1;
3531 }
3532 }
3533 else if (GET_CODE (ops[1]) == MEM)
3534 {
3535 if (!spu_valid_move (ops))
3536 {
3537 emit_insn (gen_load
3538 (ops[0], ops[1], gen_reg_rtx (TImode),
3539 gen_reg_rtx (SImode)));
3540 return 1;
3541 }
3542 }
3543 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3544 extend them. */
3545 if (GET_CODE (ops[1]) == CONST_INT)
3546 {
3547 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3548 if (val != INTVAL (ops[1]))
3549 {
3550 emit_move_insn (ops[0], GEN_INT (val));
3551 return 1;
3552 }
3553 }
3554 }
3555 return 0;
3556}
3557
3558static int
3559reg_align (rtx reg)
3560{
3561 /* For now, only frame registers are known to be aligned at all times.
3562 We can't trust REGNO_POINTER_ALIGN because optimization will move
3563 registers around, potentially changing an "aligned" register in an
3564 address to an unaligned register, which would result in an invalid
3565 address. */
3566 int regno = REGNO (reg);
3567 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3568}
3569
3570void
3571spu_split_load (rtx * ops)
3572{
3573 enum machine_mode mode = GET_MODE (ops[0]);
3574 rtx addr, load, rot, mem, p0, p1;
3575 int rot_amt;
3576
3577 addr = XEXP (ops[1], 0);
3578
3579 rot = 0;
3580 rot_amt = 0;
3581 if (GET_CODE (addr) == PLUS)
3582 {
3583 /* 8 cases:
3584 aligned reg + aligned reg => lqx
3585 aligned reg + unaligned reg => lqx, rotqby
3586 aligned reg + aligned const => lqd
3587 aligned reg + unaligned const => lqd, rotqbyi
3588 unaligned reg + aligned reg => lqx, rotqby
3589 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3590 unaligned reg + aligned const => lqd, rotqby
3591 unaligned reg + unaligned const -> not allowed by legitimate address
3592 */
3593 p0 = XEXP (addr, 0);
3594 p1 = XEXP (addr, 1);
3595 if (reg_align (p0) < 128)
3596 {
3597 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3598 {
3599 emit_insn (gen_addsi3 (ops[3], p0, p1));
3600 rot = ops[3];
3601 }
3602 else
3603 rot = p0;
3604 }
3605 else
3606 {
3607 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3608 {
3609 rot_amt = INTVAL (p1) & 15;
3610 p1 = GEN_INT (INTVAL (p1) & -16);
3611 addr = gen_rtx_PLUS (SImode, p0, p1);
3612 }
3613 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3614 rot = p1;
3615 }
3616 }
3617 else if (GET_CODE (addr) == REG)
3618 {
3619 if (reg_align (addr) < 128)
3620 rot = addr;
3621 }
3622 else if (GET_CODE (addr) == CONST)
3623 {
3624 if (GET_CODE (XEXP (addr, 0)) == PLUS
3625 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3626 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3627 {
3628 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3629 if (rot_amt & -16)
3630 addr = gen_rtx_CONST (Pmode,
3631 gen_rtx_PLUS (Pmode,
3632 XEXP (XEXP (addr, 0), 0),
3633 GEN_INT (rot_amt & -16)));
3634 else
3635 addr = XEXP (XEXP (addr, 0), 0);
3636 }
3637 else
3638 rot = addr;
3639 }
3640 else if (GET_CODE (addr) == CONST_INT)
3641 {
3642 rot_amt = INTVAL (addr);
3643 addr = GEN_INT (rot_amt & -16);
3644 }
3645 else if (!ALIGNED_SYMBOL_REF_P (addr))
3646 rot = addr;
3647
3648 if (GET_MODE_SIZE (mode) < 4)
3649 rot_amt += GET_MODE_SIZE (mode) - 4;
3650
3651 rot_amt &= 15;
3652
3653 if (rot && rot_amt)
3654 {
3655 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3656 rot = ops[3];
3657 rot_amt = 0;
3658 }
3659
3660 load = ops[2];
3661
3662 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3663 mem = change_address (ops[1], TImode, addr);
3664
e04cf423 3665 emit_insn (gen_movti (load, mem));
644459d0 3666
3667 if (rot)
3668 emit_insn (gen_rotqby_ti (load, load, rot));
3669 else if (rot_amt)
3670 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3671
3672 if (reload_completed)
3673 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3674 else
3675 emit_insn (gen_spu_convert (ops[0], load));
3676}
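/* Illustrative example (not part of the original source): for an SImode load
   from (plus (reg sp) (const_int 4)) the base is known to be 16-byte
   aligned, so the split above loads the enclosing quadword and rotates it by
   4 bytes to bring the requested word into the preferred slot; this is the
   "aligned reg + unaligned const => lqd, rotqbyi" case of the table inside
   the function.  */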
3677
3678void
3679spu_split_store (rtx * ops)
3680{
3681 enum machine_mode mode = GET_MODE (ops[0]);
3682 rtx pat = ops[2];
3683 rtx reg = ops[3];
3684 rtx addr, p0, p1, p1_lo, smem;
3685 int aform;
3686 int scalar;
3687
3688 addr = XEXP (ops[0], 0);
3689
3690 if (GET_CODE (addr) == PLUS)
3691 {
3692 /* 8 cases:
3693 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3694 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3695 aligned reg + aligned const => lqd, c?d, shuf, stqx
3696 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3697 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3698 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3699 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3700 unaligned reg + unaligned const -> not allowed by legitimate address
3701 */
3702 aform = 0;
3703 p0 = XEXP (addr, 0);
3704 p1 = p1_lo = XEXP (addr, 1);
3705 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3706 {
3707 p1_lo = GEN_INT (INTVAL (p1) & 15);
3708 p1 = GEN_INT (INTVAL (p1) & -16);
3709 addr = gen_rtx_PLUS (SImode, p0, p1);
3710 }
3711 }
3712 else if (GET_CODE (addr) == REG)
3713 {
3714 aform = 0;
3715 p0 = addr;
3716 p1 = p1_lo = const0_rtx;
3717 }
3718 else
3719 {
3720 aform = 1;
3721 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3722 p1 = 0; /* aform doesn't use p1 */
3723 p1_lo = addr;
3724 if (ALIGNED_SYMBOL_REF_P (addr))
3725 p1_lo = const0_rtx;
3726 else if (GET_CODE (addr) == CONST)
3727 {
3728 if (GET_CODE (XEXP (addr, 0)) == PLUS
3729 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3730 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3731 {
3732 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3733 if ((v & -16) != 0)
3734 addr = gen_rtx_CONST (Pmode,
3735 gen_rtx_PLUS (Pmode,
3736 XEXP (XEXP (addr, 0), 0),
3737 GEN_INT (v & -16)));
3738 else
3739 addr = XEXP (XEXP (addr, 0), 0);
3740 p1_lo = GEN_INT (v & 15);
3741 }
3742 }
3743 else if (GET_CODE (addr) == CONST_INT)
3744 {
3745 p1_lo = GEN_INT (INTVAL (addr) & 15);
3746 addr = GEN_INT (INTVAL (addr) & -16);
3747 }
3748 }
3749
e04cf423 3750 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3751
644459d0 3752 scalar = store_with_one_insn_p (ops[0]);
3753 if (!scalar)
3754 {
3755      /* We could copy the flags from the ops[0] MEM to mem here.
3756 We don't because we want this load to be optimized away if
3757 possible, and copying the flags will prevent that in certain
3758 cases, e.g. consider the volatile flag. */
3759
e04cf423 3760 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3761 set_mem_alias_set (lmem, 0);
3762 emit_insn (gen_movti (reg, lmem));
644459d0 3763
3764 if (!p0 || reg_align (p0) >= 128)
3765 p0 = stack_pointer_rtx;
3766 if (!p1_lo)
3767 p1_lo = const0_rtx;
3768
3769 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3770 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3771 }
3772 else if (reload_completed)
3773 {
3774 if (GET_CODE (ops[1]) == REG)
3775 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3776 else if (GET_CODE (ops[1]) == SUBREG)
3777 emit_move_insn (reg,
3778 gen_rtx_REG (GET_MODE (reg),
3779 REGNO (SUBREG_REG (ops[1]))));
3780 else
3781 abort ();
3782 }
3783 else
3784 {
3785 if (GET_CODE (ops[1]) == REG)
3786 emit_insn (gen_spu_convert (reg, ops[1]));
3787 else if (GET_CODE (ops[1]) == SUBREG)
3788 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3789 else
3790 abort ();
3791 }
3792
3793 if (GET_MODE_SIZE (mode) < 4 && scalar)
3794 emit_insn (gen_shlqby_ti
3795 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3796
644459d0 3797 smem = change_address (ops[0], TImode, addr);
3798 /* We can't use the previous alias set because the memory has changed
3799 size and can potentially overlap objects of other types. */
3800 set_mem_alias_set (smem, 0);
3801
e04cf423 3802 emit_insn (gen_movti (smem, reg));
644459d0 3803}
3804
 3805/* Return TRUE if X is a MEM which is a struct member reference
3806 and the member can safely be loaded and stored with a single
3807 instruction because it is padded. */
3808static int
3809mem_is_padded_component_ref (rtx x)
3810{
3811 tree t = MEM_EXPR (x);
3812 tree r;
3813 if (!t || TREE_CODE (t) != COMPONENT_REF)
3814 return 0;
3815 t = TREE_OPERAND (t, 1);
3816 if (!t || TREE_CODE (t) != FIELD_DECL
3817 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3818 return 0;
3819 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3820 r = DECL_FIELD_CONTEXT (t);
3821 if (!r || TREE_CODE (r) != RECORD_TYPE)
3822 return 0;
3823 /* Make sure they are the same mode */
3824 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3825 return 0;
 3826  /* If there are no following fields, then the field alignment assures
fa7637bd 3827     the structure is padded to that alignment, which means this field is
3828 padded too. */
644459d0 3829 if (TREE_CHAIN (t) == 0)
3830 return 1;
3831 /* If the following field is also aligned then this field will be
3832 padded. */
3833 t = TREE_CHAIN (t);
3834 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3835 return 1;
3836 return 0;
3837}
3838
c7b91b14 3839/* Parse the -mfixed-range= option string. */
3840static void
3841fix_range (const char *const_str)
3842{
3843 int i, first, last;
3844 char *str, *dash, *comma;
3845
 3846   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3847 REG2 are either register names or register numbers. The effect
3848 of this option is to mark the registers in the range from REG1 to
3849 REG2 as ``fixed'' so they won't be used by the compiler. */
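  /* For example, -mfixed-range=80-127 marks registers 80 through 127 as
     fixed so the register allocator will never use them.  */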
3850
3851 i = strlen (const_str);
3852 str = (char *) alloca (i + 1);
3853 memcpy (str, const_str, i + 1);
3854
3855 while (1)
3856 {
3857 dash = strchr (str, '-');
3858 if (!dash)
3859 {
3860 warning (0, "value of -mfixed-range must have form REG1-REG2");
3861 return;
3862 }
3863 *dash = '\0';
3864 comma = strchr (dash + 1, ',');
3865 if (comma)
3866 *comma = '\0';
3867
3868 first = decode_reg_name (str);
3869 if (first < 0)
3870 {
3871 warning (0, "unknown register name: %s", str);
3872 return;
3873 }
3874
3875 last = decode_reg_name (dash + 1);
3876 if (last < 0)
3877 {
3878 warning (0, "unknown register name: %s", dash + 1);
3879 return;
3880 }
3881
3882 *dash = '-';
3883
3884 if (first > last)
3885 {
3886 warning (0, "%s-%s is an empty range", str, dash + 1);
3887 return;
3888 }
3889
3890 for (i = first; i <= last; ++i)
3891 fixed_regs[i] = call_used_regs[i] = 1;
3892
3893 if (!comma)
3894 break;
3895
3896 *comma = ',';
3897 str = comma + 1;
3898 }
3899}
3900
644459d0 3901int
3902spu_valid_move (rtx * ops)
3903{
3904 enum machine_mode mode = GET_MODE (ops[0]);
3905 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3906 return 0;
3907
3908 /* init_expr_once tries to recog against load and store insns to set
3909 the direct_load[] and direct_store[] arrays. We always want to
3910 consider those loads and stores valid. init_expr_once is called in
3911 the context of a dummy function which does not have a decl. */
3912 if (cfun->decl == 0)
3913 return 1;
3914
 3915   /* Don't allow loads/stores which would require more than 1 insn.
3916 During and after reload we assume loads and stores only take 1
3917 insn. */
3918 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3919 {
3920 if (GET_CODE (ops[0]) == MEM
3921 && (GET_MODE_SIZE (mode) < 4
3922 || !(store_with_one_insn_p (ops[0])
3923 || mem_is_padded_component_ref (ops[0]))))
3924 return 0;
3925 if (GET_CODE (ops[1]) == MEM
3926 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3927 return 0;
3928 }
3929 return 1;
3930}
3931
3932/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3933 can be generated using the fsmbi instruction. */
3934int
3935fsmbi_const_p (rtx x)
3936{
dea01258 3937 if (CONSTANT_P (x))
3938 {
5df189be 3939 /* We can always choose TImode for CONST_INT because the high bits
dea01258 3940 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 3941 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 3942 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 3943 }
3944 return 0;
3945}
3946
3947/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3948 can be generated using the cbd, chd, cwd or cdd instruction. */
3949int
3950cpat_const_p (rtx x, enum machine_mode mode)
3951{
3952 if (CONSTANT_P (x))
3953 {
3954 enum immediate_class c = classify_immediate (x, mode);
3955 return c == IC_CPAT;
3956 }
3957 return 0;
3958}
644459d0 3959
dea01258 3960rtx
3961gen_cpat_const (rtx * ops)
3962{
3963 unsigned char dst[16];
3964 int i, offset, shift, isize;
3965 if (GET_CODE (ops[3]) != CONST_INT
3966 || GET_CODE (ops[2]) != CONST_INT
3967 || (GET_CODE (ops[1]) != CONST_INT
3968 && GET_CODE (ops[1]) != REG))
3969 return 0;
3970 if (GET_CODE (ops[1]) == REG
3971 && (!REG_POINTER (ops[1])
3972 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3973 return 0;
644459d0 3974
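  /* Build the shuffle pattern that a cbd/chd/cwd/cdd instruction would
     produce: every byte selects from the second shufb operand (0x10 + i),
     except the ISIZE bytes at the insertion offset, which select the
     preferred slot of the first operand.  E.g. a 4-byte insert at offset 0
     yields { 0x00,0x01,0x02,0x03, 0x14,0x15,...,0x1f }.  */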
3975 for (i = 0; i < 16; i++)
dea01258 3976 dst[i] = i + 16;
3977 isize = INTVAL (ops[3]);
3978 if (isize == 1)
3979 shift = 3;
3980 else if (isize == 2)
3981 shift = 2;
3982 else
3983 shift = 0;
3984 offset = (INTVAL (ops[2]) +
3985 (GET_CODE (ops[1]) ==
3986 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3987 for (i = 0; i < isize; i++)
3988 dst[offset + i] = i + shift;
3989 return array_to_constant (TImode, dst);
644459d0 3990}
3991
3992/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3993 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3994 than 16 bytes, the value is repeated across the rest of the array. */
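/* For example, constant_to_array (SImode, GEN_INT (0x12345678), arr)
   fills ARR with the bytes 12 34 56 78 repeated four times.  */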
3995void
3996constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3997{
3998 HOST_WIDE_INT val;
3999 int i, j, first;
4000
4001 memset (arr, 0, 16);
4002 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4003 if (GET_CODE (x) == CONST_INT
4004 || (GET_CODE (x) == CONST_DOUBLE
4005 && (mode == SFmode || mode == DFmode)))
4006 {
4007 gcc_assert (mode != VOIDmode && mode != BLKmode);
4008
4009 if (GET_CODE (x) == CONST_DOUBLE)
4010 val = const_double_to_hwint (x);
4011 else
4012 val = INTVAL (x);
4013 first = GET_MODE_SIZE (mode) - 1;
4014 for (i = first; i >= 0; i--)
4015 {
4016 arr[i] = val & 0xff;
4017 val >>= 8;
4018 }
4019 /* Splat the constant across the whole array. */
4020 for (j = 0, i = first + 1; i < 16; i++)
4021 {
4022 arr[i] = arr[j];
4023 j = (j == first) ? 0 : j + 1;
4024 }
4025 }
4026 else if (GET_CODE (x) == CONST_DOUBLE)
4027 {
4028 val = CONST_DOUBLE_LOW (x);
4029 for (i = 15; i >= 8; i--)
4030 {
4031 arr[i] = val & 0xff;
4032 val >>= 8;
4033 }
4034 val = CONST_DOUBLE_HIGH (x);
4035 for (i = 7; i >= 0; i--)
4036 {
4037 arr[i] = val & 0xff;
4038 val >>= 8;
4039 }
4040 }
4041 else if (GET_CODE (x) == CONST_VECTOR)
4042 {
4043 int units;
4044 rtx elt;
4045 mode = GET_MODE_INNER (mode);
4046 units = CONST_VECTOR_NUNITS (x);
4047 for (i = 0; i < units; i++)
4048 {
4049 elt = CONST_VECTOR_ELT (x, i);
4050 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4051 {
4052 if (GET_CODE (elt) == CONST_DOUBLE)
4053 val = const_double_to_hwint (elt);
4054 else
4055 val = INTVAL (elt);
4056 first = GET_MODE_SIZE (mode) - 1;
4057 if (first + i * GET_MODE_SIZE (mode) > 16)
4058 abort ();
4059 for (j = first; j >= 0; j--)
4060 {
4061 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4062 val >>= 8;
4063 }
4064 }
4065 }
4066 }
4067 else
4068 gcc_unreachable();
4069}
4070
4071/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4072 smaller than 16 bytes, use the bytes that would represent that value
4073 in a register, e.g., for QImode return the value of arr[3]. */
4074rtx
4075array_to_constant (enum machine_mode mode, unsigned char arr[16])
4076{
4077 enum machine_mode inner_mode;
4078 rtvec v;
4079 int units, size, i, j, k;
4080 HOST_WIDE_INT val;
4081
4082 if (GET_MODE_CLASS (mode) == MODE_INT
4083 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4084 {
4085 j = GET_MODE_SIZE (mode);
4086 i = j < 4 ? 4 - j : 0;
4087 for (val = 0; i < j; i++)
4088 val = (val << 8) | arr[i];
4089 val = trunc_int_for_mode (val, mode);
4090 return GEN_INT (val);
4091 }
4092
4093 if (mode == TImode)
4094 {
4095 HOST_WIDE_INT high;
4096 for (i = high = 0; i < 8; i++)
4097 high = (high << 8) | arr[i];
4098 for (i = 8, val = 0; i < 16; i++)
4099 val = (val << 8) | arr[i];
4100 return immed_double_const (val, high, TImode);
4101 }
4102 if (mode == SFmode)
4103 {
4104 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4105 val = trunc_int_for_mode (val, SImode);
171b6d22 4106 return hwint_to_const_double (SFmode, val);
644459d0 4107 }
4108 if (mode == DFmode)
4109 {
4110 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4111 val <<= 32;
4112 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
171b6d22 4113 return hwint_to_const_double (DFmode, val);
644459d0 4114 }
4115
4116 if (!VECTOR_MODE_P (mode))
4117 abort ();
4118
4119 units = GET_MODE_NUNITS (mode);
4120 size = GET_MODE_UNIT_SIZE (mode);
4121 inner_mode = GET_MODE_INNER (mode);
4122 v = rtvec_alloc (units);
4123
4124 for (k = i = 0; i < units; ++i)
4125 {
4126 val = 0;
4127 for (j = 0; j < size; j++, k++)
4128 val = (val << 8) | arr[k];
4129
4130 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4131 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4132 else
4133 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4134 }
4135 if (k > 16)
4136 abort ();
4137
4138 return gen_rtx_CONST_VECTOR (mode, v);
4139}
4140
4141static void
4142reloc_diagnostic (rtx x)
4143{
4144 tree loc_decl, decl = 0;
4145 const char *msg;
4146 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4147 return;
4148
4149 if (GET_CODE (x) == SYMBOL_REF)
4150 decl = SYMBOL_REF_DECL (x);
4151 else if (GET_CODE (x) == CONST
4152 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4153 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4154
4155 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4156 if (decl && !DECL_P (decl))
4157 decl = 0;
4158
4159 /* We use last_assemble_variable_decl to get line information. It's
4160 not always going to be right and might not even be close, but will
4161 be right for the more common cases. */
5df189be 4162 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4163 loc_decl = decl;
4164 else
4165 loc_decl = last_assemble_variable_decl;
4166
4167 /* The decl could be a string constant. */
4168 if (decl && DECL_P (decl))
4169 msg = "%Jcreating run-time relocation for %qD";
4170 else
4171 msg = "creating run-time relocation";
4172
99369027 4173 if (TARGET_WARN_RELOC)
644459d0 4174 warning (0, msg, loc_decl, decl);
99369027 4175 else
4176 error (msg, loc_decl, decl);
644459d0 4177}
4178
4179/* Hook into assemble_integer so we can generate an error for run-time
4180 relocations. The SPU ABI disallows them. */
4181static bool
4182spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4183{
4184 /* By default run-time relocations aren't supported, but we allow them
 4185     in case users support them in their own run-time loader.  And we provide
4186 a warning for those users that don't. */
4187 if ((GET_CODE (x) == SYMBOL_REF)
4188 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4189 reloc_diagnostic (x);
4190
4191 return default_assemble_integer (x, size, aligned_p);
4192}
4193
4194static void
4195spu_asm_globalize_label (FILE * file, const char *name)
4196{
4197 fputs ("\t.global\t", file);
4198 assemble_name (file, name);
4199 fputs ("\n", file);
4200}
4201
4202static bool
4203spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4204{
4205 enum machine_mode mode = GET_MODE (x);
4206 int cost = COSTS_N_INSNS (2);
4207
4208 /* Folding to a CONST_VECTOR will use extra space but there might
4209 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4210 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4211 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4212 because this cost will only be compared against a single insn.
4213 if (code == CONST_VECTOR)
4214 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4215 */
4216
4217 /* Use defaults for float operations. Not accurate but good enough. */
4218 if (mode == DFmode)
4219 {
4220 *total = COSTS_N_INSNS (13);
4221 return true;
4222 }
4223 if (mode == SFmode)
4224 {
4225 *total = COSTS_N_INSNS (6);
4226 return true;
4227 }
4228 switch (code)
4229 {
4230 case CONST_INT:
4231 if (satisfies_constraint_K (x))
4232 *total = 0;
4233 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4234 *total = COSTS_N_INSNS (1);
4235 else
4236 *total = COSTS_N_INSNS (3);
4237 return true;
4238
4239 case CONST:
4240 *total = COSTS_N_INSNS (3);
4241 return true;
4242
4243 case LABEL_REF:
4244 case SYMBOL_REF:
4245 *total = COSTS_N_INSNS (0);
4246 return true;
4247
4248 case CONST_DOUBLE:
4249 *total = COSTS_N_INSNS (5);
4250 return true;
4251
4252 case FLOAT_EXTEND:
4253 case FLOAT_TRUNCATE:
4254 case FLOAT:
4255 case UNSIGNED_FLOAT:
4256 case FIX:
4257 case UNSIGNED_FIX:
4258 *total = COSTS_N_INSNS (7);
4259 return true;
4260
4261 case PLUS:
4262 if (mode == TImode)
4263 {
4264 *total = COSTS_N_INSNS (9);
4265 return true;
4266 }
4267 break;
4268
4269 case MULT:
4270 cost =
4271 GET_CODE (XEXP (x, 0)) ==
4272 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4273 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4274 {
4275 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4276 {
4277 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4278 cost = COSTS_N_INSNS (14);
4279 if ((val & 0xffff) == 0)
4280 cost = COSTS_N_INSNS (9);
4281 else if (val > 0 && val < 0x10000)
4282 cost = COSTS_N_INSNS (11);
4283 }
4284 }
4285 *total = cost;
4286 return true;
4287 case DIV:
4288 case UDIV:
4289 case MOD:
4290 case UMOD:
4291 *total = COSTS_N_INSNS (20);
4292 return true;
4293 case ROTATE:
4294 case ROTATERT:
4295 case ASHIFT:
4296 case ASHIFTRT:
4297 case LSHIFTRT:
4298 *total = COSTS_N_INSNS (4);
4299 return true;
4300 case UNSPEC:
4301 if (XINT (x, 1) == UNSPEC_CONVERT)
4302 *total = COSTS_N_INSNS (0);
4303 else
4304 *total = COSTS_N_INSNS (4);
4305 return true;
4306 }
4307 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4308 if (GET_MODE_CLASS (mode) == MODE_INT
4309 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4310 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4311 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4312 *total = cost;
4313 return true;
4314}
4315
4316enum machine_mode
4317spu_eh_return_filter_mode (void)
4318{
 4319   /* We would like this to be SImode, but sjlj exceptions seem to work
4320 only with word_mode. */
4321 return TImode;
4322}
4323
4324/* Decide whether we can make a sibling call to a function. DECL is the
4325 declaration of the function being targeted by the call and EXP is the
4326 CALL_EXPR representing the call. */
4327static bool
4328spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4329{
4330 return decl && !TARGET_LARGE_MEM;
4331}
4332
4333/* We need to correctly update the back chain pointer and the Available
 4334   Stack Size (which is in the second slot of the sp register). */
4335void
4336spu_allocate_stack (rtx op0, rtx op1)
4337{
4338 HOST_WIDE_INT v;
4339 rtx chain = gen_reg_rtx (V4SImode);
4340 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4341 rtx sp = gen_reg_rtx (V4SImode);
4342 rtx splatted = gen_reg_rtx (V4SImode);
4343 rtx pat = gen_reg_rtx (TImode);
4344
 4345  /* Copy the back chain so we can store it back after the update. */
4346 emit_move_insn (chain, stack_bot);
4347
4348 op1 = force_reg (SImode, op1);
4349
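  /* Splat the requested size into all four word slots (the 00 01 02 03
     shuffle pattern repeats the preferred word) so a single vector
     subtract updates both the stack pointer and the Available Stack
     Size.  */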
4350 v = 0x1020300010203ll;
4351 emit_move_insn (pat, immed_double_const (v, v, TImode));
4352 emit_insn (gen_shufb (splatted, op1, op1, pat));
4353
4354 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4355 emit_insn (gen_subv4si3 (sp, sp, splatted));
4356
4357 if (flag_stack_check)
4358 {
 4359      rtx avail = gen_reg_rtx (SImode);
 4360      rtx result = gen_reg_rtx (SImode);
 4361      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
 4362      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
 4363      emit_insn (gen_spu_heq (result, GEN_INT (0)));
4364 }
4365
4366 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4367
4368 emit_move_insn (stack_bot, chain);
4369
4370 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4371}
4372
4373void
4374spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4375{
4376 static unsigned char arr[16] =
4377 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
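  /* Shufb pattern that replicates the preferred (first) word of its
     source across the whole quadword.  */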
4378 rtx temp = gen_reg_rtx (SImode);
4379 rtx temp2 = gen_reg_rtx (SImode);
4380 rtx temp3 = gen_reg_rtx (V4SImode);
4381 rtx temp4 = gen_reg_rtx (V4SImode);
4382 rtx pat = gen_reg_rtx (TImode);
4383 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4384
4385 /* Restore the backchain from the first word, sp from the second. */
4386 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4387 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4388
4389 emit_move_insn (pat, array_to_constant (TImode, arr));
4390
4391 /* Compute Available Stack Size for sp */
4392 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4393 emit_insn (gen_shufb (temp3, temp, temp, pat));
4394
4395 /* Compute Available Stack Size for back chain */
4396 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4397 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4398 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4399
4400 emit_insn (gen_addv4si3 (sp, sp, temp3));
4401 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4402}
4403
4404static void
4405spu_init_libfuncs (void)
4406{
4407 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4408 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4409 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4410 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4411 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4412 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4413 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4414 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4415 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4416 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4417 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4418
4419 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4420 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4421}
4422
4423/* Make a subreg, stripping any existing subreg. We could possibly just
4424 call simplify_subreg, but in this case we know what we want. */
4425rtx
4426spu_gen_subreg (enum machine_mode mode, rtx x)
4427{
4428 if (GET_CODE (x) == SUBREG)
4429 x = SUBREG_REG (x);
4430 if (GET_MODE (x) == mode)
4431 return x;
4432 return gen_rtx_SUBREG (mode, x, 0);
4433}
4434
4435static bool
fb80456a 4436spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 4437{
4438 return (TYPE_MODE (type) == BLKmode
4439 && ((type) == 0
4440 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4441 || int_size_in_bytes (type) >
4442 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4443}
4444\f
4445/* Create the built-in types and functions */
4446
4447struct spu_builtin_description spu_builtins[] = {
4448#define DEF_BUILTIN(fcode, icode, name, type, params) \
4449 {fcode, icode, name, type, params, NULL_TREE},
4450#include "spu-builtins.def"
4451#undef DEF_BUILTIN
4452};
4453
4454static void
4455spu_init_builtins (void)
4456{
4457 struct spu_builtin_description *d;
4458 unsigned int i;
4459
4460 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4461 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4462 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4463 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4464 V4SF_type_node = build_vector_type (float_type_node, 4);
4465 V2DF_type_node = build_vector_type (double_type_node, 2);
4466
4467 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4468 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4469 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4470 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4471
4472 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4473
4474 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4475 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4476 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4477 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4478 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4479 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4480 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4481 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4482 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4483 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4484 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4485 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4486
4487 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4488 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4489 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4490 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4491 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4492 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4493 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4494 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4495
4496 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4497 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4498
4499 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4500
4501 spu_builtin_types[SPU_BTI_PTR] =
4502 build_pointer_type (build_qualified_type
4503 (void_type_node,
4504 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4505
4506 /* For each builtin we build a new prototype. The tree code will make
4507 sure nodes are shared. */
4508 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4509 {
4510 tree p;
 4511      char name[64];	/* add_builtin_function will make a copy. */
4512 int parm;
4513
4514 if (d->name == 0)
4515 continue;
4516
4517 /* find last parm */
4518 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4519 {
4520 }
4521
4522 p = void_list_node;
4523 while (parm > 1)
4524 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4525
4526 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4527
4528 sprintf (name, "__builtin_%s", d->name);
4529 d->fndecl =
4530 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4531 NULL, NULL_TREE);
a76866d3 4532 if (d->fcode == SPU_MASK_FOR_LOAD)
4533 TREE_READONLY (d->fndecl) = 1;
644459d0 4534 }
4535}
4536
cf31d486 4537void
4538spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4539{
4540 static unsigned char arr[16] =
4541 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4542
4543 rtx temp = gen_reg_rtx (Pmode);
4544 rtx temp2 = gen_reg_rtx (V4SImode);
4545 rtx temp3 = gen_reg_rtx (V4SImode);
4546 rtx pat = gen_reg_rtx (TImode);
4547 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4548
4549 emit_move_insn (pat, array_to_constant (TImode, arr));
4550
4551 /* Restore the sp. */
4552 emit_move_insn (temp, op1);
4553 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4554
4555 /* Compute available stack size for sp. */
4556 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4557 emit_insn (gen_shufb (temp3, temp, temp, pat));
4558
4559 emit_insn (gen_addv4si3 (sp, sp, temp3));
4560 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4561}
4562
644459d0 4563int
4564spu_safe_dma (HOST_WIDE_INT channel)
4565{
4566 return (channel >= 21 && channel <= 27);
4567}
4568
4569void
4570spu_builtin_splats (rtx ops[])
4571{
4572 enum machine_mode mode = GET_MODE (ops[0]);
4573 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4574 {
4575 unsigned char arr[16];
4576 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4577 emit_move_insn (ops[0], array_to_constant (mode, arr));
4578 }
5df189be 4579 else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
644459d0 4580 {
4581 rtvec v = rtvec_alloc (4);
4582 RTVEC_ELT (v, 0) = ops[1];
4583 RTVEC_ELT (v, 1) = ops[1];
4584 RTVEC_ELT (v, 2) = ops[1];
4585 RTVEC_ELT (v, 3) = ops[1];
4586 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4587 }
4588 else
4589 {
4590 rtx reg = gen_reg_rtx (TImode);
4591 rtx shuf;
4592 if (GET_CODE (ops[1]) != REG
4593 && GET_CODE (ops[1]) != SUBREG)
4594 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
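      /* Pick a shufb pattern that replicates the element from its
         preferred slot: e.g. bytes 00 01 02 03 repeated splat word 0 for
         V4SI/V4SF, bytes 02 03 repeated splat the preferred halfword for
         V8HI, and byte 03 repeated splats the preferred byte for V16QI.  */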
4595 switch (mode)
4596 {
4597 case V2DImode:
4598 case V2DFmode:
4599 shuf =
4600 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4601 TImode);
4602 break;
4603 case V4SImode:
4604 case V4SFmode:
4605 shuf =
4606 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4607 TImode);
4608 break;
4609 case V8HImode:
4610 shuf =
4611 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4612 TImode);
4613 break;
4614 case V16QImode:
4615 shuf =
4616 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4617 TImode);
4618 break;
4619 default:
4620 abort ();
4621 }
4622 emit_move_insn (reg, shuf);
4623 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4624 }
4625}
4626
4627void
4628spu_builtin_extract (rtx ops[])
4629{
4630 enum machine_mode mode;
4631 rtx rot, from, tmp;
4632
4633 mode = GET_MODE (ops[1]);
4634
4635 if (GET_CODE (ops[2]) == CONST_INT)
4636 {
4637 switch (mode)
4638 {
4639 case V16QImode:
4640 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4641 break;
4642 case V8HImode:
4643 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4644 break;
4645 case V4SFmode:
4646 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4647 break;
4648 case V4SImode:
4649 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4650 break;
4651 case V2DImode:
4652 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4653 break;
4654 case V2DFmode:
4655 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4656 break;
4657 default:
4658 abort ();
4659 }
4660 return;
4661 }
4662
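  /* Non-constant index: compute a byte rotate count that moves the
     selected element into the preferred scalar slot (e.g. bytes 0-3 for
     a word, byte 3 for a single byte), then rotate and convert.  */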
4663 from = spu_gen_subreg (TImode, ops[1]);
4664 rot = gen_reg_rtx (TImode);
4665 tmp = gen_reg_rtx (SImode);
4666
4667 switch (mode)
4668 {
4669 case V16QImode:
4670 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4671 break;
4672 case V8HImode:
4673 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4674 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4675 break;
4676 case V4SFmode:
4677 case V4SImode:
4678 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4679 break;
4680 case V2DImode:
4681 case V2DFmode:
4682 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4683 break;
4684 default:
4685 abort ();
4686 }
4687 emit_insn (gen_rotqby_ti (rot, from, tmp));
4688
4689 emit_insn (gen_spu_convert (ops[0], rot));
4690}
4691
4692void
4693spu_builtin_insert (rtx ops[])
4694{
4695 enum machine_mode mode = GET_MODE (ops[0]);
4696 enum machine_mode imode = GET_MODE_INNER (mode);
4697 rtx mask = gen_reg_rtx (TImode);
4698 rtx offset;
4699
4700 if (GET_CODE (ops[3]) == CONST_INT)
4701 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4702 else
4703 {
4704 offset = gen_reg_rtx (SImode);
4705 emit_insn (gen_mulsi3
4706 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4707 }
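  /* cpat builds a merge mask for the element's byte offset; the shufb
     below then combines the new scalar with the original vector so that
     only the selected element is replaced.  */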
4708 emit_insn (gen_cpat
4709 (mask, stack_pointer_rtx, offset,
4710 GEN_INT (GET_MODE_SIZE (imode))));
4711 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4712}
4713
4714void
4715spu_builtin_promote (rtx ops[])
4716{
4717 enum machine_mode mode, imode;
4718 rtx rot, from, offset;
4719 HOST_WIDE_INT pos;
4720
4721 mode = GET_MODE (ops[0]);
4722 imode = GET_MODE_INNER (mode);
4723
4724 from = gen_reg_rtx (TImode);
4725 rot = spu_gen_subreg (TImode, ops[0]);
4726
4727 emit_insn (gen_spu_convert (from, ops[1]));
4728
4729 if (GET_CODE (ops[2]) == CONST_INT)
4730 {
4731 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4732 if (GET_MODE_SIZE (imode) < 4)
4733 pos += 4 - GET_MODE_SIZE (imode);
4734 offset = GEN_INT (pos & 15);
4735 }
4736 else
4737 {
4738 offset = gen_reg_rtx (SImode);
4739 switch (mode)
4740 {
4741 case V16QImode:
4742 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4743 break;
4744 case V8HImode:
4745 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4746 emit_insn (gen_addsi3 (offset, offset, offset));
4747 break;
4748 case V4SFmode:
4749 case V4SImode:
4750 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4751 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4752 break;
4753 case V2DImode:
4754 case V2DFmode:
4755 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4756 break;
4757 default:
4758 abort ();
4759 }
4760 }
4761 emit_insn (gen_rotqby_ti (rot, from, offset));
4762}
4763
4764void
4765spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4766{
4767 rtx shuf = gen_reg_rtx (V4SImode);
4768 rtx insn = gen_reg_rtx (V4SImode);
4769 rtx shufc;
4770 rtx insnc;
4771 rtx mem;
4772
4773 fnaddr = force_reg (SImode, fnaddr);
4774 cxt = force_reg (SImode, cxt);
4775
4776 if (TARGET_LARGE_MEM)
4777 {
4778 rtx rotl = gen_reg_rtx (V4SImode);
4779 rtx mask = gen_reg_rtx (V4SImode);
4780 rtx bi = gen_reg_rtx (SImode);
4781 unsigned char shufa[16] = {
4782 2, 3, 0, 1, 18, 19, 16, 17,
4783 0, 1, 2, 3, 16, 17, 18, 19
4784 };
4785 unsigned char insna[16] = {
4786 0x41, 0, 0, 79,
4787 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4788 0x60, 0x80, 0, 79,
4789 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4790 };
4791
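      /* insna holds the instruction templates for the trampoline; the
         shuffle, rotate and select sequence below merges the function
         address and static chain into the bit range covered by the
         0xffff << 7 mask.  */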
4792 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4793 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4794
4795 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4796 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4797 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4798 emit_insn (gen_selb (insn, insnc, rotl, mask));
4799
4800 mem = memory_address (Pmode, tramp);
4801 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4802
4803 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4804 mem = memory_address (Pmode, plus_constant (tramp, 16));
4805 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4806 }
4807 else
4808 {
4809 rtx scxt = gen_reg_rtx (SImode);
4810 rtx sfnaddr = gen_reg_rtx (SImode);
4811 unsigned char insna[16] = {
4812 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4813 0x30, 0, 0, 0,
4814 0, 0, 0, 0,
4815 0, 0, 0, 0
4816 };
4817
4818 shufc = gen_reg_rtx (TImode);
4819 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4820
4821 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4822 fits 18 bits and the last 4 are zeros. This will be true if
 4823         the stack pointer is initialized to 0x3fff0 at program start;
4824 otherwise the ila instruction will be garbage. */
4825
4826 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4827 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4828 emit_insn (gen_cpat
4829 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4830 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4831 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4832
4833 mem = memory_address (Pmode, tramp);
4834 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4835
4836 }
4837 emit_insn (gen_sync ());
4838}
4839
4840void
4841spu_expand_sign_extend (rtx ops[])
4842{
4843 unsigned char arr[16];
4844 rtx pat = gen_reg_rtx (TImode);
4845 rtx sign, c;
4846 int i, last;
4847 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
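  /* Build a shufb control word: bytes 0x00-0x0f select from ops[1] and
     0x10-0x1f from the sign word computed below; the chosen pattern keeps
     the original value bytes in place and fills everything above them
     with copies of the sign byte.  */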
4848 if (GET_MODE (ops[1]) == QImode)
4849 {
4850 sign = gen_reg_rtx (HImode);
4851 emit_insn (gen_extendqihi2 (sign, ops[1]));
4852 for (i = 0; i < 16; i++)
4853 arr[i] = 0x12;
4854 arr[last] = 0x13;
4855 }
4856 else
4857 {
4858 for (i = 0; i < 16; i++)
4859 arr[i] = 0x10;
4860 switch (GET_MODE (ops[1]))
4861 {
4862 case HImode:
4863 sign = gen_reg_rtx (SImode);
4864 emit_insn (gen_extendhisi2 (sign, ops[1]));
4865 arr[last] = 0x03;
4866 arr[last - 1] = 0x02;
4867 break;
4868 case SImode:
4869 sign = gen_reg_rtx (SImode);
4870 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4871 for (i = 0; i < 4; i++)
4872 arr[last - i] = 3 - i;
4873 break;
4874 case DImode:
4875 sign = gen_reg_rtx (SImode);
4876 c = gen_reg_rtx (SImode);
4877 emit_insn (gen_spu_convert (c, ops[1]));
4878 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4879 for (i = 0; i < 8; i++)
4880 arr[last - i] = 7 - i;
4881 break;
4882 default:
4883 abort ();
4884 }
4885 }
4886 emit_move_insn (pat, array_to_constant (TImode, arr));
4887 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4888}
4889
 4890/* Expand vector initialization.  If there are any constant parts,
4891 load constant parts first. Then load any non-constant parts. */
4892void
4893spu_expand_vector_init (rtx target, rtx vals)
4894{
4895 enum machine_mode mode = GET_MODE (target);
4896 int n_elts = GET_MODE_NUNITS (mode);
4897 int n_var = 0;
4898 bool all_same = true;
790c536c 4899 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 4900 int i;
4901
4902 first = XVECEXP (vals, 0, 0);
4903 for (i = 0; i < n_elts; ++i)
4904 {
4905 x = XVECEXP (vals, 0, i);
4906 if (!CONSTANT_P (x))
4907 ++n_var;
4908 else
4909 {
4910 if (first_constant == NULL_RTX)
4911 first_constant = x;
4912 }
4913 if (i > 0 && !rtx_equal_p (x, first))
4914 all_same = false;
4915 }
4916
4917 /* if all elements are the same, use splats to repeat elements */
4918 if (all_same)
4919 {
4920 if (!CONSTANT_P (first)
4921 && !register_operand (first, GET_MODE (x)))
4922 first = force_reg (GET_MODE (first), first);
4923 emit_insn (gen_spu_splats (target, first));
4924 return;
4925 }
4926
4927 /* load constant parts */
4928 if (n_var != n_elts)
4929 {
4930 if (n_var == 0)
4931 {
4932 emit_move_insn (target,
4933 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4934 }
4935 else
4936 {
4937 rtx constant_parts_rtx = copy_rtx (vals);
4938
4939 gcc_assert (first_constant != NULL_RTX);
 4940	  /* Fill empty slots with the first constant; this increases
4941 our chance of using splats in the recursive call below. */
4942 for (i = 0; i < n_elts; ++i)
4943 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4944 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4945
4946 spu_expand_vector_init (target, constant_parts_rtx);
4947 }
4948 }
4949
4950 /* load variable parts */
4951 if (n_var != 0)
4952 {
4953 rtx insert_operands[4];
4954
4955 insert_operands[0] = target;
4956 insert_operands[2] = target;
4957 for (i = 0; i < n_elts; ++i)
4958 {
4959 x = XVECEXP (vals, 0, i);
4960 if (!CONSTANT_P (x))
4961 {
4962 if (!register_operand (x, GET_MODE (x)))
4963 x = force_reg (GET_MODE (x), x);
4964 insert_operands[1] = x;
4965 insert_operands[3] = GEN_INT (i);
4966 spu_builtin_insert (insert_operands);
4967 }
4968 }
4969 }
4970}
6352eedf 4971
5474166e 4972/* Return insn index for the vector compare instruction for given CODE,
4973 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
4974
4975static int
4976get_vec_cmp_insn (enum rtx_code code,
4977 enum machine_mode dest_mode,
4978 enum machine_mode op_mode)
4979
4980{
4981 switch (code)
4982 {
4983 case EQ:
4984 if (dest_mode == V16QImode && op_mode == V16QImode)
4985 return CODE_FOR_ceq_v16qi;
4986 if (dest_mode == V8HImode && op_mode == V8HImode)
4987 return CODE_FOR_ceq_v8hi;
4988 if (dest_mode == V4SImode && op_mode == V4SImode)
4989 return CODE_FOR_ceq_v4si;
4990 if (dest_mode == V4SImode && op_mode == V4SFmode)
4991 return CODE_FOR_ceq_v4sf;
4992 if (dest_mode == V2DImode && op_mode == V2DFmode)
4993 return CODE_FOR_ceq_v2df;
4994 break;
4995 case GT:
4996 if (dest_mode == V16QImode && op_mode == V16QImode)
4997 return CODE_FOR_cgt_v16qi;
4998 if (dest_mode == V8HImode && op_mode == V8HImode)
4999 return CODE_FOR_cgt_v8hi;
5000 if (dest_mode == V4SImode && op_mode == V4SImode)
5001 return CODE_FOR_cgt_v4si;
5002 if (dest_mode == V4SImode && op_mode == V4SFmode)
5003 return CODE_FOR_cgt_v4sf;
5004 if (dest_mode == V2DImode && op_mode == V2DFmode)
5005 return CODE_FOR_cgt_v2df;
5006 break;
5007 case GTU:
5008 if (dest_mode == V16QImode && op_mode == V16QImode)
5009 return CODE_FOR_clgt_v16qi;
5010 if (dest_mode == V8HImode && op_mode == V8HImode)
5011 return CODE_FOR_clgt_v8hi;
5012 if (dest_mode == V4SImode && op_mode == V4SImode)
5013 return CODE_FOR_clgt_v4si;
5014 break;
5015 default:
5016 break;
5017 }
5018 return -1;
5019}
5020
5021/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5022 DMODE is expected destination mode. This is a recursive function. */
5023
5024static rtx
5025spu_emit_vector_compare (enum rtx_code rcode,
5026 rtx op0, rtx op1,
5027 enum machine_mode dmode)
5028{
5029 int vec_cmp_insn;
5030 rtx mask;
5031 enum machine_mode dest_mode;
5032 enum machine_mode op_mode = GET_MODE (op1);
5033
5034 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5035
 5036  /* Floating point vector compare instructions use destination V4SImode.
 5037     Double floating point vector compare instructions use destination V2DImode.
5038 Move destination to appropriate mode later. */
5039 if (dmode == V4SFmode)
5040 dest_mode = V4SImode;
5041 else if (dmode == V2DFmode)
5042 dest_mode = V2DImode;
5043 else
5044 dest_mode = dmode;
5045
5046 mask = gen_reg_rtx (dest_mode);
5047 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5048
5049 if (vec_cmp_insn == -1)
5050 {
5051 bool swap_operands = false;
5052 bool try_again = false;
5053 switch (rcode)
5054 {
5055 case LT:
5056 rcode = GT;
5057 swap_operands = true;
5058 try_again = true;
5059 break;
5060 case LTU:
5061 rcode = GTU;
5062 swap_operands = true;
5063 try_again = true;
5064 break;
5065 case NE:
5066 /* Treat A != B as ~(A==B). */
5067 {
5068 enum insn_code nor_code;
5069 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5070 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5071 gcc_assert (nor_code != CODE_FOR_nothing);
5072 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5073 if (dmode != dest_mode)
5074 {
5075 rtx temp = gen_reg_rtx (dest_mode);
5076 convert_move (temp, mask, 0);
5077 return temp;
5078 }
5079 return mask;
5080 }
5081 break;
5082 case GE:
5083 case GEU:
5084 case LE:
5085 case LEU:
5086 /* Try GT/GTU/LT/LTU OR EQ */
5087 {
5088 rtx c_rtx, eq_rtx;
5089 enum insn_code ior_code;
5090 enum rtx_code new_code;
5091
5092 switch (rcode)
5093 {
5094 case GE: new_code = GT; break;
5095 case GEU: new_code = GTU; break;
5096 case LE: new_code = LT; break;
5097 case LEU: new_code = LTU; break;
5098 default:
5099 gcc_unreachable ();
5100 }
5101
5102 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5103 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5104
99bdde56 5105 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5106 gcc_assert (ior_code != CODE_FOR_nothing);
5107 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5108 if (dmode != dest_mode)
5109 {
5110 rtx temp = gen_reg_rtx (dest_mode);
5111 convert_move (temp, mask, 0);
5112 return temp;
5113 }
5114 return mask;
5115 }
5116 break;
5117 default:
5118 gcc_unreachable ();
5119 }
5120
5121 /* You only get two chances. */
5122 if (try_again)
5123 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5124
5125 gcc_assert (vec_cmp_insn != -1);
5126
5127 if (swap_operands)
5128 {
5129 rtx tmp;
5130 tmp = op0;
5131 op0 = op1;
5132 op1 = tmp;
5133 }
5134 }
5135
5136 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5137 if (dmode != dest_mode)
5138 {
5139 rtx temp = gen_reg_rtx (dest_mode);
5140 convert_move (temp, mask, 0);
5141 return temp;
5142 }
5143 return mask;
5144}
5145
5146
5147/* Emit vector conditional expression.
5148 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5149 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5150
5151int
5152spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5153 rtx cond, rtx cc_op0, rtx cc_op1)
5154{
5155 enum machine_mode dest_mode = GET_MODE (dest);
5156 enum rtx_code rcode = GET_CODE (cond);
5157 rtx mask;
5158
5159 /* Get the vector mask for the given relational operations. */
5160 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5161
 5162  emit_insn (gen_selb (dest, op2, op1, mask));
5163
5164 return 1;
5165}
5166
6352eedf 5167static rtx
5168spu_force_reg (enum machine_mode mode, rtx op)
5169{
5170 rtx x, r;
5171 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5172 {
5173 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5174 || GET_MODE (op) == BLKmode)
5175 return force_reg (mode, convert_to_mode (mode, op, 0));
5176 abort ();
5177 }
5178
5179 r = force_reg (GET_MODE (op), op);
5180 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5181 {
5182 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5183 if (x)
5184 return x;
5185 }
5186
5187 x = gen_reg_rtx (mode);
5188 emit_insn (gen_spu_convert (x, r));
5189 return x;
5190}
5191
5192static void
5193spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5194{
5195 HOST_WIDE_INT v = 0;
5196 int lsbits;
5197 /* Check the range of immediate operands. */
5198 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5199 {
5200 int range = p - SPU_BTI_7;
5df189be 5201
5202 if (!CONSTANT_P (op))
6352eedf 5203 error ("%s expects an integer literal in the range [%d, %d].",
5204 d->name,
5205 spu_builtin_range[range].low, spu_builtin_range[range].high);
5206
5207 if (GET_CODE (op) == CONST
5208 && (GET_CODE (XEXP (op, 0)) == PLUS
5209 || GET_CODE (XEXP (op, 0)) == MINUS))
5210 {
5211 v = INTVAL (XEXP (XEXP (op, 0), 1));
5212 op = XEXP (XEXP (op, 0), 0);
5213 }
5214 else if (GET_CODE (op) == CONST_INT)
5215 v = INTVAL (op);
5df189be 5216 else if (GET_CODE (op) == CONST_VECTOR
5217 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5218 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5219
5220 /* The default for v is 0 which is valid in every range. */
5221 if (v < spu_builtin_range[range].low
5222 || v > spu_builtin_range[range].high)
5223 error ("%s expects an integer literal in the range [%d, %d]. ("
5224 HOST_WIDE_INT_PRINT_DEC ")",
5225 d->name,
5226 spu_builtin_range[range].low, spu_builtin_range[range].high,
5227 v);
6352eedf 5228
5229 switch (p)
5230 {
5231 case SPU_BTI_S10_4:
5232 lsbits = 4;
5233 break;
5234 case SPU_BTI_U16_2:
 5235	  /* This is only used in lqa and stqa.  Even though the insns
5236 encode 16 bits of the address (all but the 2 least
5237 significant), only 14 bits are used because it is masked to
5238 be 16 byte aligned. */
5239 lsbits = 4;
5240 break;
5241 case SPU_BTI_S16_2:
5242 /* This is used for lqr and stqr. */
5243 lsbits = 2;
5244 break;
5245 default:
5246 lsbits = 0;
5247 }
5248
5249 if (GET_CODE (op) == LABEL_REF
5250 || (GET_CODE (op) == SYMBOL_REF
5251 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5252 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5253 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5254 d->name);
5255 }
5256}
5257
5258
5259static void
5df189be 5260expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5261 rtx target, rtx ops[])
5262{
5263 enum insn_code icode = d->icode;
5df189be 5264 int i = 0, a;
6352eedf 5265
5266 /* Expand the arguments into rtl. */
5267
5268 if (d->parm[0] != SPU_BTI_VOID)
5269 ops[i++] = target;
5270
5df189be 5271 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
6352eedf 5272 {
5df189be 5273 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5274 if (arg == 0)
5275 abort ();
5276 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5277 }
5278}
5279
5280static rtx
5281spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5282 tree exp, rtx target)
6352eedf 5283{
5284 rtx pat;
5285 rtx ops[8];
5286 enum insn_code icode = d->icode;
5287 enum machine_mode mode, tmode;
5288 int i, p;
5289 tree return_type;
5290
 5291  /* Set up ops[] with values from the call's argument list. */
5df189be 5292 expand_builtin_args (d, exp, target, ops);
6352eedf 5293
5294 /* Handle the target operand which must be operand 0. */
5295 i = 0;
5296 if (d->parm[0] != SPU_BTI_VOID)
5297 {
5298
5299 /* We prefer the mode specified for the match_operand otherwise
5300 use the mode from the builtin function prototype. */
5301 tmode = insn_data[d->icode].operand[0].mode;
5302 if (tmode == VOIDmode)
5303 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5304
5305 /* Try to use target because not using it can lead to extra copies
5306 and when we are using all of the registers extra copies leads
5307 to extra spills. */
5308 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5309 ops[0] = target;
5310 else
5311 target = ops[0] = gen_reg_rtx (tmode);
5312
5313 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5314 abort ();
5315
5316 i++;
5317 }
5318
a76866d3 5319 if (d->fcode == SPU_MASK_FOR_LOAD)
5320 {
5321 enum machine_mode mode = insn_data[icode].operand[1].mode;
5322 tree arg;
5323 rtx addr, op, pat;
5324
5325 /* get addr */
5df189be 5326 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5327 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5328 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5329 addr = memory_address (mode, op);
5330
5331 /* negate addr */
5332 op = gen_reg_rtx (GET_MODE (addr));
5333 emit_insn (gen_rtx_SET (VOIDmode, op,
5334 gen_rtx_NEG (GET_MODE (addr), addr)));
5335 op = gen_rtx_MEM (mode, op);
5336
5337 pat = GEN_FCN (icode) (target, op);
5338 if (!pat)
5339 return 0;
5340 emit_insn (pat);
5341 return target;
5342 }
5343
6352eedf 5344  /* Ignore align_hint, but still expand its args in case they have
5345 side effects. */
5346 if (icode == CODE_FOR_spu_align_hint)
5347 return 0;
5348
5349 /* Handle the rest of the operands. */
5350 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5351 {
5352 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5353 mode = insn_data[d->icode].operand[i].mode;
5354 else
5355 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5356
5357 /* mode can be VOIDmode here for labels */
5358
5359 /* For specific intrinsics with an immediate operand, e.g.,
5360 si_ai(), we sometimes need to convert the scalar argument to a
5361 vector argument by splatting the scalar. */
5362 if (VECTOR_MODE_P (mode)
5363 && (GET_CODE (ops[i]) == CONST_INT
5364 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
5365 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
5366 {
5367 if (GET_CODE (ops[i]) == CONST_INT)
5368 ops[i] = spu_const (mode, INTVAL (ops[i]));
5369 else
5370 {
5371 rtx reg = gen_reg_rtx (mode);
5372 enum machine_mode imode = GET_MODE_INNER (mode);
5373 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5374 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5375 if (imode != GET_MODE (ops[i]))
5376 ops[i] = convert_to_mode (imode, ops[i],
5377 TYPE_UNSIGNED (spu_builtin_types
5378 [d->parm[i]]));
5379 emit_insn (gen_spu_splats (reg, ops[i]));
5380 ops[i] = reg;
5381 }
5382 }
5383
5df189be 5384 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5385
6352eedf 5386 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5387 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 5388 }
5389
5390 switch (insn_data[icode].n_operands)
5391 {
5392 case 0:
5393 pat = GEN_FCN (icode) (0);
5394 break;
5395 case 1:
5396 pat = GEN_FCN (icode) (ops[0]);
5397 break;
5398 case 2:
5399 pat = GEN_FCN (icode) (ops[0], ops[1]);
5400 break;
5401 case 3:
5402 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5403 break;
5404 case 4:
5405 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5406 break;
5407 case 5:
5408 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5409 break;
5410 case 6:
5411 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5412 break;
5413 default:
5414 abort ();
5415 }
5416
5417 if (!pat)
5418 abort ();
5419
5420 if (d->type == B_CALL || d->type == B_BISLED)
5421 emit_call_insn (pat);
5422 else if (d->type == B_JUMP)
5423 {
5424 emit_jump_insn (pat);
5425 emit_barrier ();
5426 }
5427 else
5428 emit_insn (pat);
5429
5430 return_type = spu_builtin_types[d->parm[0]];
5431 if (d->parm[0] != SPU_BTI_VOID
5432 && GET_MODE (target) != TYPE_MODE (return_type))
5433 {
5434 /* target is the return value. It should always be the mode of
5435 the builtin function prototype. */
5436 target = spu_force_reg (TYPE_MODE (return_type), target);
5437 }
5438
5439 return target;
5440}
5441
5442rtx
5443spu_expand_builtin (tree exp,
5444 rtx target,
5445 rtx subtarget ATTRIBUTE_UNUSED,
5446 enum machine_mode mode ATTRIBUTE_UNUSED,
5447 int ignore ATTRIBUTE_UNUSED)
5448{
5df189be 5449 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 5450 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 5451 struct spu_builtin_description *d;
5452
5453 if (fcode < NUM_SPU_BUILTINS)
5454 {
5455 d = &spu_builtins[fcode];
5456
5df189be 5457 return spu_expand_builtin_1 (d, exp, target);
6352eedf 5458 }
5459 abort ();
5460}
5461
e99f512d 5462/* Implement targetm.vectorize.builtin_mul_widen_even. */
5463static tree
5464spu_builtin_mul_widen_even (tree type)
5465{
e99f512d 5466 switch (TYPE_MODE (type))
5467 {
5468 case V8HImode:
5469 if (TYPE_UNSIGNED (type))
5470 return spu_builtins[SPU_MULE_0].fndecl;
5471 else
5472 return spu_builtins[SPU_MULE_1].fndecl;
5473 break;
5474 default:
5475 return NULL_TREE;
5476 }
5477}
5478
5479/* Implement targetm.vectorize.builtin_mul_widen_odd. */
5480static tree
5481spu_builtin_mul_widen_odd (tree type)
5482{
5483 switch (TYPE_MODE (type))
5484 {
5485 case V8HImode:
5486 if (TYPE_UNSIGNED (type))
5487 return spu_builtins[SPU_MULO_1].fndecl;
5488 else
5489 return spu_builtins[SPU_MULO_0].fndecl;
5490 break;
5491 default:
5492 return NULL_TREE;
5493 }
5494}
5495
a76866d3 5496/* Implement targetm.vectorize.builtin_mask_for_load. */
5497static tree
5498spu_builtin_mask_for_load (void)
5499{
5500 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5501 gcc_assert (d);
5502 return d->fndecl;
5503}
5df189be 5504
a28df51d 5505/* Implement targetm.vectorize.builtin_vectorization_cost. */
5506static int
5507spu_builtin_vectorization_cost (bool runtime_test)
5508{
5509 /* If the branch of the runtime test is taken - i.e. - the vectorized
5510 version is skipped - this incurs a misprediction cost (because the
5511 vectorized version is expected to be the fall-through). So we subtract
becfaa62 5512 the latency of a mispredicted branch from the costs that are incurred
a28df51d 5513 when the vectorized version is executed. */
5514 if (runtime_test)
5515 return -19;
5516 else
5517 return 0;
5518}
5519
0e87db76 5520/* Return true iff a data reference of TYPE can reach vector alignment (16)
 5521   after applying N iterations.  This routine does not determine how many
 5522   iterations are required to reach the desired alignment.  */
5523
5524static bool
a9f1838b 5525spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 5526{
5527 if (is_packed)
5528 return false;
5529
5530 /* All other types are naturally aligned. */
5531 return true;
5532}
5533
d52fd16a 5534/* Count the total number of instructions in each pipe and return the
5535 maximum, which is used as the Minimum Iteration Interval (MII)
5536 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 5537   A return value of -2 means the instruction can issue in either pipe0 or pipe1. */
5538static int
5539spu_sms_res_mii (struct ddg *g)
5540{
5541 int i;
5542 unsigned t[4] = {0, 0, 0, 0};
5543
5544 for (i = 0; i < g->num_nodes; i++)
5545 {
5546 rtx insn = g->nodes[i].insn;
5547 int p = get_pipe (insn) + 2;
5548
5549 assert (p >= 0);
5550 assert (p < 4);
5551
5552 t[p]++;
5553 if (dump_file && INSN_P (insn))
5554 fprintf (dump_file, "i%d %s %d %d\n",
5555 INSN_UID (insn),
5556 insn_data[INSN_CODE(insn)].name,
5557 p, t[p]);
5558 }
5559 if (dump_file)
5560 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5561
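  /* t[0] counts insns that can issue in either pipe, t[2] pipe0-only and
     t[3] pipe1-only; t[1] is not considered.  The MII is the larger of
     the busier single pipe and half (rounded up) of everything that must
     share the two pipes.  */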
5562 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
5563}
5564
5565
5df189be 5566void
5567spu_init_expanders (void)
5568{
 5569  /* The hard frame pointer is only 128 bit aligned when
 5570     frame_pointer_needed is true.  We don't know that until we're
 5571     expanding the prologue.  */
5572 if (cfun)
5573 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 5574}
5575
5576static enum machine_mode
5577spu_libgcc_cmp_return_mode (void)
5578{
5579
 5580  /* For SPU, word_mode is TImode, so it is better to use SImode
 5581     for compare returns.  */
5582 return SImode;
5583}
5584
5585static enum machine_mode
5586spu_libgcc_shift_count_mode (void)
5587{
 5588  /* For SPU, word_mode is TImode, so it is better to use SImode
 5589     for shift counts.  */
5590 return SImode;
5591}