]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/spu/spu.c
* pretty-print.h (struct pretty_print_info): Add
[thirdparty/gcc.git] / gcc / config / spu / spu.c
CommitLineData
cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
644459d0 51#include "machmode.h"
75a70cf9 52#include "gimple.h"
644459d0 53#include "tm-constrs.h"
d52fd16a 54#include "ddg.h"
5a976006 55#include "sbitmap.h"
56#include "timevar.h"
57#include "df.h"
6352eedf 58
59/* Builtin types, data and prototypes. */
c2233b46 60
61enum spu_builtin_type_index
62{
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112};
113
114#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
6352eedf 127struct spu_builtin_range
128{
129 int low, high;
130};
131
132static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145};
146
644459d0 147\f
148/* Target specific attribute specifications. */
149char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151/* Prototypes and external defs. */
152static void spu_init_builtins (void);
153static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
155static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
156static rtx get_pic_reg (void);
157static int need_to_save_reg (int regno, int saving);
158static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
159static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
160static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
161 rtx scratch);
162static void emit_nop_for_insn (rtx insn);
163static bool insn_clobbers_hbr (rtx insn);
164static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 165 int distance, sbitmap blocks);
5474166e 166static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
167 enum machine_mode dmode);
644459d0 168static rtx get_branch_target (rtx branch);
644459d0 169static void spu_machine_dependent_reorg (void);
170static int spu_sched_issue_rate (void);
171static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
172 int can_issue_more);
173static int get_pipe (rtx insn);
644459d0 174static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 175static void spu_sched_init_global (FILE *, int, int);
176static void spu_sched_init (FILE *, int, int);
177static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 178static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
179 int flags,
180 unsigned char *no_add_attrs);
181static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
182 int flags,
183 unsigned char *no_add_attrs);
184static int spu_naked_function_p (tree func);
fb80456a 185static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
186 const_tree type, unsigned char named);
644459d0 187static tree spu_build_builtin_va_list (void);
8a58ed0a 188static void spu_va_start (tree, rtx);
75a70cf9 189static tree spu_gimplify_va_arg_expr (tree valist, tree type,
190 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 191static int regno_aligned_for_load (int regno);
192static int store_with_one_insn_p (rtx mem);
644459d0 193static int mem_is_padded_component_ref (rtx x);
194static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
195static void spu_asm_globalize_label (FILE * file, const char *name);
196static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 197 int *total, bool speed);
644459d0 198static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
199static void spu_init_libfuncs (void);
fb80456a 200static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 201static void fix_range (const char *);
69ced2d6 202static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 203static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
e99f512d 204static tree spu_builtin_mul_widen_even (tree);
205static tree spu_builtin_mul_widen_odd (tree);
a76866d3 206static tree spu_builtin_mask_for_load (void);
a28df51d 207static int spu_builtin_vectorization_cost (bool);
a9f1838b 208static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 209static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 210static int spu_sms_res_mii (struct ddg *g);
5a976006 211static void asm_file_start (void);
a08dfd55 212static unsigned int spu_section_type_flags (tree, const char *, int);
644459d0 213
214extern const char *reg_names[];
215rtx spu_compare_op0, spu_compare_op1;
216
5474166e 217/* Which instruction set architecture to use. */
218int spu_arch;
219/* Which cpu are we tuning for. */
220int spu_tune;
221
5a976006 222/* The hardware requires 8 insns between a hint and the branch it
223 effects. This variable describes how many rtl instructions the
224 compiler needs to see before inserting a hint, and then the compiler
225 will insert enough nops to make it at least 8 insns. The default is
226 for the compiler to allow up to 2 nops be emitted. The nops are
227 inserted in pairs, so we round down. */
228int spu_hint_dist = (8*4) - (2*4);
229
230/* Determines whether we run variable tracking in machine dependent
231 reorganization. */
232static int spu_flag_var_tracking;
233
644459d0 234enum spu_immediate {
235 SPU_NONE,
236 SPU_IL,
237 SPU_ILA,
238 SPU_ILH,
239 SPU_ILHU,
240 SPU_ORI,
241 SPU_ORHI,
242 SPU_ORBI,
99369027 243 SPU_IOHL
644459d0 244};
dea01258 245enum immediate_class
246{
247 IC_POOL, /* constant pool */
248 IC_IL1, /* one il* instruction */
249 IC_IL2, /* both ilhu and iohl instructions */
250 IC_IL1s, /* one il* instruction */
251 IC_IL2s, /* both ilhu and iohl instructions */
252 IC_FSMBI, /* the fsmbi instruction */
253 IC_CPAT, /* one of the c*d instructions */
5df189be 254 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 255};
644459d0 256
257static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
258static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 259static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
260static enum immediate_class classify_immediate (rtx op,
261 enum machine_mode mode);
644459d0 262
1bd43494 263static enum machine_mode spu_unwind_word_mode (void);
264
ea32e033 265static enum machine_mode
266spu_libgcc_cmp_return_mode (void);
267
268static enum machine_mode
269spu_libgcc_shift_count_mode (void);
270
644459d0 271\f
272/* TARGET overrides. */
273
274#undef TARGET_INIT_BUILTINS
275#define TARGET_INIT_BUILTINS spu_init_builtins
276
644459d0 277#undef TARGET_EXPAND_BUILTIN
278#define TARGET_EXPAND_BUILTIN spu_expand_builtin
279
1bd43494 280#undef TARGET_UNWIND_WORD_MODE
281#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 282
41e3a0c7 283#undef TARGET_LEGITIMIZE_ADDRESS
284#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
285
644459d0 286/* The .8byte directive doesn't seem to work well for a 32 bit
287 architecture. */
288#undef TARGET_ASM_UNALIGNED_DI_OP
289#define TARGET_ASM_UNALIGNED_DI_OP NULL
290
291#undef TARGET_RTX_COSTS
292#define TARGET_RTX_COSTS spu_rtx_costs
293
294#undef TARGET_ADDRESS_COST
f529eb25 295#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 296
297#undef TARGET_SCHED_ISSUE_RATE
298#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
299
5a976006 300#undef TARGET_SCHED_INIT_GLOBAL
301#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
302
303#undef TARGET_SCHED_INIT
304#define TARGET_SCHED_INIT spu_sched_init
305
644459d0 306#undef TARGET_SCHED_VARIABLE_ISSUE
307#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
308
5a976006 309#undef TARGET_SCHED_REORDER
310#define TARGET_SCHED_REORDER spu_sched_reorder
311
312#undef TARGET_SCHED_REORDER2
313#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 314
315#undef TARGET_SCHED_ADJUST_COST
316#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
317
318const struct attribute_spec spu_attribute_table[];
319#undef TARGET_ATTRIBUTE_TABLE
320#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
321
322#undef TARGET_ASM_INTEGER
323#define TARGET_ASM_INTEGER spu_assemble_integer
324
325#undef TARGET_SCALAR_MODE_SUPPORTED_P
326#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
327
328#undef TARGET_VECTOR_MODE_SUPPORTED_P
329#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
330
331#undef TARGET_FUNCTION_OK_FOR_SIBCALL
332#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
333
334#undef TARGET_ASM_GLOBALIZE_LABEL
335#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
336
337#undef TARGET_PASS_BY_REFERENCE
338#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
339
340#undef TARGET_MUST_PASS_IN_STACK
341#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
342
343#undef TARGET_BUILD_BUILTIN_VA_LIST
344#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
345
8a58ed0a 346#undef TARGET_EXPAND_BUILTIN_VA_START
347#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
348
644459d0 349#undef TARGET_SETUP_INCOMING_VARARGS
350#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
351
352#undef TARGET_MACHINE_DEPENDENT_REORG
353#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
354
355#undef TARGET_GIMPLIFY_VA_ARG_EXPR
356#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
357
358#undef TARGET_DEFAULT_TARGET_FLAGS
359#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
360
361#undef TARGET_INIT_LIBFUNCS
362#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
363
364#undef TARGET_RETURN_IN_MEMORY
365#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
366
69ced2d6 367#undef TARGET_ENCODE_SECTION_INFO
368#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
369
e99f512d 370#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
371#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
372
373#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
374#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
375
a76866d3 376#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
377#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
378
a28df51d 379#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
380#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
381
0e87db76 382#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
383#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
384
a0515226 385#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
386#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
387
ea32e033 388#undef TARGET_LIBGCC_CMP_RETURN_MODE
389#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
390
391#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
392#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
393
d52fd16a 394#undef TARGET_SCHED_SMS_RES_MII
395#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
396
5a976006 397#undef TARGET_ASM_FILE_START
398#define TARGET_ASM_FILE_START asm_file_start
399
a08dfd55 400#undef TARGET_SECTION_TYPE_FLAGS
401#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
402
644459d0 403struct gcc_target targetm = TARGET_INITIALIZER;
404
5df189be 405void
406spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
407{
5df189be 408 /* Override some of the default param values. With so many registers
409 larger values are better for these params. */
410 MAX_PENDING_LIST_LENGTH = 128;
411
412 /* With so many registers this is better on by default. */
413 flag_rename_registers = 1;
414}
415
644459d0 416/* Sometimes certain combinations of command options do not make sense
417 on a particular target machine. You can define a macro
418 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
419 executed once just after all the command options have been parsed. */
420void
421spu_override_options (void)
422{
14d408d9 423 /* Small loops will be unpeeled at -O3. For SPU it is more important
424 to keep code small by default. */
425 if (!flag_unroll_loops && !flag_peel_loops
426 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
427 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
428
644459d0 429 flag_omit_frame_pointer = 1;
430
5a976006 431 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 432 if (align_functions < 8)
433 align_functions = 8;
c7b91b14 434
5a976006 435 spu_hint_dist = 8*4 - spu_max_nops*4;
436 if (spu_hint_dist < 0)
437 spu_hint_dist = 0;
438
c7b91b14 439 if (spu_fixed_range_string)
440 fix_range (spu_fixed_range_string);
5474166e 441
442 /* Determine processor architectural level. */
443 if (spu_arch_string)
444 {
445 if (strcmp (&spu_arch_string[0], "cell") == 0)
446 spu_arch = PROCESSOR_CELL;
447 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
448 spu_arch = PROCESSOR_CELLEDP;
449 else
450 error ("Unknown architecture '%s'", &spu_arch_string[0]);
451 }
452
453 /* Determine processor to tune for. */
454 if (spu_tune_string)
455 {
456 if (strcmp (&spu_tune_string[0], "cell") == 0)
457 spu_tune = PROCESSOR_CELL;
458 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
459 spu_tune = PROCESSOR_CELLEDP;
460 else
461 error ("Unknown architecture '%s'", &spu_tune_string[0]);
462 }
98bbec1e 463
13684256 464 /* Change defaults according to the processor architecture. */
465 if (spu_arch == PROCESSOR_CELLEDP)
466 {
467 /* If no command line option has been otherwise specified, change
468 the default to -mno-safe-hints on celledp -- only the original
469 Cell/B.E. processors require this workaround. */
470 if (!(target_flags_explicit & MASK_SAFE_HINTS))
471 target_flags &= ~MASK_SAFE_HINTS;
472 }
473
98bbec1e 474 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 475}
476\f
477/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
478 struct attribute_spec.handler. */
479
480/* Table of machine attributes. */
481const struct attribute_spec spu_attribute_table[] =
482{
483 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
484 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
485 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
486 { NULL, 0, 0, false, false, false, NULL }
487};
488
489/* True if MODE is valid for the target. By "valid", we mean able to
490 be manipulated in non-trivial ways. In particular, this means all
491 the arithmetic is supported. */
492static bool
493spu_scalar_mode_supported_p (enum machine_mode mode)
494{
495 switch (mode)
496 {
497 case QImode:
498 case HImode:
499 case SImode:
500 case SFmode:
501 case DImode:
502 case TImode:
503 case DFmode:
504 return true;
505
506 default:
507 return false;
508 }
509}
510
511/* Similarly for vector modes. "Supported" here is less strict. At
512 least some operations are supported; need to check optabs or builtins
513 for further details. */
514static bool
515spu_vector_mode_supported_p (enum machine_mode mode)
516{
517 switch (mode)
518 {
519 case V16QImode:
520 case V8HImode:
521 case V4SImode:
522 case V2DImode:
523 case V4SFmode:
524 case V2DFmode:
525 return true;
526
527 default:
528 return false;
529 }
530}
531
532/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
533 least significant bytes of the outer mode. This function returns
534 TRUE for the SUBREG's where this is correct. */
535int
536valid_subreg (rtx op)
537{
538 enum machine_mode om = GET_MODE (op);
539 enum machine_mode im = GET_MODE (SUBREG_REG (op));
540 return om != VOIDmode && im != VOIDmode
541 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 542 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
543 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 544}
545
546/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
fa7637bd 547 and adjust the start offset. */
644459d0 548static rtx
549adjust_operand (rtx op, HOST_WIDE_INT * start)
550{
551 enum machine_mode mode;
552 int op_size;
38aca5eb 553 /* Strip any paradoxical SUBREG. */
554 if (GET_CODE (op) == SUBREG
555 && (GET_MODE_BITSIZE (GET_MODE (op))
556 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 557 {
558 if (start)
559 *start -=
560 GET_MODE_BITSIZE (GET_MODE (op)) -
561 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
562 op = SUBREG_REG (op);
563 }
564 /* If it is smaller than SI, assure a SUBREG */
565 op_size = GET_MODE_BITSIZE (GET_MODE (op));
566 if (op_size < 32)
567 {
568 if (start)
569 *start += 32 - op_size;
570 op_size = 32;
571 }
572 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
573 mode = mode_for_size (op_size, MODE_INT, 0);
574 if (mode != GET_MODE (op))
575 op = gen_rtx_SUBREG (mode, op, 0);
576 return op;
577}
578
579void
580spu_expand_extv (rtx ops[], int unsignedp)
581{
582 HOST_WIDE_INT width = INTVAL (ops[2]);
583 HOST_WIDE_INT start = INTVAL (ops[3]);
584 HOST_WIDE_INT src_size, dst_size;
585 enum machine_mode src_mode, dst_mode;
586 rtx dst = ops[0], src = ops[1];
587 rtx s;
588
589 dst = adjust_operand (ops[0], 0);
590 dst_mode = GET_MODE (dst);
591 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
592
644459d0 593 src = adjust_operand (src, &start);
594 src_mode = GET_MODE (src);
595 src_size = GET_MODE_BITSIZE (GET_MODE (src));
596
597 if (start > 0)
598 {
599 s = gen_reg_rtx (src_mode);
600 switch (src_mode)
601 {
602 case SImode:
603 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
604 break;
605 case DImode:
606 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
607 break;
608 case TImode:
609 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
610 break;
611 default:
612 abort ();
613 }
614 src = s;
615 }
616
617 if (width < src_size)
618 {
619 rtx pat;
620 int icode;
621 switch (src_mode)
622 {
623 case SImode:
624 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
625 break;
626 case DImode:
627 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
628 break;
629 case TImode:
630 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
631 break;
632 default:
633 abort ();
634 }
635 s = gen_reg_rtx (src_mode);
636 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
637 emit_insn (pat);
638 src = s;
639 }
640
641 convert_move (dst, src, unsignedp);
642}
643
644void
645spu_expand_insv (rtx ops[])
646{
647 HOST_WIDE_INT width = INTVAL (ops[1]);
648 HOST_WIDE_INT start = INTVAL (ops[2]);
649 HOST_WIDE_INT maskbits;
650 enum machine_mode dst_mode, src_mode;
651 rtx dst = ops[0], src = ops[3];
652 int dst_size, src_size;
653 rtx mask;
654 rtx shift_reg;
655 int shift;
656
657
658 if (GET_CODE (ops[0]) == MEM)
659 dst = gen_reg_rtx (TImode);
660 else
661 dst = adjust_operand (dst, &start);
662 dst_mode = GET_MODE (dst);
663 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
664
665 if (CONSTANT_P (src))
666 {
667 enum machine_mode m =
668 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
669 src = force_reg (m, convert_to_mode (m, src, 0));
670 }
671 src = adjust_operand (src, 0);
672 src_mode = GET_MODE (src);
673 src_size = GET_MODE_BITSIZE (GET_MODE (src));
674
675 mask = gen_reg_rtx (dst_mode);
676 shift_reg = gen_reg_rtx (dst_mode);
677 shift = dst_size - start - width;
678
679 /* It's not safe to use subreg here because the compiler assumes
680 that the SUBREG_REG is right justified in the SUBREG. */
681 convert_move (shift_reg, src, 1);
682
683 if (shift > 0)
684 {
685 switch (dst_mode)
686 {
687 case SImode:
688 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
689 break;
690 case DImode:
691 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
692 break;
693 case TImode:
694 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
695 break;
696 default:
697 abort ();
698 }
699 }
700 else if (shift < 0)
701 abort ();
702
703 switch (dst_size)
704 {
705 case 32:
706 maskbits = (-1ll << (32 - width - start));
707 if (start)
708 maskbits += (1ll << (32 - start));
709 emit_move_insn (mask, GEN_INT (maskbits));
710 break;
711 case 64:
712 maskbits = (-1ll << (64 - width - start));
713 if (start)
714 maskbits += (1ll << (64 - start));
715 emit_move_insn (mask, GEN_INT (maskbits));
716 break;
717 case 128:
718 {
719 unsigned char arr[16];
720 int i = start / 8;
721 memset (arr, 0, sizeof (arr));
722 arr[i] = 0xff >> (start & 7);
723 for (i++; i <= (start + width - 1) / 8; i++)
724 arr[i] = 0xff;
725 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
726 emit_move_insn (mask, array_to_constant (TImode, arr));
727 }
728 break;
729 default:
730 abort ();
731 }
732 if (GET_CODE (ops[0]) == MEM)
733 {
734 rtx aligned = gen_reg_rtx (SImode);
735 rtx low = gen_reg_rtx (SImode);
736 rtx addr = gen_reg_rtx (SImode);
737 rtx rotl = gen_reg_rtx (SImode);
738 rtx mask0 = gen_reg_rtx (TImode);
739 rtx mem;
740
741 emit_move_insn (addr, XEXP (ops[0], 0));
742 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
743 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
744 emit_insn (gen_negsi2 (rotl, low));
745 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
746 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
747 mem = change_address (ops[0], TImode, aligned);
748 set_mem_alias_set (mem, 0);
749 emit_move_insn (dst, mem);
750 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
751 emit_move_insn (mem, dst);
752 if (start + width > MEM_ALIGN (ops[0]))
753 {
754 rtx shl = gen_reg_rtx (SImode);
755 rtx mask1 = gen_reg_rtx (TImode);
756 rtx dst1 = gen_reg_rtx (TImode);
757 rtx mem1;
758 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
759 emit_insn (gen_shlqby_ti (mask1, mask, shl));
760 mem1 = adjust_address (mem, TImode, 16);
761 set_mem_alias_set (mem1, 0);
762 emit_move_insn (dst1, mem1);
763 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
764 emit_move_insn (mem1, dst1);
765 }
766 }
767 else
71cd778d 768 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 769}
770
771
772int
773spu_expand_block_move (rtx ops[])
774{
775 HOST_WIDE_INT bytes, align, offset;
776 rtx src, dst, sreg, dreg, target;
777 int i;
778 if (GET_CODE (ops[2]) != CONST_INT
779 || GET_CODE (ops[3]) != CONST_INT
48eb4342 780 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 781 return 0;
782
783 bytes = INTVAL (ops[2]);
784 align = INTVAL (ops[3]);
785
786 if (bytes <= 0)
787 return 1;
788
789 dst = ops[0];
790 src = ops[1];
791
792 if (align == 16)
793 {
794 for (offset = 0; offset + 16 <= bytes; offset += 16)
795 {
796 dst = adjust_address (ops[0], V16QImode, offset);
797 src = adjust_address (ops[1], V16QImode, offset);
798 emit_move_insn (dst, src);
799 }
800 if (offset < bytes)
801 {
802 rtx mask;
803 unsigned char arr[16] = { 0 };
804 for (i = 0; i < bytes - offset; i++)
805 arr[i] = 0xff;
806 dst = adjust_address (ops[0], V16QImode, offset);
807 src = adjust_address (ops[1], V16QImode, offset);
808 mask = gen_reg_rtx (V16QImode);
809 sreg = gen_reg_rtx (V16QImode);
810 dreg = gen_reg_rtx (V16QImode);
811 target = gen_reg_rtx (V16QImode);
812 emit_move_insn (mask, array_to_constant (V16QImode, arr));
813 emit_move_insn (dreg, dst);
814 emit_move_insn (sreg, src);
815 emit_insn (gen_selb (target, dreg, sreg, mask));
816 emit_move_insn (dst, target);
817 }
818 return 1;
819 }
820 return 0;
821}
822
823enum spu_comp_code
824{ SPU_EQ, SPU_GT, SPU_GTU };
825
5474166e 826int spu_comp_icode[12][3] = {
827 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
828 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
829 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
830 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
831 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
832 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
833 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
834 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
835 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
836 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
837 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
838 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 839};
840
841/* Generate a compare for CODE. Return a brand-new rtx that represents
842 the result of the compare. GCC can figure this out too if we don't
843 provide all variations of compares, but GCC always wants to use
844 WORD_MODE, we can generate better code in most cases if we do it
845 ourselves. */
846void
847spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
848{
849 int reverse_compare = 0;
850 int reverse_test = 0;
5d70b918 851 rtx compare_result, eq_result;
852 rtx comp_rtx, eq_rtx;
644459d0 853 rtx target = operands[0];
854 enum machine_mode comp_mode;
855 enum machine_mode op_mode;
b9c74b4d 856 enum spu_comp_code scode, eq_code;
857 enum insn_code ior_code;
644459d0 858 int index;
5d70b918 859 int eq_test = 0;
644459d0 860
861 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
862 and so on, to keep the constant in operand 1. */
863 if (GET_CODE (spu_compare_op1) == CONST_INT)
864 {
865 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
866 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
867 switch (code)
868 {
869 case GE:
870 spu_compare_op1 = GEN_INT (val);
871 code = GT;
872 break;
873 case LT:
874 spu_compare_op1 = GEN_INT (val);
875 code = LE;
876 break;
877 case GEU:
878 spu_compare_op1 = GEN_INT (val);
879 code = GTU;
880 break;
881 case LTU:
882 spu_compare_op1 = GEN_INT (val);
883 code = LEU;
884 break;
885 default:
886 break;
887 }
888 }
889
5d70b918 890 comp_mode = SImode;
891 op_mode = GET_MODE (spu_compare_op0);
892
644459d0 893 switch (code)
894 {
895 case GE:
644459d0 896 scode = SPU_GT;
07027691 897 if (HONOR_NANS (op_mode))
5d70b918 898 {
899 reverse_compare = 0;
900 reverse_test = 0;
901 eq_test = 1;
902 eq_code = SPU_EQ;
903 }
904 else
905 {
906 reverse_compare = 1;
907 reverse_test = 1;
908 }
644459d0 909 break;
910 case LE:
644459d0 911 scode = SPU_GT;
07027691 912 if (HONOR_NANS (op_mode))
5d70b918 913 {
914 reverse_compare = 1;
915 reverse_test = 0;
916 eq_test = 1;
917 eq_code = SPU_EQ;
918 }
919 else
920 {
921 reverse_compare = 0;
922 reverse_test = 1;
923 }
644459d0 924 break;
925 case LT:
926 reverse_compare = 1;
927 reverse_test = 0;
928 scode = SPU_GT;
929 break;
930 case GEU:
931 reverse_compare = 1;
932 reverse_test = 1;
933 scode = SPU_GTU;
934 break;
935 case LEU:
936 reverse_compare = 0;
937 reverse_test = 1;
938 scode = SPU_GTU;
939 break;
940 case LTU:
941 reverse_compare = 1;
942 reverse_test = 0;
943 scode = SPU_GTU;
944 break;
945 case NE:
946 reverse_compare = 0;
947 reverse_test = 1;
948 scode = SPU_EQ;
949 break;
950
951 case EQ:
952 scode = SPU_EQ;
953 break;
954 case GT:
955 scode = SPU_GT;
956 break;
957 case GTU:
958 scode = SPU_GTU;
959 break;
960 default:
961 scode = SPU_EQ;
962 break;
963 }
964
644459d0 965 switch (op_mode)
966 {
967 case QImode:
968 index = 0;
969 comp_mode = QImode;
970 break;
971 case HImode:
972 index = 1;
973 comp_mode = HImode;
974 break;
975 case SImode:
976 index = 2;
977 break;
978 case DImode:
979 index = 3;
980 break;
981 case TImode:
982 index = 4;
983 break;
984 case SFmode:
985 index = 5;
986 break;
987 case DFmode:
988 index = 6;
989 break;
990 case V16QImode:
5474166e 991 index = 7;
992 comp_mode = op_mode;
993 break;
644459d0 994 case V8HImode:
5474166e 995 index = 8;
996 comp_mode = op_mode;
997 break;
644459d0 998 case V4SImode:
5474166e 999 index = 9;
1000 comp_mode = op_mode;
1001 break;
644459d0 1002 case V4SFmode:
5474166e 1003 index = 10;
1004 comp_mode = V4SImode;
1005 break;
644459d0 1006 case V2DFmode:
5474166e 1007 index = 11;
1008 comp_mode = V2DImode;
644459d0 1009 break;
5474166e 1010 case V2DImode:
644459d0 1011 default:
1012 abort ();
1013 }
1014
07027691 1015 if (GET_MODE (spu_compare_op1) == DFmode
1016 && (scode != SPU_GT && scode != SPU_EQ))
1017 abort ();
644459d0 1018
1019 if (is_set == 0 && spu_compare_op1 == const0_rtx
1020 && (GET_MODE (spu_compare_op0) == SImode
1021 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
1022 {
1023 /* Don't need to set a register with the result when we are
1024 comparing against zero and branching. */
1025 reverse_test = !reverse_test;
1026 compare_result = spu_compare_op0;
1027 }
1028 else
1029 {
1030 compare_result = gen_reg_rtx (comp_mode);
1031
1032 if (reverse_compare)
1033 {
1034 rtx t = spu_compare_op1;
1035 spu_compare_op1 = spu_compare_op0;
1036 spu_compare_op0 = t;
1037 }
1038
1039 if (spu_comp_icode[index][scode] == 0)
1040 abort ();
1041
1042 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1043 (spu_compare_op0, op_mode))
1044 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
1045 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1046 (spu_compare_op1, op_mode))
1047 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
1048 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1049 spu_compare_op0,
1050 spu_compare_op1);
1051 if (comp_rtx == 0)
1052 abort ();
1053 emit_insn (comp_rtx);
1054
5d70b918 1055 if (eq_test)
1056 {
1057 eq_result = gen_reg_rtx (comp_mode);
1058 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1059 spu_compare_op0,
1060 spu_compare_op1);
1061 if (eq_rtx == 0)
1062 abort ();
1063 emit_insn (eq_rtx);
1064 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1065 gcc_assert (ior_code != CODE_FOR_nothing);
1066 emit_insn (GEN_FCN (ior_code)
1067 (compare_result, compare_result, eq_result));
1068 }
644459d0 1069 }
1070
1071 if (is_set == 0)
1072 {
1073 rtx bcomp;
1074 rtx loc_ref;
1075
1076 /* We don't have branch on QI compare insns, so we convert the
1077 QI compare result to a HI result. */
1078 if (comp_mode == QImode)
1079 {
1080 rtx old_res = compare_result;
1081 compare_result = gen_reg_rtx (HImode);
1082 comp_mode = HImode;
1083 emit_insn (gen_extendqihi2 (compare_result, old_res));
1084 }
1085
1086 if (reverse_test)
1087 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1088 else
1089 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1090
1091 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1092 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1093 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1094 loc_ref, pc_rtx)));
1095 }
1096 else if (is_set == 2)
1097 {
1098 int compare_size = GET_MODE_BITSIZE (comp_mode);
1099 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1100 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1101 rtx select_mask;
1102 rtx op_t = operands[2];
1103 rtx op_f = operands[3];
1104
1105 /* The result of the comparison can be SI, HI or QI mode. Create a
1106 mask based on that result. */
1107 if (target_size > compare_size)
1108 {
1109 select_mask = gen_reg_rtx (mode);
1110 emit_insn (gen_extend_compare (select_mask, compare_result));
1111 }
1112 else if (target_size < compare_size)
1113 select_mask =
1114 gen_rtx_SUBREG (mode, compare_result,
1115 (compare_size - target_size) / BITS_PER_UNIT);
1116 else if (comp_mode != mode)
1117 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1118 else
1119 select_mask = compare_result;
1120
1121 if (GET_MODE (target) != GET_MODE (op_t)
1122 || GET_MODE (target) != GET_MODE (op_f))
1123 abort ();
1124
1125 if (reverse_test)
1126 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1127 else
1128 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1129 }
1130 else
1131 {
1132 if (reverse_test)
1133 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1134 gen_rtx_NOT (comp_mode, compare_result)));
1135 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1136 emit_insn (gen_extendhisi2 (target, compare_result));
1137 else if (GET_MODE (target) == SImode
1138 && GET_MODE (compare_result) == QImode)
1139 emit_insn (gen_extend_compare (target, compare_result));
1140 else
1141 emit_move_insn (target, compare_result);
1142 }
1143}
1144
1145HOST_WIDE_INT
1146const_double_to_hwint (rtx x)
1147{
1148 HOST_WIDE_INT val;
1149 REAL_VALUE_TYPE rv;
1150 if (GET_MODE (x) == SFmode)
1151 {
1152 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1153 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1154 }
1155 else if (GET_MODE (x) == DFmode)
1156 {
1157 long l[2];
1158 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1159 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1160 val = l[0];
1161 val = (val << 32) | (l[1] & 0xffffffff);
1162 }
1163 else
1164 abort ();
1165 return val;
1166}
1167
1168rtx
1169hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1170{
1171 long tv[2];
1172 REAL_VALUE_TYPE rv;
1173 gcc_assert (mode == SFmode || mode == DFmode);
1174
1175 if (mode == SFmode)
1176 tv[0] = (v << 32) >> 32;
1177 else if (mode == DFmode)
1178 {
1179 tv[1] = (v << 32) >> 32;
1180 tv[0] = v >> 32;
1181 }
1182 real_from_target (&rv, tv, mode);
1183 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1184}
1185
1186void
1187print_operand_address (FILE * file, register rtx addr)
1188{
1189 rtx reg;
1190 rtx offset;
1191
e04cf423 1192 if (GET_CODE (addr) == AND
1193 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1194 && INTVAL (XEXP (addr, 1)) == -16)
1195 addr = XEXP (addr, 0);
1196
644459d0 1197 switch (GET_CODE (addr))
1198 {
1199 case REG:
1200 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1201 break;
1202
1203 case PLUS:
1204 reg = XEXP (addr, 0);
1205 offset = XEXP (addr, 1);
1206 if (GET_CODE (offset) == REG)
1207 {
1208 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1209 reg_names[REGNO (offset)]);
1210 }
1211 else if (GET_CODE (offset) == CONST_INT)
1212 {
1213 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1214 INTVAL (offset), reg_names[REGNO (reg)]);
1215 }
1216 else
1217 abort ();
1218 break;
1219
1220 case CONST:
1221 case LABEL_REF:
1222 case SYMBOL_REF:
1223 case CONST_INT:
1224 output_addr_const (file, addr);
1225 break;
1226
1227 default:
1228 debug_rtx (addr);
1229 abort ();
1230 }
1231}
1232
1233void
1234print_operand (FILE * file, rtx x, int code)
1235{
1236 enum machine_mode mode = GET_MODE (x);
1237 HOST_WIDE_INT val;
1238 unsigned char arr[16];
1239 int xcode = GET_CODE (x);
dea01258 1240 int i, info;
644459d0 1241 if (GET_MODE (x) == VOIDmode)
1242 switch (code)
1243 {
644459d0 1244 case 'L': /* 128 bits, signed */
1245 case 'm': /* 128 bits, signed */
1246 case 'T': /* 128 bits, signed */
1247 case 't': /* 128 bits, signed */
1248 mode = TImode;
1249 break;
644459d0 1250 case 'K': /* 64 bits, signed */
1251 case 'k': /* 64 bits, signed */
1252 case 'D': /* 64 bits, signed */
1253 case 'd': /* 64 bits, signed */
1254 mode = DImode;
1255 break;
644459d0 1256 case 'J': /* 32 bits, signed */
1257 case 'j': /* 32 bits, signed */
1258 case 's': /* 32 bits, signed */
1259 case 'S': /* 32 bits, signed */
1260 mode = SImode;
1261 break;
1262 }
1263 switch (code)
1264 {
1265
1266 case 'j': /* 32 bits, signed */
1267 case 'k': /* 64 bits, signed */
1268 case 'm': /* 128 bits, signed */
1269 if (xcode == CONST_INT
1270 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1271 {
1272 gcc_assert (logical_immediate_p (x, mode));
1273 constant_to_array (mode, x, arr);
1274 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1275 val = trunc_int_for_mode (val, SImode);
1276 switch (which_logical_immediate (val))
1277 {
1278 case SPU_ORI:
1279 break;
1280 case SPU_ORHI:
1281 fprintf (file, "h");
1282 break;
1283 case SPU_ORBI:
1284 fprintf (file, "b");
1285 break;
1286 default:
1287 gcc_unreachable();
1288 }
1289 }
1290 else
1291 gcc_unreachable();
1292 return;
1293
1294 case 'J': /* 32 bits, signed */
1295 case 'K': /* 64 bits, signed */
1296 case 'L': /* 128 bits, signed */
1297 if (xcode == CONST_INT
1298 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1299 {
1300 gcc_assert (logical_immediate_p (x, mode)
1301 || iohl_immediate_p (x, mode));
1302 constant_to_array (mode, x, arr);
1303 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1304 val = trunc_int_for_mode (val, SImode);
1305 switch (which_logical_immediate (val))
1306 {
1307 case SPU_ORI:
1308 case SPU_IOHL:
1309 break;
1310 case SPU_ORHI:
1311 val = trunc_int_for_mode (val, HImode);
1312 break;
1313 case SPU_ORBI:
1314 val = trunc_int_for_mode (val, QImode);
1315 break;
1316 default:
1317 gcc_unreachable();
1318 }
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1320 }
1321 else
1322 gcc_unreachable();
1323 return;
1324
1325 case 't': /* 128 bits, signed */
1326 case 'd': /* 64 bits, signed */
1327 case 's': /* 32 bits, signed */
dea01258 1328 if (CONSTANT_P (x))
644459d0 1329 {
dea01258 1330 enum immediate_class c = classify_immediate (x, mode);
1331 switch (c)
1332 {
1333 case IC_IL1:
1334 constant_to_array (mode, x, arr);
1335 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1336 val = trunc_int_for_mode (val, SImode);
1337 switch (which_immediate_load (val))
1338 {
1339 case SPU_IL:
1340 break;
1341 case SPU_ILA:
1342 fprintf (file, "a");
1343 break;
1344 case SPU_ILH:
1345 fprintf (file, "h");
1346 break;
1347 case SPU_ILHU:
1348 fprintf (file, "hu");
1349 break;
1350 default:
1351 gcc_unreachable ();
1352 }
1353 break;
1354 case IC_CPAT:
1355 constant_to_array (mode, x, arr);
1356 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1357 if (info == 1)
1358 fprintf (file, "b");
1359 else if (info == 2)
1360 fprintf (file, "h");
1361 else if (info == 4)
1362 fprintf (file, "w");
1363 else if (info == 8)
1364 fprintf (file, "d");
1365 break;
1366 case IC_IL1s:
1367 if (xcode == CONST_VECTOR)
1368 {
1369 x = CONST_VECTOR_ELT (x, 0);
1370 xcode = GET_CODE (x);
1371 }
1372 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1373 fprintf (file, "a");
1374 else if (xcode == HIGH)
1375 fprintf (file, "hu");
1376 break;
1377 case IC_FSMBI:
5df189be 1378 case IC_FSMBI2:
dea01258 1379 case IC_IL2:
1380 case IC_IL2s:
1381 case IC_POOL:
1382 abort ();
1383 }
644459d0 1384 }
644459d0 1385 else
1386 gcc_unreachable ();
1387 return;
1388
1389 case 'T': /* 128 bits, signed */
1390 case 'D': /* 64 bits, signed */
1391 case 'S': /* 32 bits, signed */
dea01258 1392 if (CONSTANT_P (x))
644459d0 1393 {
dea01258 1394 enum immediate_class c = classify_immediate (x, mode);
1395 switch (c)
644459d0 1396 {
dea01258 1397 case IC_IL1:
1398 constant_to_array (mode, x, arr);
1399 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1400 val = trunc_int_for_mode (val, SImode);
1401 switch (which_immediate_load (val))
1402 {
1403 case SPU_IL:
1404 case SPU_ILA:
1405 break;
1406 case SPU_ILH:
1407 case SPU_ILHU:
1408 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1409 break;
1410 default:
1411 gcc_unreachable ();
1412 }
1413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1414 break;
1415 case IC_FSMBI:
1416 constant_to_array (mode, x, arr);
1417 val = 0;
1418 for (i = 0; i < 16; i++)
1419 {
1420 val <<= 1;
1421 val |= arr[i] & 1;
1422 }
1423 print_operand (file, GEN_INT (val), 0);
1424 break;
1425 case IC_CPAT:
1426 constant_to_array (mode, x, arr);
1427 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1428 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1429 break;
dea01258 1430 case IC_IL1s:
dea01258 1431 if (xcode == HIGH)
5df189be 1432 x = XEXP (x, 0);
1433 if (GET_CODE (x) == CONST_VECTOR)
1434 x = CONST_VECTOR_ELT (x, 0);
1435 output_addr_const (file, x);
1436 if (xcode == HIGH)
1437 fprintf (file, "@h");
644459d0 1438 break;
dea01258 1439 case IC_IL2:
1440 case IC_IL2s:
5df189be 1441 case IC_FSMBI2:
dea01258 1442 case IC_POOL:
1443 abort ();
644459d0 1444 }
c8befdb9 1445 }
644459d0 1446 else
1447 gcc_unreachable ();
1448 return;
1449
644459d0 1450 case 'C':
1451 if (xcode == CONST_INT)
1452 {
1453 /* Only 4 least significant bits are relevant for generate
1454 control word instructions. */
1455 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1456 return;
1457 }
1458 break;
1459
1460 case 'M': /* print code for c*d */
1461 if (GET_CODE (x) == CONST_INT)
1462 switch (INTVAL (x))
1463 {
1464 case 1:
1465 fprintf (file, "b");
1466 break;
1467 case 2:
1468 fprintf (file, "h");
1469 break;
1470 case 4:
1471 fprintf (file, "w");
1472 break;
1473 case 8:
1474 fprintf (file, "d");
1475 break;
1476 default:
1477 gcc_unreachable();
1478 }
1479 else
1480 gcc_unreachable();
1481 return;
1482
1483 case 'N': /* Negate the operand */
1484 if (xcode == CONST_INT)
1485 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1486 else if (xcode == CONST_VECTOR)
1487 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1488 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1489 return;
1490
1491 case 'I': /* enable/disable interrupts */
1492 if (xcode == CONST_INT)
1493 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1494 return;
1495
1496 case 'b': /* branch modifiers */
1497 if (xcode == REG)
1498 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1499 else if (COMPARISON_P (x))
1500 fprintf (file, "%s", xcode == NE ? "n" : "");
1501 return;
1502
1503 case 'i': /* indirect call */
1504 if (xcode == MEM)
1505 {
1506 if (GET_CODE (XEXP (x, 0)) == REG)
1507 /* Used in indirect function calls. */
1508 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1509 else
1510 output_address (XEXP (x, 0));
1511 }
1512 return;
1513
1514 case 'p': /* load/store */
1515 if (xcode == MEM)
1516 {
1517 x = XEXP (x, 0);
1518 xcode = GET_CODE (x);
1519 }
e04cf423 1520 if (xcode == AND)
1521 {
1522 x = XEXP (x, 0);
1523 xcode = GET_CODE (x);
1524 }
644459d0 1525 if (xcode == REG)
1526 fprintf (file, "d");
1527 else if (xcode == CONST_INT)
1528 fprintf (file, "a");
1529 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1530 fprintf (file, "r");
1531 else if (xcode == PLUS || xcode == LO_SUM)
1532 {
1533 if (GET_CODE (XEXP (x, 1)) == REG)
1534 fprintf (file, "x");
1535 else
1536 fprintf (file, "d");
1537 }
1538 return;
1539
5df189be 1540 case 'e':
1541 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1542 val &= 0x7;
1543 output_addr_const (file, GEN_INT (val));
1544 return;
1545
1546 case 'f':
1547 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1548 val &= 0x1f;
1549 output_addr_const (file, GEN_INT (val));
1550 return;
1551
1552 case 'g':
1553 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1554 val &= 0x3f;
1555 output_addr_const (file, GEN_INT (val));
1556 return;
1557
1558 case 'h':
1559 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1560 val = (val >> 3) & 0x1f;
1561 output_addr_const (file, GEN_INT (val));
1562 return;
1563
1564 case 'E':
1565 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1566 val = -val;
1567 val &= 0x7;
1568 output_addr_const (file, GEN_INT (val));
1569 return;
1570
1571 case 'F':
1572 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1573 val = -val;
1574 val &= 0x1f;
1575 output_addr_const (file, GEN_INT (val));
1576 return;
1577
1578 case 'G':
1579 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1580 val = -val;
1581 val &= 0x3f;
1582 output_addr_const (file, GEN_INT (val));
1583 return;
1584
1585 case 'H':
1586 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1587 val = -(val & -8ll);
1588 val = (val >> 3) & 0x1f;
1589 output_addr_const (file, GEN_INT (val));
1590 return;
1591
56c7bfc2 1592 case 'v':
1593 case 'w':
1594 constant_to_array (mode, x, arr);
1595 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1596 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1597 return;
1598
644459d0 1599 case 0:
1600 if (xcode == REG)
1601 fprintf (file, "%s", reg_names[REGNO (x)]);
1602 else if (xcode == MEM)
1603 output_address (XEXP (x, 0));
1604 else if (xcode == CONST_VECTOR)
dea01258 1605 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1606 else
1607 output_addr_const (file, x);
1608 return;
1609
f6a0d06f 1610 /* unused letters
56c7bfc2 1611 o qr u yz
5df189be 1612 AB OPQR UVWXYZ */
644459d0 1613 default:
1614 output_operand_lossage ("invalid %%xn code");
1615 }
1616 gcc_unreachable ();
1617}
1618
1619extern char call_used_regs[];
644459d0 1620
1621/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1622 caller saved register. For leaf functions it is more efficient to
1623 use a volatile register because we won't need to save and restore the
1624 pic register. This routine is only valid after register allocation
1625 is completed, so we can pick an unused register. */
1626static rtx
1627get_pic_reg (void)
1628{
1629 rtx pic_reg = pic_offset_table_rtx;
1630 if (!reload_completed && !reload_in_progress)
1631 abort ();
1632 return pic_reg;
1633}
1634
5df189be 1635/* Split constant addresses to handle cases that are too large.
1636 Add in the pic register when in PIC mode.
1637 Split immediates that require more than 1 instruction. */
dea01258 1638int
1639spu_split_immediate (rtx * ops)
c8befdb9 1640{
dea01258 1641 enum machine_mode mode = GET_MODE (ops[0]);
1642 enum immediate_class c = classify_immediate (ops[1], mode);
1643
1644 switch (c)
c8befdb9 1645 {
dea01258 1646 case IC_IL2:
1647 {
1648 unsigned char arrhi[16];
1649 unsigned char arrlo[16];
98bbec1e 1650 rtx to, temp, hi, lo;
dea01258 1651 int i;
98bbec1e 1652 enum machine_mode imode = mode;
1653 /* We need to do reals as ints because the constant used in the
1654 IOR might not be a legitimate real constant. */
1655 imode = int_mode_for_mode (mode);
dea01258 1656 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1657 if (imode != mode)
1658 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1659 else
1660 to = ops[0];
1661 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1662 for (i = 0; i < 16; i += 4)
1663 {
1664 arrlo[i + 2] = arrhi[i + 2];
1665 arrlo[i + 3] = arrhi[i + 3];
1666 arrlo[i + 0] = arrlo[i + 1] = 0;
1667 arrhi[i + 2] = arrhi[i + 3] = 0;
1668 }
98bbec1e 1669 hi = array_to_constant (imode, arrhi);
1670 lo = array_to_constant (imode, arrlo);
1671 emit_move_insn (temp, hi);
dea01258 1672 emit_insn (gen_rtx_SET
98bbec1e 1673 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1674 return 1;
1675 }
5df189be 1676 case IC_FSMBI2:
1677 {
1678 unsigned char arr_fsmbi[16];
1679 unsigned char arr_andbi[16];
1680 rtx to, reg_fsmbi, reg_and;
1681 int i;
1682 enum machine_mode imode = mode;
1683 /* We need to do reals as ints because the constant used in the
1684 * AND might not be a legitimate real constant. */
1685 imode = int_mode_for_mode (mode);
1686 constant_to_array (mode, ops[1], arr_fsmbi);
1687 if (imode != mode)
1688 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1689 else
1690 to = ops[0];
1691 for (i = 0; i < 16; i++)
1692 if (arr_fsmbi[i] != 0)
1693 {
1694 arr_andbi[0] = arr_fsmbi[i];
1695 arr_fsmbi[i] = 0xff;
1696 }
1697 for (i = 1; i < 16; i++)
1698 arr_andbi[i] = arr_andbi[0];
1699 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1700 reg_and = array_to_constant (imode, arr_andbi);
1701 emit_move_insn (to, reg_fsmbi);
1702 emit_insn (gen_rtx_SET
1703 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1704 return 1;
1705 }
dea01258 1706 case IC_POOL:
1707 if (reload_in_progress || reload_completed)
1708 {
1709 rtx mem = force_const_mem (mode, ops[1]);
1710 if (TARGET_LARGE_MEM)
1711 {
1712 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1713 emit_move_insn (addr, XEXP (mem, 0));
1714 mem = replace_equiv_address (mem, addr);
1715 }
1716 emit_move_insn (ops[0], mem);
1717 return 1;
1718 }
1719 break;
1720 case IC_IL1s:
1721 case IC_IL2s:
1722 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1723 {
1724 if (c == IC_IL2s)
1725 {
5df189be 1726 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1727 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1728 }
1729 else if (flag_pic)
1730 emit_insn (gen_pic (ops[0], ops[1]));
1731 if (flag_pic)
1732 {
1733 rtx pic_reg = get_pic_reg ();
1734 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1735 crtl->uses_pic_offset_table = 1;
dea01258 1736 }
1737 return flag_pic || c == IC_IL2s;
1738 }
1739 break;
1740 case IC_IL1:
1741 case IC_FSMBI:
1742 case IC_CPAT:
1743 break;
c8befdb9 1744 }
dea01258 1745 return 0;
c8befdb9 1746}
1747
644459d0 1748/* SAVING is TRUE when we are generating the actual load and store
1749 instructions for REGNO. When determining the size of the stack
1750 needed for saving register we must allocate enough space for the
1751 worst case, because we don't always have the information early enough
1752 to not allocate it. But we can at least eliminate the actual loads
1753 and stores during the prologue/epilogue. */
1754static int
1755need_to_save_reg (int regno, int saving)
1756{
3072d30e 1757 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1758 return 1;
1759 if (flag_pic
1760 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1761 && (!saving || crtl->uses_pic_offset_table)
644459d0 1762 && (!saving
3072d30e 1763 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1764 return 1;
1765 return 0;
1766}
1767
1768/* This function is only correct starting with local register
1769 allocation */
1770int
1771spu_saved_regs_size (void)
1772{
1773 int reg_save_size = 0;
1774 int regno;
1775
1776 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1777 if (need_to_save_reg (regno, 0))
1778 reg_save_size += 0x10;
1779 return reg_save_size;
1780}
1781
1782static rtx
1783frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1784{
1785 rtx reg = gen_rtx_REG (V4SImode, regno);
1786 rtx mem =
1787 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1788 return emit_insn (gen_movv4si (mem, reg));
1789}
1790
1791static rtx
1792frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1793{
1794 rtx reg = gen_rtx_REG (V4SImode, regno);
1795 rtx mem =
1796 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1797 return emit_insn (gen_movv4si (reg, mem));
1798}
1799
1800/* This happens after reload, so we need to expand it. */
1801static rtx
1802frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1803{
1804 rtx insn;
1805 if (satisfies_constraint_K (GEN_INT (imm)))
1806 {
1807 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1808 }
1809 else
1810 {
3072d30e 1811 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1812 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1813 if (REGNO (src) == REGNO (scratch))
1814 abort ();
1815 }
644459d0 1816 return insn;
1817}
1818
1819/* Return nonzero if this function is known to have a null epilogue. */
1820
1821int
1822direct_return (void)
1823{
1824 if (reload_completed)
1825 {
1826 if (cfun->static_chain_decl == 0
1827 && (spu_saved_regs_size ()
1828 + get_frame_size ()
abe32cce 1829 + crtl->outgoing_args_size
1830 + crtl->args.pretend_args_size == 0)
644459d0 1831 && current_function_is_leaf)
1832 return 1;
1833 }
1834 return 0;
1835}
1836
1837/*
1838 The stack frame looks like this:
1839 +-------------+
1840 | incoming |
a8e019fa 1841 | args |
1842 AP -> +-------------+
644459d0 1843 | $lr save |
1844 +-------------+
1845 prev SP | back chain |
1846 +-------------+
1847 | var args |
abe32cce 1848 | reg save | crtl->args.pretend_args_size bytes
644459d0 1849 +-------------+
1850 | ... |
1851 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1852 FP -> +-------------+
644459d0 1853 | ... |
a8e019fa 1854 | vars | get_frame_size() bytes
1855 HFP -> +-------------+
644459d0 1856 | ... |
1857 | outgoing |
abe32cce 1858 | args | crtl->outgoing_args_size bytes
644459d0 1859 +-------------+
1860 | $lr of next |
1861 | frame |
1862 +-------------+
a8e019fa 1863 | back chain |
1864 SP -> +-------------+
644459d0 1865
1866*/
1867void
1868spu_expand_prologue (void)
1869{
1870 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1871 HOST_WIDE_INT total_size;
1872 HOST_WIDE_INT saved_regs_size;
1873 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1874 rtx scratch_reg_0, scratch_reg_1;
1875 rtx insn, real;
1876
1877 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1878 the "toplevel" insn chain. */
1879 emit_note (NOTE_INSN_DELETED);
1880
1881 if (flag_pic && optimize == 0)
18d50ae6 1882 crtl->uses_pic_offset_table = 1;
644459d0 1883
1884 if (spu_naked_function_p (current_function_decl))
1885 return;
1886
1887 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1888 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1889
1890 saved_regs_size = spu_saved_regs_size ();
1891 total_size = size + saved_regs_size
abe32cce 1892 + crtl->outgoing_args_size
1893 + crtl->args.pretend_args_size;
644459d0 1894
1895 if (!current_function_is_leaf
18d50ae6 1896 || cfun->calls_alloca || total_size > 0)
644459d0 1897 total_size += STACK_POINTER_OFFSET;
1898
1899 /* Save this first because code after this might use the link
1900 register as a scratch register. */
1901 if (!current_function_is_leaf)
1902 {
1903 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1904 RTX_FRAME_RELATED_P (insn) = 1;
1905 }
1906
1907 if (total_size > 0)
1908 {
abe32cce 1909 offset = -crtl->args.pretend_args_size;
644459d0 1910 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1911 if (need_to_save_reg (regno, 1))
1912 {
1913 offset -= 16;
1914 insn = frame_emit_store (regno, sp_reg, offset);
1915 RTX_FRAME_RELATED_P (insn) = 1;
1916 }
1917 }
1918
18d50ae6 1919 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1920 {
1921 rtx pic_reg = get_pic_reg ();
1922 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1923 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1924 }
1925
1926 if (total_size > 0)
1927 {
1928 if (flag_stack_check)
1929 {
d819917f 1930 /* We compare against total_size-1 because
644459d0 1931 ($sp >= total_size) <=> ($sp > total_size-1) */
1932 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1933 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1934 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1935 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1936 {
1937 emit_move_insn (scratch_v4si, size_v4si);
1938 size_v4si = scratch_v4si;
1939 }
1940 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1941 emit_insn (gen_vec_extractv4si
1942 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1943 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1944 }
1945
1946 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1947 the value of the previous $sp because we save it as the back
1948 chain. */
1949 if (total_size <= 2000)
1950 {
1951 /* In this case we save the back chain first. */
1952 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1953 insn =
1954 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1955 }
644459d0 1956 else
1957 {
1958 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1959 insn =
1960 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1961 }
1962 RTX_FRAME_RELATED_P (insn) = 1;
1963 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1964 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1965
1966 if (total_size > 2000)
1967 {
1968 /* Save the back chain ptr */
1969 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1970 }
1971
1972 if (frame_pointer_needed)
1973 {
1974 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1975 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1976 + crtl->outgoing_args_size;
644459d0 1977 /* Set the new frame_pointer */
d8dfeb55 1978 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1979 RTX_FRAME_RELATED_P (insn) = 1;
1980 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1981 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1982 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1983 }
1984 }
1985
1986 emit_note (NOTE_INSN_DELETED);
1987}
1988
1989void
1990spu_expand_epilogue (bool sibcall_p)
1991{
1992 int size = get_frame_size (), offset, regno;
1993 HOST_WIDE_INT saved_regs_size, total_size;
1994 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1995 rtx jump, scratch_reg_0;
1996
1997 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1998 the "toplevel" insn chain. */
1999 emit_note (NOTE_INSN_DELETED);
2000
2001 if (spu_naked_function_p (current_function_decl))
2002 return;
2003
2004 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2005
2006 saved_regs_size = spu_saved_regs_size ();
2007 total_size = size + saved_regs_size
abe32cce 2008 + crtl->outgoing_args_size
2009 + crtl->args.pretend_args_size;
644459d0 2010
2011 if (!current_function_is_leaf
18d50ae6 2012 || cfun->calls_alloca || total_size > 0)
644459d0 2013 total_size += STACK_POINTER_OFFSET;
2014
2015 if (total_size > 0)
2016 {
18d50ae6 2017 if (cfun->calls_alloca)
644459d0 2018 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2019 else
2020 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2021
2022
2023 if (saved_regs_size > 0)
2024 {
abe32cce 2025 offset = -crtl->args.pretend_args_size;
644459d0 2026 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2027 if (need_to_save_reg (regno, 1))
2028 {
2029 offset -= 0x10;
2030 frame_emit_load (regno, sp_reg, offset);
2031 }
2032 }
2033 }
2034
2035 if (!current_function_is_leaf)
2036 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2037
2038 if (!sibcall_p)
2039 {
18b42941 2040 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2041 jump = emit_jump_insn (gen__return ());
2042 emit_barrier_after (jump);
2043 }
2044
2045 emit_note (NOTE_INSN_DELETED);
2046}
2047
2048rtx
2049spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2050{
2051 if (count != 0)
2052 return 0;
2053 /* This is inefficient because it ends up copying to a save-register
2054 which then gets saved even though $lr has already been saved. But
2055 it does generate better code for leaf functions and we don't need
2056 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2057 used for __builtin_return_address anyway, so maybe we don't care if
2058 it's inefficient. */
2059 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2060}
2061\f
2062
2063/* Given VAL, generate a constant appropriate for MODE.
2064 If MODE is a vector mode, every element will be VAL.
2065 For TImode, VAL will be zero extended to 128 bits. */
2066rtx
2067spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2068{
2069 rtx inner;
2070 rtvec v;
2071 int units, i;
2072
2073 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2074 || GET_MODE_CLASS (mode) == MODE_FLOAT
2075 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2076 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2077
2078 if (GET_MODE_CLASS (mode) == MODE_INT)
2079 return immed_double_const (val, 0, mode);
2080
2081 /* val is the bit representation of the float */
2082 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2083 return hwint_to_const_double (mode, val);
2084
2085 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2086 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2087 else
2088 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2089
2090 units = GET_MODE_NUNITS (mode);
2091
2092 v = rtvec_alloc (units);
2093
2094 for (i = 0; i < units; ++i)
2095 RTVEC_ELT (v, i) = inner;
2096
2097 return gen_rtx_CONST_VECTOR (mode, v);
2098}
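/* Illustrative uses with hypothetical values, not from the original
   source: spu_const (V4SImode, 7) yields a CONST_VECTOR of four SImode
   7s, and spu_const (SFmode, 0x3f800000) yields the CONST_DOUBLE for
   1.0f, since VAL is interpreted as the bit pattern of the float.  */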
644459d0 2099
5474166e 2100/* Create a MODE vector constant from 4 ints. */
2101rtx
2102spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2103{
2104 unsigned char arr[16];
2105 arr[0] = (a >> 24) & 0xff;
2106 arr[1] = (a >> 16) & 0xff;
2107 arr[2] = (a >> 8) & 0xff;
2108 arr[3] = (a >> 0) & 0xff;
2109 arr[4] = (b >> 24) & 0xff;
2110 arr[5] = (b >> 16) & 0xff;
2111 arr[6] = (b >> 8) & 0xff;
2112 arr[7] = (b >> 0) & 0xff;
2113 arr[8] = (c >> 24) & 0xff;
2114 arr[9] = (c >> 16) & 0xff;
2115 arr[10] = (c >> 8) & 0xff;
2116 arr[11] = (c >> 0) & 0xff;
2117 arr[12] = (d >> 24) & 0xff;
2118 arr[13] = (d >> 16) & 0xff;
2119 arr[14] = (d >> 8) & 0xff;
2120 arr[15] = (d >> 0) & 0xff;
2121 return array_to_constant(mode, arr);
2122}
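/* For illustration (hypothetical values): each int is packed big-endian,
   most significant byte first, so
   spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                        0x08090a0b, 0x0c0d0e0f)
   fills the byte array with 0x00, 0x01, ..., 0x0f and returns the
   corresponding CONST_VECTOR.  */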
5a976006 2123\f
2124/* branch hint stuff */
5474166e 2125
644459d0 2126/* An array of these is used to propagate hints to predecessor blocks. */
2127struct spu_bb_info
2128{
5a976006 2129 rtx prop_jump; /* propagated from another block */
2130 int bb_index; /* the original block. */
644459d0 2131};
5a976006 2132static struct spu_bb_info *spu_bb_info;
644459d0 2133
5a976006 2134#define STOP_HINT_P(INSN) \
2135 (GET_CODE(INSN) == CALL_INSN \
2136 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2137 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2138
2139/* 1 when RTX is a hinted branch or its target. We keep track of
2140 what has been hinted so the safe-hint code can test it easily. */
2141#define HINTED_P(RTX) \
2142 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2143
2144/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2145#define SCHED_ON_EVEN_P(RTX) \
2146 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2147
2148/* Emit a nop for INSN such that the two will dual issue. This assumes
2149 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2150 We check for TImode to handle a MULTI1 insn which has dual issued its
2151 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2152 ADDR_VEC insns. */
2153static void
2154emit_nop_for_insn (rtx insn)
644459d0 2155{
5a976006 2156 int p;
2157 rtx new_insn;
2158 p = get_pipe (insn);
2159 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2160 new_insn = emit_insn_after (gen_lnop (), insn);
2161 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2162 {
5a976006 2163 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2164 PUT_MODE (new_insn, TImode);
2165 PUT_MODE (insn, VOIDmode);
2166 }
2167 else
2168 new_insn = emit_insn_after (gen_lnop (), insn);
2169 recog_memoized (new_insn);
2170}
2171
2172/* Insert nops in basic blocks to meet dual issue alignment
2173 requirements. Also make sure hbrp and hint instructions are at least
2174 one cycle apart, possibly inserting a nop. */
2175static void
2176pad_bb(void)
2177{
2178 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2179 int length;
2180 int addr;
2181
2182 /* This sets up INSN_ADDRESSES. */
2183 shorten_branches (get_insns ());
2184
2185 /* Keep track of length added by nops. */
2186 length = 0;
2187
2188 prev_insn = 0;
2189 insn = get_insns ();
2190 if (!active_insn_p (insn))
2191 insn = next_active_insn (insn);
2192 for (; insn; insn = next_insn)
2193 {
2194 next_insn = next_active_insn (insn);
2195 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2196 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2197 {
5a976006 2198 if (hbr_insn)
2199 {
2200 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2201 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2202 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2203 || (a1 - a0 == 4))
2204 {
2205 prev_insn = emit_insn_before (gen_lnop (), insn);
2206 PUT_MODE (prev_insn, GET_MODE (insn));
2207 PUT_MODE (insn, TImode);
2208 length += 4;
2209 }
2210 }
2211 hbr_insn = insn;
2212 }
2213 if (INSN_CODE (insn) == CODE_FOR_blockage)
2214 {
2215 if (GET_MODE (insn) == TImode)
2216 PUT_MODE (next_insn, TImode);
2217 insn = next_insn;
2218 next_insn = next_active_insn (insn);
2219 }
2220 addr = INSN_ADDRESSES (INSN_UID (insn));
2221 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2222 {
2223 if (((addr + length) & 7) != 0)
2224 {
2225 emit_nop_for_insn (prev_insn);
2226 length += 4;
2227 }
644459d0 2228 }
5a976006 2229 else if (GET_MODE (insn) == TImode
2230 && ((next_insn && GET_MODE (next_insn) != TImode)
2231 || get_attr_type (insn) == TYPE_MULTI0)
2232 && ((addr + length) & 7) != 0)
2233 {
2234 /* prev_insn will always be set because the first insn is
2235 always 8-byte aligned. */
2236 emit_nop_for_insn (prev_insn);
2237 length += 4;
2238 }
2239 prev_insn = insn;
644459d0 2240 }
644459d0 2241}
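/* A concrete illustration of the hbr spacing above, with hypothetical
   addresses: if the previous hint was at address 0x100 and the next hbrp
   or hint sits at 0x104, or at 0x108 without being flagged TImode, the
   two would be too close, so pad_bb inserts an lnop in front of the
   second one to keep them at least a cycle apart.  */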
2242
5a976006 2243\f
2244/* Routines for branch hints. */
2245
644459d0 2246static void
5a976006 2247spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2248 int distance, sbitmap blocks)
644459d0 2249{
5a976006 2250 rtx branch_label = 0;
2251 rtx hint;
2252 rtx insn;
2253 rtx table;
644459d0 2254
2255 if (before == 0 || branch == 0 || target == 0)
2256 return;
2257
5a976006 2258 /* While scheduling we require hints to be no further than 600 bytes from
 2259 the branch, so we need to enforce that here too. */
644459d0 2260 if (distance > 600)
2261 return;
2262
5a976006 2263 /* If BEFORE is a basic block note, emit the hint after the note. */
2264 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2265 before = NEXT_INSN (before);
644459d0 2266
2267 branch_label = gen_label_rtx ();
2268 LABEL_NUSES (branch_label)++;
2269 LABEL_PRESERVE_P (branch_label) = 1;
2270 insn = emit_label_before (branch_label, branch);
2271 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2272 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2273
2274 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2275 recog_memoized (hint);
2276 HINTED_P (branch) = 1;
644459d0 2277
5a976006 2278 if (GET_CODE (target) == LABEL_REF)
2279 HINTED_P (XEXP (target, 0)) = 1;
2280 else if (tablejump_p (branch, 0, &table))
644459d0 2281 {
5a976006 2282 rtvec vec;
2283 int j;
2284 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2285 vec = XVEC (PATTERN (table), 0);
2286 else
2287 vec = XVEC (PATTERN (table), 1);
2288 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2289 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2290 }
5a976006 2291
2292 if (distance >= 588)
644459d0 2293 {
5a976006 2294 /* Make sure the hint isn't scheduled any earlier than this point,
2295 which could make it too far for the branch offset to fit. */
2296 recog_memoized (emit_insn_before (gen_blockage (), hint));
2297 }
2298 else if (distance <= 8 * 4)
2299 {
2300 /* To guarantee at least 8 insns between the hint and branch we
2301 insert nops. */
2302 int d;
2303 for (d = distance; d < 8 * 4; d += 4)
2304 {
2305 insn =
2306 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2307 recog_memoized (insn);
2308 }
2309
2310 /* Make sure any nops inserted aren't scheduled before the hint. */
2311 recog_memoized (emit_insn_after (gen_blockage (), hint));
2312
2313 /* Make sure any nops inserted aren't scheduled after the call. */
2314 if (CALL_P (branch) && distance < 8 * 4)
2315 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2316 }
644459d0 2317}
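/* A worked example of the nop padding above, assuming a hypothetical
   DISTANCE of 20 bytes (5 insns): the loop runs for d = 20, 24, 28 and
   emits three nops after the hint, bringing the hint-to-branch span up
   to the required 8 instructions (32 bytes).  */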
2318
2319/* Returns 0 if we don't want a hint for this branch. Otherwise return
2320 the rtx for the branch target. */
2321static rtx
2322get_branch_target (rtx branch)
2323{
2324 if (GET_CODE (branch) == JUMP_INSN)
2325 {
2326 rtx set, src;
2327
2328 /* Return statements */
2329 if (GET_CODE (PATTERN (branch)) == RETURN)
2330 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2331
2332 /* jump table */
2333 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2334 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2335 return 0;
2336
2337 set = single_set (branch);
2338 src = SET_SRC (set);
2339 if (GET_CODE (SET_DEST (set)) != PC)
2340 abort ();
2341
2342 if (GET_CODE (src) == IF_THEN_ELSE)
2343 {
2344 rtx lab = 0;
2345 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2346 if (note)
2347 {
2348 /* If the more probable case is not a fall through, then
2349 try a branch hint. */
2350 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2351 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2352 && GET_CODE (XEXP (src, 1)) != PC)
2353 lab = XEXP (src, 1);
2354 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2355 && GET_CODE (XEXP (src, 2)) != PC)
2356 lab = XEXP (src, 2);
2357 }
2358 if (lab)
2359 {
2360 if (GET_CODE (lab) == RETURN)
2361 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2362 return lab;
2363 }
2364 return 0;
2365 }
2366
2367 return src;
2368 }
2369 else if (GET_CODE (branch) == CALL_INSN)
2370 {
2371 rtx call;
2372 /* All of our call patterns are in a PARALLEL and the CALL is
2373 the first pattern in the PARALLEL. */
2374 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2375 abort ();
2376 call = XVECEXP (PATTERN (branch), 0, 0);
2377 if (GET_CODE (call) == SET)
2378 call = SET_SRC (call);
2379 if (GET_CODE (call) != CALL)
2380 abort ();
2381 return XEXP (XEXP (call, 0), 0);
2382 }
2383 return 0;
2384}
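/* Illustration of the probability thresholds above (REG_BR_PROB_BASE is
   10000, values here are hypothetical): a REG_BR_PROB note of 7000 is
   above 6/10, so the first arm of the IF_THEN_ELSE is hinted when it is
   a label; a note of 3000 is below 4/10, so the second arm is hinted
   instead; anything in between yields no label and the function
   returns 0.  */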
2385
5a976006 2386/* The special $hbr register is used to prevent the insn scheduler from
2387 moving hbr insns across instructions which invalidate them. It
2388 should only be used in a clobber, and this function searches for
2389 insns which clobber it. */
2390static bool
2391insn_clobbers_hbr (rtx insn)
2392{
2393 if (INSN_P (insn)
2394 && GET_CODE (PATTERN (insn)) == PARALLEL)
2395 {
2396 rtx parallel = PATTERN (insn);
2397 rtx clobber;
2398 int j;
2399 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2400 {
2401 clobber = XVECEXP (parallel, 0, j);
2402 if (GET_CODE (clobber) == CLOBBER
2403 && GET_CODE (XEXP (clobber, 0)) == REG
2404 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2405 return 1;
2406 }
2407 }
2408 return 0;
2409}
2410
2411/* Search up to 32 insns starting at FIRST:
2412 - at any kind of hinted branch, just return
2413 - at any unconditional branch in the first 15 insns, just return
2414 - at a call or indirect branch, after the first 15 insns, force it to
2415 an even address and return
2416 - at any unconditional branch, after the first 15 insns, force it to
2417 an even address.
2418 At the end of the search, insert an hbrp within 4 insns of FIRST,
2419 and an hbrp within 16 instructions of FIRST.
2420 */
644459d0 2421static void
5a976006 2422insert_hbrp_for_ilb_runout (rtx first)
644459d0 2423{
5a976006 2424 rtx insn, before_4 = 0, before_16 = 0;
2425 int addr = 0, length, first_addr = -1;
2426 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2427 int insert_lnop_after = 0;
2428 for (insn = first; insn; insn = NEXT_INSN (insn))
2429 if (INSN_P (insn))
2430 {
2431 if (first_addr == -1)
2432 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2433 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2434 length = get_attr_length (insn);
2435
2436 if (before_4 == 0 && addr + length >= 4 * 4)
2437 before_4 = insn;
2438 /* We test for 14 instructions because the first hbrp will add
2439 up to 2 instructions. */
2440 if (before_16 == 0 && addr + length >= 14 * 4)
2441 before_16 = insn;
2442
2443 if (INSN_CODE (insn) == CODE_FOR_hbr)
2444 {
2445 /* Make sure an hbrp is at least 2 cycles away from a hint.
2446 Insert an lnop after the hbrp when necessary. */
2447 if (before_4 == 0 && addr > 0)
2448 {
2449 before_4 = insn;
2450 insert_lnop_after |= 1;
2451 }
2452 else if (before_4 && addr <= 4 * 4)
2453 insert_lnop_after |= 1;
2454 if (before_16 == 0 && addr > 10 * 4)
2455 {
2456 before_16 = insn;
2457 insert_lnop_after |= 2;
2458 }
2459 else if (before_16 && addr <= 14 * 4)
2460 insert_lnop_after |= 2;
2461 }
644459d0 2462
5a976006 2463 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2464 {
2465 if (addr < hbrp_addr0)
2466 hbrp_addr0 = addr;
2467 else if (addr < hbrp_addr1)
2468 hbrp_addr1 = addr;
2469 }
644459d0 2470
5a976006 2471 if (CALL_P (insn) || JUMP_P (insn))
2472 {
2473 if (HINTED_P (insn))
2474 return;
2475
2476 /* Any branch after the first 15 insns should be on an even
2477 address to avoid a special case branch. There might be
2478 some nops and/or hbrps inserted, so we test after 10
2479 insns. */
2480 if (addr > 10 * 4)
2481 SCHED_ON_EVEN_P (insn) = 1;
2482 }
644459d0 2483
5a976006 2484 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2485 return;
2486
2487
2488 if (addr + length >= 32 * 4)
644459d0 2489 {
5a976006 2490 gcc_assert (before_4 && before_16);
2491 if (hbrp_addr0 > 4 * 4)
644459d0 2492 {
5a976006 2493 insn =
2494 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2495 recog_memoized (insn);
2496 INSN_ADDRESSES_NEW (insn,
2497 INSN_ADDRESSES (INSN_UID (before_4)));
2498 PUT_MODE (insn, GET_MODE (before_4));
2499 PUT_MODE (before_4, TImode);
2500 if (insert_lnop_after & 1)
644459d0 2501 {
5a976006 2502 insn = emit_insn_before (gen_lnop (), before_4);
2503 recog_memoized (insn);
2504 INSN_ADDRESSES_NEW (insn,
2505 INSN_ADDRESSES (INSN_UID (before_4)));
2506 PUT_MODE (insn, TImode);
644459d0 2507 }
644459d0 2508 }
5a976006 2509 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2510 && hbrp_addr1 > 16 * 4)
644459d0 2511 {
5a976006 2512 insn =
2513 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2514 recog_memoized (insn);
2515 INSN_ADDRESSES_NEW (insn,
2516 INSN_ADDRESSES (INSN_UID (before_16)));
2517 PUT_MODE (insn, GET_MODE (before_16));
2518 PUT_MODE (before_16, TImode);
2519 if (insert_lnop_after & 2)
644459d0 2520 {
5a976006 2521 insn = emit_insn_before (gen_lnop (), before_16);
2522 recog_memoized (insn);
2523 INSN_ADDRESSES_NEW (insn,
2524 INSN_ADDRESSES (INSN_UID
2525 (before_16)));
2526 PUT_MODE (insn, TImode);
644459d0 2527 }
2528 }
5a976006 2529 return;
644459d0 2530 }
644459d0 2531 }
5a976006 2532 else if (BARRIER_P (insn))
2533 return;
644459d0 2534
644459d0 2535}
5a976006 2536
2537/* The SPU might hang when it executes 48 inline instructions after a
2538 hinted branch jumps to its hinted target. The beginning of a
2539 function and the return from a call might have been hinted, and must
2540 be handled as well. To prevent a hang we insert 2 hbrps. The first
2541 should be within 6 insns of the branch target. The second should be
2542 within 22 insns of the branch target. When determining if hbrps are
2543 necessary, we look for only 32 inline instructions, because up to
2544 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2545 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2546static void
5a976006 2547insert_hbrp (void)
644459d0 2548{
5a976006 2549 rtx insn;
2550 if (TARGET_SAFE_HINTS)
644459d0 2551 {
5a976006 2552 shorten_branches (get_insns ());
2553 /* Insert hbrp at beginning of function */
2554 insn = next_active_insn (get_insns ());
2555 if (insn)
2556 insert_hbrp_for_ilb_runout (insn);
2557 /* Insert hbrp after hinted targets. */
2558 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2559 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2560 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2561 }
644459d0 2562}
2563
5a976006 2564static int in_spu_reorg;
2565
2566/* Insert branch hints. There are no branch optimizations after this
2567 pass, so it's safe to set our branch hints now. */
644459d0 2568static void
5a976006 2569spu_machine_dependent_reorg (void)
644459d0 2570{
5a976006 2571 sbitmap blocks;
2572 basic_block bb;
2573 rtx branch, insn;
2574 rtx branch_target = 0;
2575 int branch_addr = 0, insn_addr, required_dist = 0;
2576 int i;
2577 unsigned int j;
644459d0 2578
5a976006 2579 if (!TARGET_BRANCH_HINTS || optimize == 0)
2580 {
2581 /* We still do it for unoptimized code because an external
2582 function might have hinted a call or return. */
2583 insert_hbrp ();
2584 pad_bb ();
2585 return;
2586 }
644459d0 2587
5a976006 2588 blocks = sbitmap_alloc (last_basic_block);
2589 sbitmap_zero (blocks);
644459d0 2590
5a976006 2591 in_spu_reorg = 1;
2592 compute_bb_for_insn ();
2593
2594 compact_blocks ();
2595
2596 spu_bb_info =
2597 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2598 sizeof (struct spu_bb_info));
2599
2600 /* We need exact insn addresses and lengths. */
2601 shorten_branches (get_insns ());
2602
2603 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2604 {
5a976006 2605 bb = BASIC_BLOCK (i);
2606 branch = 0;
2607 if (spu_bb_info[i].prop_jump)
644459d0 2608 {
5a976006 2609 branch = spu_bb_info[i].prop_jump;
2610 branch_target = get_branch_target (branch);
2611 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2612 required_dist = spu_hint_dist;
2613 }
2614 /* Search from end of a block to beginning. In this loop, find
2615 jumps which need a branch and emit them only when:
2616 - it's an indirect branch and we're at the insn which sets
2617 the register
2618 - we're at an insn that will invalidate the hint. e.g., a
2619 call, another hint insn, inline asm that clobbers $hbr, and
2620 some inlined operations (divmodsi4). Don't consider jumps
2621 because they are only at the end of a block and are
2622 considered when we are deciding whether to propagate
2623 - we're getting too far away from the branch. The hbr insns
2624 only have a signed 10 bit offset
2625 We go back as far as possible so the branch will be considered
2626 for propagation when we get to the beginning of the block. */
2627 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2628 {
2629 if (INSN_P (insn))
2630 {
2631 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2632 if (branch
2633 && ((GET_CODE (branch_target) == REG
2634 && set_of (branch_target, insn) != NULL_RTX)
2635 || insn_clobbers_hbr (insn)
2636 || branch_addr - insn_addr > 600))
2637 {
2638 rtx next = NEXT_INSN (insn);
2639 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2640 if (insn != BB_END (bb)
2641 && branch_addr - next_addr >= required_dist)
2642 {
2643 if (dump_file)
2644 fprintf (dump_file,
2645 "hint for %i in block %i before %i\n",
2646 INSN_UID (branch), bb->index,
2647 INSN_UID (next));
2648 spu_emit_branch_hint (next, branch, branch_target,
2649 branch_addr - next_addr, blocks);
2650 }
2651 branch = 0;
2652 }
2653
2654 /* JUMP_P will only be true at the end of a block. When
2655 branch is already set it means we've previously decided
2656 to propagate a hint for that branch into this block. */
2657 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2658 {
2659 branch = 0;
2660 if ((branch_target = get_branch_target (insn)))
2661 {
2662 branch = insn;
2663 branch_addr = insn_addr;
2664 required_dist = spu_hint_dist;
2665 }
2666 }
2667 }
2668 if (insn == BB_HEAD (bb))
2669 break;
2670 }
2671
2672 if (branch)
2673 {
2674 /* If we haven't emitted a hint for this branch yet, it might
2675 be profitable to emit it in one of the predecessor blocks,
2676 especially for loops. */
2677 rtx bbend;
2678 basic_block prev = 0, prop = 0, prev2 = 0;
2679 int loop_exit = 0, simple_loop = 0;
2680 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2681
2682 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2683 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2684 prev = EDGE_PRED (bb, j)->src;
2685 else
2686 prev2 = EDGE_PRED (bb, j)->src;
2687
2688 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2689 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2690 loop_exit = 1;
2691 else if (EDGE_SUCC (bb, j)->dest == bb)
2692 simple_loop = 1;
2693
2694 /* If this branch is a loop exit then propagate to previous
2695 fallthru block. This catches the cases when it is a simple
2696 loop or when there is an initial branch into the loop. */
2697 if (prev && (loop_exit || simple_loop)
2698 && prev->loop_depth <= bb->loop_depth)
2699 prop = prev;
2700
2701 /* If there is only one adjacent predecessor, don't propagate
2702 outside this loop. This loop_depth test isn't perfect, but
2703 I'm not sure the loop_father member is valid at this point. */
2704 else if (prev && single_pred_p (bb)
2705 && prev->loop_depth == bb->loop_depth)
2706 prop = prev;
2707
2708 /* If this is the JOIN block of a simple IF-THEN then
2709 propagate the hint to the HEADER block. */
2710 else if (prev && prev2
2711 && EDGE_COUNT (bb->preds) == 2
2712 && EDGE_COUNT (prev->preds) == 1
2713 && EDGE_PRED (prev, 0)->src == prev2
2714 && prev2->loop_depth == bb->loop_depth
2715 && GET_CODE (branch_target) != REG)
2716 prop = prev;
2717
2718 /* Don't propagate when:
2719 - this is a simple loop and the hint would be too far
2720 - this is not a simple loop and there are 16 insns in
2721 this block already
2722 - the predecessor block ends in a branch that will be
2723 hinted
2724 - the predecessor block ends in an insn that invalidates
2725 the hint */
2726 if (prop
2727 && prop->index >= 0
2728 && (bbend = BB_END (prop))
2729 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2730 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2731 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2732 {
2733 if (dump_file)
2734 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2735 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2736 bb->index, prop->index, bb->loop_depth,
2737 INSN_UID (branch), loop_exit, simple_loop,
2738 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2739
2740 spu_bb_info[prop->index].prop_jump = branch;
2741 spu_bb_info[prop->index].bb_index = i;
2742 }
2743 else if (branch_addr - next_addr >= required_dist)
2744 {
2745 if (dump_file)
2746 fprintf (dump_file, "hint for %i in block %i before %i\n",
2747 INSN_UID (branch), bb->index,
2748 INSN_UID (NEXT_INSN (insn)));
2749 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2750 branch_addr - next_addr, blocks);
2751 }
2752 branch = 0;
644459d0 2753 }
644459d0 2754 }
5a976006 2755 free (spu_bb_info);
644459d0 2756
5a976006 2757 if (!sbitmap_empty_p (blocks))
2758 find_many_sub_basic_blocks (blocks);
2759
2760 /* We have to schedule to make sure alignment is ok. */
2761 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2762
2763 /* The hints need to be scheduled, so call it again. */
2764 schedule_insns ();
2765
2766 insert_hbrp ();
2767
2768 pad_bb ();
2769
8f1d58ad 2770 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2771 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2772 {
2773 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2774 between its branch label and the branch. We don't move the
2775 label because GCC expects it at the beginning of the block. */
2776 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2777 rtx label_ref = XVECEXP (unspec, 0, 0);
2778 rtx label = XEXP (label_ref, 0);
2779 rtx branch;
2780 int offset = 0;
2781 for (branch = NEXT_INSN (label);
2782 !JUMP_P (branch) && !CALL_P (branch);
2783 branch = NEXT_INSN (branch))
2784 if (NONJUMP_INSN_P (branch))
2785 offset += get_attr_length (branch);
2786 if (offset > 0)
2787 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2788 }
5a976006 2789
2790 if (spu_flag_var_tracking)
644459d0 2791 {
5a976006 2792 df_analyze ();
2793 timevar_push (TV_VAR_TRACKING);
2794 variable_tracking_main ();
2795 timevar_pop (TV_VAR_TRACKING);
2796 df_finish_pass (false);
644459d0 2797 }
5a976006 2798
2799 free_bb_for_insn ();
2800
2801 in_spu_reorg = 0;
644459d0 2802}
2803\f
2804
2805/* Insn scheduling routines, primarily for dual issue. */
2806static int
2807spu_sched_issue_rate (void)
2808{
2809 return 2;
2810}
2811
2812static int
5a976006 2813uses_ls_unit(rtx insn)
644459d0 2814{
5a976006 2815 rtx set = single_set (insn);
2816 if (set != 0
2817 && (GET_CODE (SET_DEST (set)) == MEM
2818 || GET_CODE (SET_SRC (set)) == MEM))
2819 return 1;
2820 return 0;
644459d0 2821}
2822
2823static int
2824get_pipe (rtx insn)
2825{
2826 enum attr_type t;
2827 /* Handle inline asm */
2828 if (INSN_CODE (insn) == -1)
2829 return -1;
2830 t = get_attr_type (insn);
2831 switch (t)
2832 {
2833 case TYPE_CONVERT:
2834 return -2;
2835 case TYPE_MULTI0:
2836 return -1;
2837
2838 case TYPE_FX2:
2839 case TYPE_FX3:
2840 case TYPE_SPR:
2841 case TYPE_NOP:
2842 case TYPE_FXB:
2843 case TYPE_FPD:
2844 case TYPE_FP6:
2845 case TYPE_FP7:
644459d0 2846 return 0;
2847
2848 case TYPE_LNOP:
2849 case TYPE_SHUF:
2850 case TYPE_LOAD:
2851 case TYPE_STORE:
2852 case TYPE_BR:
2853 case TYPE_MULTI1:
2854 case TYPE_HBR:
5a976006 2855 case TYPE_IPREFETCH:
644459d0 2856 return 1;
2857 default:
2858 abort ();
2859 }
2860}
2861
5a976006 2862
2863/* haifa-sched.c has a static variable that keeps track of the current
2864 cycle. It is passed to spu_sched_reorder, and we record it here for
2865 use by spu_sched_variable_issue. It won't be accurate if the
2866 scheduler updates its clock_var between the two calls. */
2867static int clock_var;
2868
2869/* This is used to keep track of insn alignment. Set to 0 at the
2870 beginning of each block and increased by the "length" attr of each
2871 insn scheduled. */
2872static int spu_sched_length;
2873
2874/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2875 ready list appropriately in spu_sched_reorder(). */
2876static int pipe0_clock;
2877static int pipe1_clock;
2878
2879static int prev_clock_var;
2880
2881static int prev_priority;
2882
2883/* The SPU needs to load the next ilb sometime during the execution of
2884 the previous ilb. There is a potential conflict if every cycle has a
2885 load or store. To avoid the conflict we make sure the load/store
2886 unit is free for at least one cycle during the execution of insns in
2887 the previous ilb. */
2888static int spu_ls_first;
2889static int prev_ls_clock;
2890
2891static void
2892spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2893 int max_ready ATTRIBUTE_UNUSED)
2894{
2895 spu_sched_length = 0;
2896}
2897
2898static void
2899spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2900 int max_ready ATTRIBUTE_UNUSED)
2901{
2902 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2903 {
2904 /* When any block might be at least 8-byte aligned, assume they
2905 will all be at least 8-byte aligned to make sure dual issue
2906 works out correctly. */
2907 spu_sched_length = 0;
2908 }
2909 spu_ls_first = INT_MAX;
2910 clock_var = -1;
2911 prev_ls_clock = -1;
2912 pipe0_clock = -1;
2913 pipe1_clock = -1;
2914 prev_clock_var = -1;
2915 prev_priority = -1;
2916}
2917
644459d0 2918static int
5a976006 2919spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2920 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2921{
5a976006 2922 int len;
2923 int p;
644459d0 2924 if (GET_CODE (PATTERN (insn)) == USE
2925 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2926 || (len = get_attr_length (insn)) == 0)
2927 return more;
2928
2929 spu_sched_length += len;
2930
2931 /* Reset on inline asm */
2932 if (INSN_CODE (insn) == -1)
2933 {
2934 spu_ls_first = INT_MAX;
2935 pipe0_clock = -1;
2936 pipe1_clock = -1;
2937 return 0;
2938 }
2939 p = get_pipe (insn);
2940 if (p == 0)
2941 pipe0_clock = clock_var;
2942 else
2943 pipe1_clock = clock_var;
2944
2945 if (in_spu_reorg)
2946 {
2947 if (clock_var - prev_ls_clock > 1
2948 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2949 spu_ls_first = INT_MAX;
2950 if (uses_ls_unit (insn))
2951 {
2952 if (spu_ls_first == INT_MAX)
2953 spu_ls_first = spu_sched_length;
2954 prev_ls_clock = clock_var;
2955 }
2956
2957 /* The scheduler hasn't inserted the nop, but we will later on.
2958 Include those nops in spu_sched_length. */
2959 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2960 spu_sched_length += 4;
2961 prev_clock_var = clock_var;
2962
2963 /* more is -1 when called from spu_sched_reorder for new insns
2964 that don't have INSN_PRIORITY */
2965 if (more >= 0)
2966 prev_priority = INSN_PRIORITY (insn);
2967 }
2968
2969 /* Always try issuing more insns. spu_sched_reorder will decide
2970 when the cycle should be advanced. */
2971 return 1;
2972}
2973
2974/* This function is called for both TARGET_SCHED_REORDER and
2975 TARGET_SCHED_REORDER2. */
2976static int
2977spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2978 rtx *ready, int *nreadyp, int clock)
2979{
2980 int i, nready = *nreadyp;
2981 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2982 rtx insn;
2983
2984 clock_var = clock;
2985
2986 if (nready <= 0 || pipe1_clock >= clock)
2987 return 0;
2988
2989 /* Find any rtl insns that don't generate assembly insns and schedule
2990 them first. */
2991 for (i = nready - 1; i >= 0; i--)
2992 {
2993 insn = ready[i];
2994 if (INSN_CODE (insn) == -1
2995 || INSN_CODE (insn) == CODE_FOR_blockage
2996 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2997 {
2998 ready[i] = ready[nready - 1];
2999 ready[nready - 1] = insn;
3000 return 1;
3001 }
3002 }
3003
3004 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3005 for (i = 0; i < nready; i++)
3006 if (INSN_CODE (ready[i]) != -1)
3007 {
3008 insn = ready[i];
3009 switch (get_attr_type (insn))
3010 {
3011 default:
3012 case TYPE_MULTI0:
3013 case TYPE_CONVERT:
3014 case TYPE_FX2:
3015 case TYPE_FX3:
3016 case TYPE_SPR:
3017 case TYPE_NOP:
3018 case TYPE_FXB:
3019 case TYPE_FPD:
3020 case TYPE_FP6:
3021 case TYPE_FP7:
3022 pipe_0 = i;
3023 break;
3024 case TYPE_LOAD:
3025 case TYPE_STORE:
3026 pipe_ls = i; /* Fall through: loads and stores are also pipe 1 insns. */
3027 case TYPE_LNOP:
3028 case TYPE_SHUF:
3029 case TYPE_BR:
3030 case TYPE_MULTI1:
3031 case TYPE_HBR:
3032 pipe_1 = i;
3033 break;
3034 case TYPE_IPREFETCH:
3035 pipe_hbrp = i;
3036 break;
3037 }
3038 }
3039
3040 /* In the first scheduling phase, schedule loads and stores together
3041 to increase the chance they will get merged during postreload CSE. */
3042 if (!reload_completed && pipe_ls >= 0)
3043 {
3044 insn = ready[pipe_ls];
3045 ready[pipe_ls] = ready[nready - 1];
3046 ready[nready - 1] = insn;
3047 return 1;
3048 }
3049
3050 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3051 if (pipe_hbrp >= 0)
3052 pipe_1 = pipe_hbrp;
3053
3054 /* When we have loads/stores in every cycle of the last 15 insns and
3055 we are about to schedule another load/store, emit an hbrp insn
3056 instead. */
3057 if (in_spu_reorg
3058 && spu_sched_length - spu_ls_first >= 4 * 15
3059 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3060 {
3061 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3062 recog_memoized (insn);
3063 if (pipe0_clock < clock)
3064 PUT_MODE (insn, TImode);
3065 spu_sched_variable_issue (file, verbose, insn, -1);
3066 return 0;
3067 }
3068
3069 /* In general, we want to emit nops to increase dual issue, but dual
3070 issue isn't faster when one of the insns could be scheduled later
3071 without affecting the critical path. We look at INSN_PRIORITY to
3072 make a good guess, but it isn't perfect so -mdual-nops=n can be
3073 used to affect it. */
3074 if (in_spu_reorg && spu_dual_nops < 10)
3075 {
3076 /* When we are at an even address and we are not issuing nops to
3077 improve scheduling then we need to advance the cycle. */
3078 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3079 && (spu_dual_nops == 0
3080 || (pipe_1 != -1
3081 && prev_priority >
3082 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3083 return 0;
3084
3085 /* When at an odd address, schedule the highest priority insn
3086 without considering pipeline. */
3087 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3088 && (spu_dual_nops == 0
3089 || (prev_priority >
3090 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3091 return 1;
3092 }
3093
3094
3095 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3096 pipe0 insn in the ready list, schedule it. */
3097 if (pipe0_clock < clock && pipe_0 >= 0)
3098 schedule_i = pipe_0;
3099
3100 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3101 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3102 else
3103 schedule_i = pipe_1;
3104
3105 if (schedule_i > -1)
3106 {
3107 insn = ready[schedule_i];
3108 ready[schedule_i] = ready[nready - 1];
3109 ready[nready - 1] = insn;
3110 return 1;
3111 }
3112 return 0;
644459d0 3113}
3114
3115/* INSN is dependent on DEP_INSN. */
3116static int
5a976006 3117spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3118{
5a976006 3119 rtx set;
3120
3121 /* The blockage pattern is used to prevent instructions from being
3122 moved across it and has no cost. */
3123 if (INSN_CODE (insn) == CODE_FOR_blockage
3124 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3125 return 0;
3126
3127 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3128 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3129 return 0;
3130
3131 /* Make sure hbrps are spread out. */
3132 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3133 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3134 return 8;
3135
3136 /* Make sure hints and hbrps are 2 cycles apart. */
3137 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3138 || INSN_CODE (insn) == CODE_FOR_hbr)
3139 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3140 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3141 return 2;
3142
3143 /* An hbrp has no real dependency on other insns. */
3144 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3145 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3146 return 0;
3147
3148 /* Assuming that it is unlikely an argument register will be used in
3149 the first cycle of the called function, we reduce the cost for
3150 slightly better scheduling of dep_insn. When not hinted, the
3151 mispredicted branch would hide the cost as well. */
3152 if (CALL_P (insn))
3153 {
3154 rtx target = get_branch_target (insn);
3155 if (GET_CODE (target) != REG || !set_of (target, insn))
3156 return cost - 2;
3157 return cost;
3158 }
3159
3160 /* And when returning from a function, let's assume the return values
3161 are completed sooner too. */
3162 if (CALL_P (dep_insn))
644459d0 3163 return cost - 2;
5a976006 3164
3165 /* Make sure an instruction that loads from the back chain is scheduled
3166 away from the return instruction so a hint is more likely to get
3167 issued. */
3168 if (INSN_CODE (insn) == CODE_FOR__return
3169 && (set = single_set (dep_insn))
3170 && GET_CODE (SET_DEST (set)) == REG
3171 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3172 return 20;
3173
644459d0 3174 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3175 scheduler makes every insn in a block anti-dependent on the final
3176 jump_insn. We adjust here so higher cost insns will get scheduled
3177 earlier. */
5a976006 3178 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3179 return insn_cost (dep_insn) - 3;
5a976006 3180
644459d0 3181 return cost;
3182}
3183\f
3184/* Create a CONST_DOUBLE from a string. */
3185struct rtx_def *
3186spu_float_const (const char *string, enum machine_mode mode)
3187{
3188 REAL_VALUE_TYPE value;
3189 value = REAL_VALUE_ATOF (string, mode);
3190 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3191}
3192
644459d0 3193int
3194spu_constant_address_p (rtx x)
3195{
3196 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3197 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3198 || GET_CODE (x) == HIGH);
3199}
3200
3201static enum spu_immediate
3202which_immediate_load (HOST_WIDE_INT val)
3203{
3204 gcc_assert (val == trunc_int_for_mode (val, SImode));
3205
3206 if (val >= -0x8000 && val <= 0x7fff)
3207 return SPU_IL;
3208 if (val >= 0 && val <= 0x3ffff)
3209 return SPU_ILA;
3210 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3211 return SPU_ILH;
3212 if ((val & 0xffff) == 0)
3213 return SPU_ILHU;
3214
3215 return SPU_NONE;
3216}
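/* Example classifications (illustrative values, not from the original
   source):
     0x7fff      -> SPU_IL    (fits the signed 16-bit il field)
     0x20000     -> SPU_ILA   (fits the 18-bit unsigned ila field)
     0x00050005  -> SPU_ILH   (both halfwords identical)
     -0x10000    -> SPU_ILHU  (low halfword zero, high halfword via ilhu)
   Anything else returns SPU_NONE and needs more than one instruction.  */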
3217
dea01258 3218/* Return true when OP can be loaded by one of the il instructions, or
3219 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3220int
3221immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3222{
3223 if (CONSTANT_P (op))
3224 {
3225 enum immediate_class c = classify_immediate (op, mode);
5df189be 3226 return c == IC_IL1 || c == IC_IL1s
3072d30e 3227 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3228 }
3229 return 0;
3230}
3231
3232/* Return true if the first SIZE bytes of arr is a constant that can be
3233 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3234 represent the size and offset of the instruction to use. */
3235static int
3236cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3237{
3238 int cpat, run, i, start;
3239 cpat = 1;
3240 run = 0;
3241 start = -1;
3242 for (i = 0; i < size && cpat; i++)
3243 if (arr[i] != i+16)
3244 {
3245 if (!run)
3246 {
3247 start = i;
3248 if (arr[i] == 3)
3249 run = 1;
3250 else if (arr[i] == 2 && arr[i+1] == 3)
3251 run = 2;
3252 else if (arr[i] == 0)
3253 {
3254 while (arr[i+run] == run && i+run < 16)
3255 run++;
3256 if (run != 4 && run != 8)
3257 cpat = 0;
3258 }
3259 else
3260 cpat = 0;
3261 if ((i & (run-1)) != 0)
3262 cpat = 0;
3263 i += run;
3264 }
3265 else
3266 cpat = 0;
3267 }
b01a6dc3 3268 if (cpat && (run || size < 16))
dea01258 3269 {
3270 if (run == 0)
3271 run = 1;
3272 if (prun)
3273 *prun = run;
3274 if (pstart)
3275 *pstart = start == -1 ? 16-run : start;
3276 return 1;
3277 }
3278 return 0;
3279}
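/* A worked example, using a hypothetical shuffle pattern: the 16-byte
   array { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 } keeps every
   byte of the second operand except bytes 4-7, which take bytes 0-3 of
   the first operand.  cpat_info recognizes it and returns 1 with
   *PRUN = 4 and *PSTART = 4, the same pattern cwd produces for a word
   at offset 4.  */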
3280
3281/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3282 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3283static enum immediate_class
3284classify_immediate (rtx op, enum machine_mode mode)
644459d0 3285{
3286 HOST_WIDE_INT val;
3287 unsigned char arr[16];
5df189be 3288 int i, j, repeated, fsmbi, repeat;
dea01258 3289
3290 gcc_assert (CONSTANT_P (op));
3291
644459d0 3292 if (GET_MODE (op) != VOIDmode)
3293 mode = GET_MODE (op);
3294
dea01258 3295 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3296 if (!flag_pic
3297 && mode == V4SImode
dea01258 3298 && GET_CODE (op) == CONST_VECTOR
3299 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3300 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3301 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3302 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3303 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3304 op = CONST_VECTOR_ELT (op, 0);
644459d0 3305
dea01258 3306 switch (GET_CODE (op))
3307 {
3308 case SYMBOL_REF:
3309 case LABEL_REF:
3310 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3311
dea01258 3312 case CONST:
0cfc65d4 3313 /* We can never know if the resulting address fits in 18 bits and can be
3314 loaded with ila. For now, assume the address will not overflow if
3315 the displacement is "small" (fits 'K' constraint). */
3316 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3317 {
3318 rtx sym = XEXP (XEXP (op, 0), 0);
3319 rtx cst = XEXP (XEXP (op, 0), 1);
3320
3321 if (GET_CODE (sym) == SYMBOL_REF
3322 && GET_CODE (cst) == CONST_INT
3323 && satisfies_constraint_K (cst))
3324 return IC_IL1s;
3325 }
3326 return IC_IL2s;
644459d0 3327
dea01258 3328 case HIGH:
3329 return IC_IL1s;
3330
3331 case CONST_VECTOR:
3332 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3333 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3334 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3335 return IC_POOL;
3336 /* Fall through. */
3337
3338 case CONST_INT:
3339 case CONST_DOUBLE:
3340 constant_to_array (mode, op, arr);
644459d0 3341
dea01258 3342 /* Check that each 4-byte slot is identical. */
3343 repeated = 1;
3344 for (i = 4; i < 16; i += 4)
3345 for (j = 0; j < 4; j++)
3346 if (arr[j] != arr[i + j])
3347 repeated = 0;
3348
3349 if (repeated)
3350 {
3351 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3352 val = trunc_int_for_mode (val, SImode);
3353
3354 if (which_immediate_load (val) != SPU_NONE)
3355 return IC_IL1;
3356 }
3357
3358 /* Any mode of 2 bytes or smaller can be loaded with an il
3359 instruction. */
3360 gcc_assert (GET_MODE_SIZE (mode) > 2);
3361
3362 fsmbi = 1;
5df189be 3363 repeat = 0;
dea01258 3364 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3365 if (arr[i] != 0 && repeat == 0)
3366 repeat = arr[i];
3367 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3368 fsmbi = 0;
3369 if (fsmbi)
5df189be 3370 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3371
3372 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3373 return IC_CPAT;
3374
3375 if (repeated)
3376 return IC_IL2;
3377
3378 return IC_POOL;
3379 default:
3380 break;
3381 }
3382 gcc_unreachable ();
644459d0 3383}
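/* Two illustrative classifications (hypothetical constants): a V4SI
   vector of four 0x00050005 words is "repeated" and the value loads
   with ilh, so it is IC_IL1; a V4SI vector of four 0xff0000ff words
   fits no single il form, but every byte is either 0x00 or 0xff, so it
   is IC_FSMBI.  */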
3384
3385static enum spu_immediate
3386which_logical_immediate (HOST_WIDE_INT val)
3387{
3388 gcc_assert (val == trunc_int_for_mode (val, SImode));
3389
3390 if (val >= -0x200 && val <= 0x1ff)
3391 return SPU_ORI;
3392 if (val >= 0 && val <= 0xffff)
3393 return SPU_IOHL;
3394 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3395 {
3396 val = trunc_int_for_mode (val, HImode);
3397 if (val >= -0x200 && val <= 0x1ff)
3398 return SPU_ORHI;
3399 if ((val & 0xff) == ((val >> 8) & 0xff))
3400 {
3401 val = trunc_int_for_mode (val, QImode);
3402 if (val >= -0x200 && val <= 0x1ff)
3403 return SPU_ORBI;
3404 }
3405 }
3406 return SPU_NONE;
3407}
3408
5df189be 3409/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3410 CONST_DOUBLEs. */
3411static int
3412const_vector_immediate_p (rtx x)
3413{
3414 int i;
3415 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3416 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3417 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3418 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3419 return 0;
3420 return 1;
3421}
3422
644459d0 3423int
3424logical_immediate_p (rtx op, enum machine_mode mode)
3425{
3426 HOST_WIDE_INT val;
3427 unsigned char arr[16];
3428 int i, j;
3429
3430 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3431 || GET_CODE (op) == CONST_VECTOR);
3432
5df189be 3433 if (GET_CODE (op) == CONST_VECTOR
3434 && !const_vector_immediate_p (op))
3435 return 0;
3436
644459d0 3437 if (GET_MODE (op) != VOIDmode)
3438 mode = GET_MODE (op);
3439
3440 constant_to_array (mode, op, arr);
3441
3442 /* Check that bytes are repeated. */
3443 for (i = 4; i < 16; i += 4)
3444 for (j = 0; j < 4; j++)
3445 if (arr[j] != arr[i + j])
3446 return 0;
3447
3448 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3449 val = trunc_int_for_mode (val, SImode);
3450
3451 i = which_logical_immediate (val);
3452 return i != SPU_NONE && i != SPU_IOHL;
3453}
3454
3455int
3456iohl_immediate_p (rtx op, enum machine_mode mode)
3457{
3458 HOST_WIDE_INT val;
3459 unsigned char arr[16];
3460 int i, j;
3461
3462 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3463 || GET_CODE (op) == CONST_VECTOR);
3464
5df189be 3465 if (GET_CODE (op) == CONST_VECTOR
3466 && !const_vector_immediate_p (op))
3467 return 0;
3468
644459d0 3469 if (GET_MODE (op) != VOIDmode)
3470 mode = GET_MODE (op);
3471
3472 constant_to_array (mode, op, arr);
3473
3474 /* Check that bytes are repeated. */
3475 for (i = 4; i < 16; i += 4)
3476 for (j = 0; j < 4; j++)
3477 if (arr[j] != arr[i + j])
3478 return 0;
3479
3480 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3481 val = trunc_int_for_mode (val, SImode);
3482
3483 return val >= 0 && val <= 0xffff;
3484}
3485
3486int
3487arith_immediate_p (rtx op, enum machine_mode mode,
3488 HOST_WIDE_INT low, HOST_WIDE_INT high)
3489{
3490 HOST_WIDE_INT val;
3491 unsigned char arr[16];
3492 int bytes, i, j;
3493
3494 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3495 || GET_CODE (op) == CONST_VECTOR);
3496
5df189be 3497 if (GET_CODE (op) == CONST_VECTOR
3498 && !const_vector_immediate_p (op))
3499 return 0;
3500
644459d0 3501 if (GET_MODE (op) != VOIDmode)
3502 mode = GET_MODE (op);
3503
3504 constant_to_array (mode, op, arr);
3505
3506 if (VECTOR_MODE_P (mode))
3507 mode = GET_MODE_INNER (mode);
3508
3509 bytes = GET_MODE_SIZE (mode);
3510 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3511
3512 /* Check that bytes are repeated. */
3513 for (i = bytes; i < 16; i += bytes)
3514 for (j = 0; j < bytes; j++)
3515 if (arr[j] != arr[i + j])
3516 return 0;
3517
3518 val = arr[0];
3519 for (j = 1; j < bytes; j++)
3520 val = (val << 8) | arr[j];
3521
3522 val = trunc_int_for_mode (val, mode);
3523
3524 return val >= low && val <= high;
3525}
3526
56c7bfc2 3527/* TRUE when op is an immediate and an exact power of 2, and given that
3528 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3529 all entries must be the same. */
3530bool
3531exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3532{
3533 enum machine_mode int_mode;
3534 HOST_WIDE_INT val;
3535 unsigned char arr[16];
3536 int bytes, i, j;
3537
3538 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3539 || GET_CODE (op) == CONST_VECTOR);
3540
3541 if (GET_CODE (op) == CONST_VECTOR
3542 && !const_vector_immediate_p (op))
3543 return 0;
3544
3545 if (GET_MODE (op) != VOIDmode)
3546 mode = GET_MODE (op);
3547
3548 constant_to_array (mode, op, arr);
3549
3550 if (VECTOR_MODE_P (mode))
3551 mode = GET_MODE_INNER (mode);
3552
3553 bytes = GET_MODE_SIZE (mode);
3554 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3555
3556 /* Check that bytes are repeated. */
3557 for (i = bytes; i < 16; i += bytes)
3558 for (j = 0; j < bytes; j++)
3559 if (arr[j] != arr[i + j])
3560 return 0;
3561
3562 val = arr[0];
3563 for (j = 1; j < bytes; j++)
3564 val = (val << 8) | arr[j];
3565
3566 val = trunc_int_for_mode (val, int_mode);
3567
3568 /* Currently, we only handle SFmode */
3569 gcc_assert (mode == SFmode);
3570 if (mode == SFmode)
3571 {
3572 int exp = (val >> 23) - 127;
3573 return val > 0 && (val & 0x007fffff) == 0
3574 && exp >= low && exp <= high;
3575 }
3576 return FALSE;
3577}
3578
644459d0 3579/* We accept:
5b865faf 3580 - any 32-bit constant (SImode, SFmode)
644459d0 3581 - any constant that can be generated with fsmbi (any mode)
5b865faf 3582 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3583 (DImode, DFmode)
5b865faf 3584 - a 128-bit constant where the four 32-bit words match. */
644459d0 3585int
3586spu_legitimate_constant_p (rtx x)
3587{
5df189be 3588 if (GET_CODE (x) == HIGH)
3589 x = XEXP (x, 0);
644459d0 3590 /* V4SI with all identical symbols is valid. */
5df189be 3591 if (!flag_pic
3592 && GET_MODE (x) == V4SImode
644459d0 3593 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3594 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3595 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3596 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3597 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3598 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3599
5df189be 3600 if (GET_CODE (x) == CONST_VECTOR
3601 && !const_vector_immediate_p (x))
3602 return 0;
644459d0 3603 return 1;
3604}
3605
3606/* Valid addresses are:
3607 - symbol_ref, label_ref, const
3608 - reg
3609 - reg + const, where either reg or const is 16 byte aligned
3610 - reg + reg, alignment doesn't matter
3611 The alignment matters in the reg+const case because lqd and stqd
3612 ignore the 4 least significant bits of the const. (TODO: It might be
3613 preferable to allow any alignment and fix it up when splitting.) */
3614int
3615spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3616 rtx x, int reg_ok_strict)
3617{
3618 if (mode == TImode && GET_CODE (x) == AND
3619 && GET_CODE (XEXP (x, 1)) == CONST_INT
3620 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3621 x = XEXP (x, 0);
3622 switch (GET_CODE (x))
3623 {
3624 case SYMBOL_REF:
3625 case LABEL_REF:
3626 return !TARGET_LARGE_MEM;
3627
3628 case CONST:
0cfc65d4 3629 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3630 {
3631 rtx sym = XEXP (XEXP (x, 0), 0);
3632 rtx cst = XEXP (XEXP (x, 0), 1);
3633
3634 /* Accept any symbol_ref + constant, assuming it does not
3635 wrap around the local store addressability limit. */
3636 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3637 return 1;
3638 }
3639 return 0;
644459d0 3640
3641 case CONST_INT:
3642 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3643
3644 case SUBREG:
3645 x = XEXP (x, 0);
3646 gcc_assert (GET_CODE (x) == REG);
3647
3648 case REG:
3649 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3650
3651 case PLUS:
3652 case LO_SUM:
3653 {
3654 rtx op0 = XEXP (x, 0);
3655 rtx op1 = XEXP (x, 1);
3656 if (GET_CODE (op0) == SUBREG)
3657 op0 = XEXP (op0, 0);
3658 if (GET_CODE (op1) == SUBREG)
3659 op1 = XEXP (op1, 0);
3660 /* We can't just accept any aligned register because CSE can
3661 change it to a register that is not marked aligned and then
3662 recog will fail. So we only accept frame registers because
3663 they will only be changed to other frame registers. */
3664 if (GET_CODE (op0) == REG
3665 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3666 && GET_CODE (op1) == CONST_INT
3667 && INTVAL (op1) >= -0x2000
3668 && INTVAL (op1) <= 0x1fff
5df189be 3669 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 3670 return 1;
3671 if (GET_CODE (op0) == REG
3672 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3673 && GET_CODE (op1) == REG
3674 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3675 return 1;
3676 }
3677 break;
3678
3679 default:
3680 break;
3681 }
3682 return 0;
3683}
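/* Illustration of the reg + const_int case above, with hypothetical
   operands: an offset of 32 is accepted with any base register because
   32 & 15 == 0, while an offset of 4 is accepted only when
   regno_aligned_for_load says the base is 16-byte aligned, since
   lqd/stqd ignore the low 4 bits of the displacement.  Offsets outside
   [-0x2000, 0x1fff] are always rejected.  */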
3684
3685/* When the address is reg + const_int, force the const_int into a
fa7637bd 3686 register. */
644459d0 3687rtx
3688spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3689 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3690{
3691 rtx op0, op1;
3692 /* Make sure both operands are registers. */
3693 if (GET_CODE (x) == PLUS)
3694 {
3695 op0 = XEXP (x, 0);
3696 op1 = XEXP (x, 1);
3697 if (ALIGNED_SYMBOL_REF_P (op0))
3698 {
3699 op0 = force_reg (Pmode, op0);
3700 mark_reg_pointer (op0, 128);
3701 }
3702 else if (GET_CODE (op0) != REG)
3703 op0 = force_reg (Pmode, op0);
3704 if (ALIGNED_SYMBOL_REF_P (op1))
3705 {
3706 op1 = force_reg (Pmode, op1);
3707 mark_reg_pointer (op1, 128);
3708 }
3709 else if (GET_CODE (op1) != REG)
3710 op1 = force_reg (Pmode, op1);
3711 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3712 }
41e3a0c7 3713 return x;
644459d0 3714}
3715
3716/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3717 struct attribute_spec.handler. */
3718static tree
3719spu_handle_fndecl_attribute (tree * node,
3720 tree name,
3721 tree args ATTRIBUTE_UNUSED,
3722 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3723{
3724 if (TREE_CODE (*node) != FUNCTION_DECL)
3725 {
3726 warning (0, "`%s' attribute only applies to functions",
3727 IDENTIFIER_POINTER (name));
3728 *no_add_attrs = true;
3729 }
3730
3731 return NULL_TREE;
3732}
3733
3734/* Handle the "vector" attribute. */
3735static tree
3736spu_handle_vector_attribute (tree * node, tree name,
3737 tree args ATTRIBUTE_UNUSED,
3738 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3739{
3740 tree type = *node, result = NULL_TREE;
3741 enum machine_mode mode;
3742 int unsigned_p;
3743
3744 while (POINTER_TYPE_P (type)
3745 || TREE_CODE (type) == FUNCTION_TYPE
3746 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3747 type = TREE_TYPE (type);
3748
3749 mode = TYPE_MODE (type);
3750
3751 unsigned_p = TYPE_UNSIGNED (type);
3752 switch (mode)
3753 {
3754 case DImode:
3755 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3756 break;
3757 case SImode:
3758 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3759 break;
3760 case HImode:
3761 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3762 break;
3763 case QImode:
3764 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3765 break;
3766 case SFmode:
3767 result = V4SF_type_node;
3768 break;
3769 case DFmode:
3770 result = V2DF_type_node;
3771 break;
3772 default:
3773 break;
3774 }
3775
3776 /* Propagate qualifiers attached to the element type
3777 onto the vector type. */
3778 if (result && result != type && TYPE_QUALS (type))
3779 result = build_qualified_type (result, TYPE_QUALS (type));
3780
3781 *no_add_attrs = true; /* No need to hang on to the attribute. */
3782
3783 if (!result)
3784 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3785 else
d991e6e8 3786 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3787
3788 return NULL_TREE;
3789}
3790
f2b32076 3791/* Return nonzero if FUNC is a naked function. */
644459d0 3792static int
3793spu_naked_function_p (tree func)
3794{
3795 tree a;
3796
3797 if (TREE_CODE (func) != FUNCTION_DECL)
3798 abort ();
3799
3800 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3801 return a != NULL_TREE;
3802}
3803
3804int
3805spu_initial_elimination_offset (int from, int to)
3806{
3807 int saved_regs_size = spu_saved_regs_size ();
3808 int sp_offset = 0;
abe32cce 3809 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3810 || get_frame_size () || saved_regs_size)
3811 sp_offset = STACK_POINTER_OFFSET;
3812 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3813 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3814 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3815 return get_frame_size ();
644459d0 3816 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3817 return sp_offset + crtl->outgoing_args_size
644459d0 3818 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3819 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3820 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3821 else
3822 gcc_unreachable ();
644459d0 3823}
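
/* Worked example (added commentary, not part of the original source):
   with get_frame_size () == 32, crtl->outgoing_args_size == 16 and
   spu_saved_regs_size () == 48 in a non-leaf function, the cases above
   give, keeping STACK_POINTER_OFFSET symbolic:

     FRAME_POINTER -> STACK_POINTER:       32 + 16 + STACK_POINTER_OFFSET
     FRAME_POINTER -> HARD_FRAME_POINTER:  32
     ARG_POINTER   -> STACK_POINTER:       STACK_POINTER_OFFSET + 16 + 32
                                           + 48 + STACK_POINTER_OFFSET
     ARG_POINTER   -> HARD_FRAME_POINTER:  32 + 48 + STACK_POINTER_OFFSET  */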
3824
3825rtx
fb80456a 3826spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3827{
3828 enum machine_mode mode = TYPE_MODE (type);
3829 int byte_size = ((mode == BLKmode)
3830 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3831
3832 /* Make sure small structs are left justified in a register. */
3833 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3834 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3835 {
3836 enum machine_mode smode;
3837 rtvec v;
3838 int i;
3839 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3840 int n = byte_size / UNITS_PER_WORD;
3841 v = rtvec_alloc (nregs);
3842 for (i = 0; i < n; i++)
3843 {
3844 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3845 gen_rtx_REG (TImode,
3846 FIRST_RETURN_REGNUM
3847 + i),
3848 GEN_INT (UNITS_PER_WORD * i));
3849 byte_size -= UNITS_PER_WORD;
3850 }
3851
3852 if (n < nregs)
3853 {
3854 if (byte_size < 4)
3855 byte_size = 4;
3856 smode =
3857 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3858 RTVEC_ELT (v, n) =
3859 gen_rtx_EXPR_LIST (VOIDmode,
3860 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3861 GEN_INT (UNITS_PER_WORD * n));
3862 }
3863 return gen_rtx_PARALLEL (mode, v);
3864 }
3865 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3866}
3867
3868rtx
3869spu_function_arg (CUMULATIVE_ARGS cum,
3870 enum machine_mode mode,
3871 tree type, int named ATTRIBUTE_UNUSED)
3872{
3873 int byte_size;
3874
3875 if (cum >= MAX_REGISTER_ARGS)
3876 return 0;
3877
3878 byte_size = ((mode == BLKmode)
3879 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3880
3881 /* The ABI does not allow parameters to be passed partially in
3882 a register and partially on the stack.  */
3883 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3884 return 0;
3885
3886 /* Make sure small structs are left justified in a register. */
3887 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3888 && byte_size < UNITS_PER_WORD && byte_size > 0)
3889 {
3890 enum machine_mode smode;
3891 rtx gr_reg;
3892 if (byte_size < 4)
3893 byte_size = 4;
3894 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3895 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3896 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3897 const0_rtx);
3898 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3899 }
3900 else
3901 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3902}
3903
3904/* Variable sized types are passed by reference. */
3905static bool
3906spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3907 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3908 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3909{
3910 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3911}
3912\f
3913
3914/* Var args. */
3915
3916/* Create and return the va_list datatype.
3917
3918 On SPU, va_list is an array type equivalent to
3919
3920 typedef struct __va_list_tag
3921 {
3922 void *__args __attribute__((__aligned(16)));
3923 void *__skip __attribute__((__aligned(16)));
3924
3925 } va_list[1];
3926
fa7637bd 3927 where __args points to the arg that will be returned by the next
644459d0 3928 va_arg(), and __skip points to the previous stack frame such that
3929 when __args == __skip we should advance __args by 32 bytes. */
3930static tree
3931spu_build_builtin_va_list (void)
3932{
3933 tree f_args, f_skip, record, type_decl;
3934 bool owp;
3935
3936 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3937
3938 type_decl =
3939 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3940
3941 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3942 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3943
3944 DECL_FIELD_CONTEXT (f_args) = record;
3945 DECL_ALIGN (f_args) = 128;
3946 DECL_USER_ALIGN (f_args) = 1;
3947
3948 DECL_FIELD_CONTEXT (f_skip) = record;
3949 DECL_ALIGN (f_skip) = 128;
3950 DECL_USER_ALIGN (f_skip) = 1;
3951
3952 TREE_CHAIN (record) = type_decl;
3953 TYPE_NAME (record) = type_decl;
3954 TYPE_FIELDS (record) = f_args;
3955 TREE_CHAIN (f_args) = f_skip;
3956
3957 /* We know this is being padded and we want it that way.  It is an internal
3958 type so hide the warnings from the user. */
3959 owp = warn_padded;
3960 warn_padded = false;
3961
3962 layout_type (record);
3963
3964 warn_padded = owp;
3965
3966 /* The correct type is an array type of one element. */
3967 return build_array_type (record, build_index_type (size_zero_node));
3968}
3969
3970/* Implement va_start by filling the va_list structure VALIST.
3971 NEXTARG points to the first anonymous stack argument.
3972
3973 The following global variables are used to initialize
3974 the va_list structure:
3975
abe32cce 3976 crtl->args.info;
644459d0 3977 the CUMULATIVE_ARGS for this function
3978
abe32cce 3979 crtl->args.arg_offset_rtx:
644459d0 3980 holds the offset of the first anonymous stack argument
3981 (relative to the virtual arg pointer). */
3982
8a58ed0a 3983static void
644459d0 3984spu_va_start (tree valist, rtx nextarg)
3985{
3986 tree f_args, f_skip;
3987 tree args, skip, t;
3988
3989 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3990 f_skip = TREE_CHAIN (f_args);
3991
3992 valist = build_va_arg_indirect_ref (valist);
3993 args =
3994 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3995 skip =
3996 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3997
3998 /* Find the __args area. */
3999 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4000 if (crtl->args.pretend_args_size > 0)
0de36bdb 4001 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4002 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4003 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4004 TREE_SIDE_EFFECTS (t) = 1;
4005 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4006
4007 /* Find the __skip area. */
4008 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4009 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4010 size_int (crtl->args.pretend_args_size
0de36bdb 4011 - STACK_POINTER_OFFSET));
75a70cf9 4012 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4013 TREE_SIDE_EFFECTS (t) = 1;
4014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4015}
4016
4017/* Gimplify va_arg by updating the va_list structure
4018 VALIST as required to retrieve an argument of type
4019 TYPE, and returning that argument.
4020
4021 ret = va_arg(VALIST, TYPE);
4022
4023 generates code equivalent to:
4024
4025 paddedsize = (sizeof(TYPE) + 15) & -16;
4026 if (VALIST.__args + paddedsize > VALIST.__skip
4027 && VALIST.__args <= VALIST.__skip)
4028 addr = VALIST.__skip + 32;
4029 else
4030 addr = VALIST.__args;
4031 VALIST.__args = addr + paddedsize;
4032 ret = *(TYPE *)addr;
4033 */
4034static tree
75a70cf9 4035spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4036 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4037{
4038 tree f_args, f_skip;
4039 tree args, skip;
4040 HOST_WIDE_INT size, rsize;
4041 tree paddedsize, addr, tmp;
4042 bool pass_by_reference_p;
4043
4044 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4045 f_skip = TREE_CHAIN (f_args);
4046
4047 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4048 args =
4049 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4050 skip =
4051 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4052
4053 addr = create_tmp_var (ptr_type_node, "va_arg");
4054 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4055
4056 /* if an object is dynamically sized, a pointer to it is passed
4057 instead of the object itself. */
4058 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4059 false);
4060 if (pass_by_reference_p)
4061 type = build_pointer_type (type);
4062 size = int_size_in_bytes (type);
4063 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4064
4065 /* build conditional expression to calculate addr. The expression
4066 will be gimplified later. */
0de36bdb 4067 paddedsize = size_int (rsize);
75a70cf9 4068 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4069 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4070 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4071 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4072 unshare_expr (skip)));
644459d0 4073
4074 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4075 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4076 size_int (32)), unshare_expr (args));
644459d0 4077
75a70cf9 4078 gimplify_assign (addr, tmp, pre_p);
644459d0 4079
4080 /* update VALIST.__args */
0de36bdb 4081 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4082 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4083
4084 addr = fold_convert (build_pointer_type (type), addr);
4085
4086 if (pass_by_reference_p)
4087 addr = build_va_arg_indirect_ref (addr);
4088
4089 return build_va_arg_indirect_ref (addr);
4090}
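
/* Worked example (added commentary): for "x = va_arg (ap, int)" the
   expansion built above behaves, in source-level terms, like

     paddedsize = 16;
     if (ap->__args + 16 > ap->__skip && ap->__args <= ap->__skip)
       addr = ap->__skip + 32;
     else
       addr = ap->__args;
     ap->__args = addr + 16;
     x = *(int *) addr;

   i.e. every argument occupies a 16-byte slot and the 32-byte skip is
   applied once, when __args catches up with __skip.  */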
4091
4092/* Save parameter registers starting with the register that corresponds
4093 to the first unnamed parameter.  If the first unnamed parameter is
4094 in the stack then save no registers. Set pretend_args_size to the
4095 amount of space needed to save the registers. */
4096void
4097spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4098 tree type, int *pretend_size, int no_rtl)
4099{
4100 if (!no_rtl)
4101 {
4102 rtx tmp;
4103 int regno;
4104 int offset;
4105 int ncum = *cum;
4106
4107 /* cum currently points to the last named argument; we want to
4108 start at the next argument. */
4109 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4110
4111 offset = -STACK_POINTER_OFFSET;
4112 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4113 {
4114 tmp = gen_frame_mem (V4SImode,
4115 plus_constant (virtual_incoming_args_rtx,
4116 offset));
4117 emit_move_insn (tmp,
4118 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4119 offset += 16;
4120 }
4121 *pretend_size = offset + STACK_POINTER_OFFSET;
4122 }
4123}
4124\f
4125void
4126spu_conditional_register_usage (void)
4127{
4128 if (flag_pic)
4129 {
4130 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4131 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4132 }
644459d0 4133}
4134
4135/* This is called to decide when we can simplify a load instruction. We
4136 must only return true for registers which we know will always be
4137 aligned, taking into account that CSE might replace this reg with
4138 another one that has not been marked aligned.
4139 So this is really only true for frame, stack and virtual registers,
fa7637bd 4140 which we know are always aligned and should not be adversely affected
4141 by CSE. */
644459d0 4142static int
4143regno_aligned_for_load (int regno)
4144{
4145 return regno == FRAME_POINTER_REGNUM
5df189be 4146 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 4147 || regno == ARG_POINTER_REGNUM
644459d0 4148 || regno == STACK_POINTER_REGNUM
5df189be 4149 || (regno >= FIRST_VIRTUAL_REGISTER
4150 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 4151}
4152
4153/* Return TRUE when mem is known to be 16-byte aligned. */
4154int
4155aligned_mem_p (rtx mem)
4156{
4157 if (MEM_ALIGN (mem) >= 128)
4158 return 1;
4159 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4160 return 1;
4161 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4162 {
4163 rtx p0 = XEXP (XEXP (mem, 0), 0);
4164 rtx p1 = XEXP (XEXP (mem, 0), 1);
4165 if (regno_aligned_for_load (REGNO (p0)))
4166 {
4167 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4168 return 1;
4169 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4170 return 1;
4171 }
4172 }
4173 else if (GET_CODE (XEXP (mem, 0)) == REG)
4174 {
4175 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4176 return 1;
4177 }
4178 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4179 return 1;
4180 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4181 {
4182 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4183 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4184 if (GET_CODE (p0) == SYMBOL_REF
4185 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4186 return 1;
4187 }
4188 return 0;
4189}
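
/* Example (added commentary): a MEM whose address is
   (plus (reg stack_pointer) (const_int 32)) is accepted because the
   stack pointer is one of the always-aligned registers above and the
   offset is a multiple of 16.  With an offset of 4 the PLUS case would
   reject it, unless MEM_ALIGN or the mode size already guaranteed
   alignment.  */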
4190
69ced2d6 4191/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4192 into its SYMBOL_REF_FLAGS. */
4193static void
4194spu_encode_section_info (tree decl, rtx rtl, int first)
4195{
4196 default_encode_section_info (decl, rtl, first);
4197
4198 /* If a variable has a forced alignment to < 16 bytes, mark it with
4199 SYMBOL_FLAG_ALIGN1. */
4200 if (TREE_CODE (decl) == VAR_DECL
4201 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4202 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4203}
4204
644459d0 4205/* Return TRUE if we are certain the mem refers to a complete object
4206 which is both 16-byte aligned and padded to a 16-byte boundary. This
4207 would make it safe to store with a single instruction.
4208 We guarantee the alignment and padding for static objects by aligning
4209 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4210 FIXME: We currently cannot guarantee this for objects on the stack
4211 because assign_parm_setup_stack calls assign_stack_local with the
4212 alignment of the parameter mode and in that case the alignment never
4213 gets adjusted by LOCAL_ALIGNMENT. */
4214static int
4215store_with_one_insn_p (rtx mem)
4216{
4217 rtx addr = XEXP (mem, 0);
4218 if (GET_MODE (mem) == BLKmode)
4219 return 0;
4220 /* Only static objects. */
4221 if (GET_CODE (addr) == SYMBOL_REF)
4222 {
4223 /* We use the associated declaration to make sure the access is
fa7637bd 4224 referring to the whole object.
644459d0 4225 We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4226 if it is necessary. Will there be cases where one exists, and
4227 the other does not? Will there be cases where both exist, but
4228 have different types? */
4229 tree decl = MEM_EXPR (mem);
4230 if (decl
4231 && TREE_CODE (decl) == VAR_DECL
4232 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4233 return 1;
4234 decl = SYMBOL_REF_DECL (addr);
4235 if (decl
4236 && TREE_CODE (decl) == VAR_DECL
4237 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4238 return 1;
4239 }
4240 return 0;
4241}
4242
4243int
4244spu_expand_mov (rtx * ops, enum machine_mode mode)
4245{
4246 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4247 abort ();
4248
4249 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4250 {
4251 rtx from = SUBREG_REG (ops[1]);
8d72495d 4252 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4253
4254 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4255 && GET_MODE_CLASS (imode) == MODE_INT
4256 && subreg_lowpart_p (ops[1]));
4257
4258 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4259 imode = SImode;
4260 if (imode != GET_MODE (from))
4261 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4262
4263 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4264 {
99bdde56 4265 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4266 emit_insn (GEN_FCN (icode) (ops[0], from));
4267 }
4268 else
4269 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4270 return 1;
4271 }
4272
4273 /* At least one of the operands needs to be a register. */
4274 if ((reload_in_progress | reload_completed) == 0
4275 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4276 {
4277 rtx temp = force_reg (mode, ops[1]);
4278 emit_move_insn (ops[0], temp);
4279 return 1;
4280 }
4281 if (reload_in_progress || reload_completed)
4282 {
dea01258 4283 if (CONSTANT_P (ops[1]))
4284 return spu_split_immediate (ops);
644459d0 4285 return 0;
4286 }
4287 else
4288 {
4289 if (GET_CODE (ops[0]) == MEM)
4290 {
4291 if (!spu_valid_move (ops))
4292 {
4293 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4294 gen_reg_rtx (TImode)));
4295 return 1;
4296 }
4297 }
4298 else if (GET_CODE (ops[1]) == MEM)
4299 {
4300 if (!spu_valid_move (ops))
4301 {
4302 emit_insn (gen_load
4303 (ops[0], ops[1], gen_reg_rtx (TImode),
4304 gen_reg_rtx (SImode)));
4305 return 1;
4306 }
4307 }
4308 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4309 extend them. */
4310 if (GET_CODE (ops[1]) == CONST_INT)
4311 {
4312 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4313 if (val != INTVAL (ops[1]))
4314 {
4315 emit_move_insn (ops[0], GEN_INT (val));
4316 return 1;
4317 }
4318 }
4319 }
4320 return 0;
4321}
4322
644459d0 4323void
4324spu_split_load (rtx * ops)
4325{
4326 enum machine_mode mode = GET_MODE (ops[0]);
4327 rtx addr, load, rot, mem, p0, p1;
4328 int rot_amt;
4329
4330 addr = XEXP (ops[1], 0);
4331
4332 rot = 0;
4333 rot_amt = 0;
4334 if (GET_CODE (addr) == PLUS)
4335 {
4336 /* 8 cases:
4337 aligned reg + aligned reg => lqx
4338 aligned reg + unaligned reg => lqx, rotqby
4339 aligned reg + aligned const => lqd
4340 aligned reg + unaligned const => lqd, rotqbyi
4341 unaligned reg + aligned reg => lqx, rotqby
4342 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4343 unaligned reg + aligned const => lqd, rotqby
4344 unaligned reg + unaligned const -> not allowed by legitimate address
4345 */
4346 p0 = XEXP (addr, 0);
4347 p1 = XEXP (addr, 1);
aa71ecd4 4348 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 4349 {
aa71ecd4 4350 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4351 {
4352 emit_insn (gen_addsi3 (ops[3], p0, p1));
4353 rot = ops[3];
4354 }
4355 else
4356 rot = p0;
4357 }
4358 else
4359 {
4360 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4361 {
4362 rot_amt = INTVAL (p1) & 15;
4363 p1 = GEN_INT (INTVAL (p1) & -16);
4364 addr = gen_rtx_PLUS (SImode, p0, p1);
4365 }
aa71ecd4 4366 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4367 rot = p1;
4368 }
4369 }
4370 else if (GET_CODE (addr) == REG)
4371 {
aa71ecd4 4372 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 4373 rot = addr;
4374 }
4375 else if (GET_CODE (addr) == CONST)
4376 {
4377 if (GET_CODE (XEXP (addr, 0)) == PLUS
4378 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4379 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4380 {
4381 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4382 if (rot_amt & -16)
4383 addr = gen_rtx_CONST (Pmode,
4384 gen_rtx_PLUS (Pmode,
4385 XEXP (XEXP (addr, 0), 0),
4386 GEN_INT (rot_amt & -16)));
4387 else
4388 addr = XEXP (XEXP (addr, 0), 0);
4389 }
4390 else
4391 rot = addr;
4392 }
4393 else if (GET_CODE (addr) == CONST_INT)
4394 {
4395 rot_amt = INTVAL (addr);
4396 addr = GEN_INT (rot_amt & -16);
4397 }
4398 else if (!ALIGNED_SYMBOL_REF_P (addr))
4399 rot = addr;
4400
4401 if (GET_MODE_SIZE (mode) < 4)
4402 rot_amt += GET_MODE_SIZE (mode) - 4;
4403
4404 rot_amt &= 15;
4405
4406 if (rot && rot_amt)
4407 {
4408 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4409 rot = ops[3];
4410 rot_amt = 0;
4411 }
4412
4413 load = ops[2];
4414
4415 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4416 mem = change_address (ops[1], TImode, addr);
4417
e04cf423 4418 emit_insn (gen_movti (load, mem));
644459d0 4419
4420 if (rot)
4421 emit_insn (gen_rotqby_ti (load, load, rot));
4422 else if (rot_amt)
4423 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4424
4425 if (reload_completed)
4426 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4427 else
4428 emit_insn (gen_spu_convert (ops[0], load));
4429}
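
/* Worked example (added commentary): a QImode load from (plus reg 5)
   takes the aligned-reg + unaligned-const case above.  The address is
   masked down to the containing quadword, rot_amt starts at 5 and is
   then adjusted by GET_MODE_SIZE (mode) - 4 = -3, giving 2.  Rotating
   the loaded TImode value left by 2 bytes moves the byte at offset 5
   into byte 3 of the register, the preferred slot for a QImode scalar.  */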
4430
4431void
4432spu_split_store (rtx * ops)
4433{
4434 enum machine_mode mode = GET_MODE (ops[0]);
4435 rtx pat = ops[2];
4436 rtx reg = ops[3];
4437 rtx addr, p0, p1, p1_lo, smem;
4438 int aform;
4439 int scalar;
4440
4441 addr = XEXP (ops[0], 0);
4442
4443 if (GET_CODE (addr) == PLUS)
4444 {
4445 /* 8 cases:
4446 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4447 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4448 aligned reg + aligned const => lqd, c?d, shuf, stqx
4449 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4450 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4451 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4452 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4453 unaligned reg + unaligned const -> not allowed by legitimate address
4454 */
4455 aform = 0;
4456 p0 = XEXP (addr, 0);
4457 p1 = p1_lo = XEXP (addr, 1);
4458 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4459 {
4460 p1_lo = GEN_INT (INTVAL (p1) & 15);
4461 p1 = GEN_INT (INTVAL (p1) & -16);
4462 addr = gen_rtx_PLUS (SImode, p0, p1);
4463 }
4464 }
4465 else if (GET_CODE (addr) == REG)
4466 {
4467 aform = 0;
4468 p0 = addr;
4469 p1 = p1_lo = const0_rtx;
4470 }
4471 else
4472 {
4473 aform = 1;
4474 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4475 p1 = 0; /* aform doesn't use p1 */
4476 p1_lo = addr;
4477 if (ALIGNED_SYMBOL_REF_P (addr))
4478 p1_lo = const0_rtx;
4479 else if (GET_CODE (addr) == CONST)
4480 {
4481 if (GET_CODE (XEXP (addr, 0)) == PLUS
4482 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4483 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4484 {
4485 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4486 if ((v & -16) != 0)
4487 addr = gen_rtx_CONST (Pmode,
4488 gen_rtx_PLUS (Pmode,
4489 XEXP (XEXP (addr, 0), 0),
4490 GEN_INT (v & -16)));
4491 else
4492 addr = XEXP (XEXP (addr, 0), 0);
4493 p1_lo = GEN_INT (v & 15);
4494 }
4495 }
4496 else if (GET_CODE (addr) == CONST_INT)
4497 {
4498 p1_lo = GEN_INT (INTVAL (addr) & 15);
4499 addr = GEN_INT (INTVAL (addr) & -16);
4500 }
4501 }
4502
e04cf423 4503 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4504
644459d0 4505 scalar = store_with_one_insn_p (ops[0]);
4506 if (!scalar)
4507 {
4508 /* We could copy the flags from the ops[0] MEM to mem here.
4509 We don't because we want this load to be optimized away if
4510 possible, and copying the flags will prevent that in certain
4511 cases, e.g. consider the volatile flag. */
4512
e04cf423 4513 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4514 set_mem_alias_set (lmem, 0);
4515 emit_insn (gen_movti (reg, lmem));
644459d0 4516
aa71ecd4 4517 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 4518 p0 = stack_pointer_rtx;
4519 if (!p1_lo)
4520 p1_lo = const0_rtx;
4521
4522 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4523 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4524 }
4525 else if (reload_completed)
4526 {
4527 if (GET_CODE (ops[1]) == REG)
4528 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4529 else if (GET_CODE (ops[1]) == SUBREG)
4530 emit_move_insn (reg,
4531 gen_rtx_REG (GET_MODE (reg),
4532 REGNO (SUBREG_REG (ops[1]))));
4533 else
4534 abort ();
4535 }
4536 else
4537 {
4538 if (GET_CODE (ops[1]) == REG)
4539 emit_insn (gen_spu_convert (reg, ops[1]));
4540 else if (GET_CODE (ops[1]) == SUBREG)
4541 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4542 else
4543 abort ();
4544 }
4545
4546 if (GET_MODE_SIZE (mode) < 4 && scalar)
4547 emit_insn (gen_shlqby_ti
4548 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4549
644459d0 4550 smem = change_address (ops[0], TImode, addr);
4551 /* We can't use the previous alias set because the memory has changed
4552 size and can potentially overlap objects of other types. */
4553 set_mem_alias_set (smem, 0);
4554
e04cf423 4555 emit_insn (gen_movti (smem, reg));
644459d0 4556}
4557
4558/* Return TRUE if X is MEM which is a struct member reference
4559 and the member can safely be loaded and stored with a single
4560 instruction because it is padded. */
4561static int
4562mem_is_padded_component_ref (rtx x)
4563{
4564 tree t = MEM_EXPR (x);
4565 tree r;
4566 if (!t || TREE_CODE (t) != COMPONENT_REF)
4567 return 0;
4568 t = TREE_OPERAND (t, 1);
4569 if (!t || TREE_CODE (t) != FIELD_DECL
4570 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4571 return 0;
4572 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4573 r = DECL_FIELD_CONTEXT (t);
4574 if (!r || TREE_CODE (r) != RECORD_TYPE)
4575 return 0;
4576 /* Make sure they are the same mode */
4577 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4578 return 0;
4579 /* If there are no following fields then the field alignment assures
fa7637bd 4580 the structure is padded to that alignment, which means this field is
4581 padded too. */
644459d0 4582 if (TREE_CHAIN (t) == 0)
4583 return 1;
4584 /* If the following field is also aligned then this field will be
4585 padded. */
4586 t = TREE_CHAIN (t);
4587 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4588 return 1;
4589 return 0;
4590}
4591
c7b91b14 4592/* Parse the -mfixed-range= option string. */
4593static void
4594fix_range (const char *const_str)
4595{
4596 int i, first, last;
4597 char *str, *dash, *comma;
4598
4599 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4600 REG2 are either register names or register numbers. The effect
4601 of this option is to mark the registers in the range from REG1 to
4602 REG2 as ``fixed'' so they won't be used by the compiler. */
4603
4604 i = strlen (const_str);
4605 str = (char *) alloca (i + 1);
4606 memcpy (str, const_str, i + 1);
4607
4608 while (1)
4609 {
4610 dash = strchr (str, '-');
4611 if (!dash)
4612 {
4613 warning (0, "value of -mfixed-range must have form REG1-REG2");
4614 return;
4615 }
4616 *dash = '\0';
4617 comma = strchr (dash + 1, ',');
4618 if (comma)
4619 *comma = '\0';
4620
4621 first = decode_reg_name (str);
4622 if (first < 0)
4623 {
4624 warning (0, "unknown register name: %s", str);
4625 return;
4626 }
4627
4628 last = decode_reg_name (dash + 1);
4629 if (last < 0)
4630 {
4631 warning (0, "unknown register name: %s", dash + 1);
4632 return;
4633 }
4634
4635 *dash = '-';
4636
4637 if (first > last)
4638 {
4639 warning (0, "%s-%s is an empty range", str, dash + 1);
4640 return;
4641 }
4642
4643 for (i = first; i <= last; ++i)
4644 fixed_regs[i] = call_used_regs[i] = 1;
4645
4646 if (!comma)
4647 break;
4648
4649 *comma = ',';
4650 str = comma + 1;
4651 }
4652}
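
/* Usage example (added commentary): a command line option such as

     -mfixed-range=80-89,100-127

   marks registers 80-89 and 100-127 as fixed and call-used, so the
   register allocator will not touch them; several comma-separated
   ranges may be given.  The name on either side of each '-' must be
   acceptable to decode_reg_name, so the exact spelling (with or
   without a '$' prefix) depends on the target's register names and the
   digits above are only illustrative.  */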
4653
644459d0 4654int
4655spu_valid_move (rtx * ops)
4656{
4657 enum machine_mode mode = GET_MODE (ops[0]);
4658 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4659 return 0;
4660
4661 /* init_expr_once tries to recog against load and store insns to set
4662 the direct_load[] and direct_store[] arrays. We always want to
4663 consider those loads and stores valid. init_expr_once is called in
4664 the context of a dummy function which does not have a decl. */
4665 if (cfun->decl == 0)
4666 return 1;
4667
4668 /* Don't allow loads/stores which would require more than 1 insn.
4669 During and after reload we assume loads and stores only take 1
4670 insn. */
4671 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4672 {
4673 if (GET_CODE (ops[0]) == MEM
4674 && (GET_MODE_SIZE (mode) < 4
4675 || !(store_with_one_insn_p (ops[0])
4676 || mem_is_padded_component_ref (ops[0]))))
4677 return 0;
4678 if (GET_CODE (ops[1]) == MEM
4679 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4680 return 0;
4681 }
4682 return 1;
4683}
4684
4685/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4686 can be generated using the fsmbi instruction. */
4687int
4688fsmbi_const_p (rtx x)
4689{
dea01258 4690 if (CONSTANT_P (x))
4691 {
5df189be 4692 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4693 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4694 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4695 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4696 }
4697 return 0;
4698}
4699
4700/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4701 can be generated using the cbd, chd, cwd or cdd instruction. */
4702int
4703cpat_const_p (rtx x, enum machine_mode mode)
4704{
4705 if (CONSTANT_P (x))
4706 {
4707 enum immediate_class c = classify_immediate (x, mode);
4708 return c == IC_CPAT;
4709 }
4710 return 0;
4711}
644459d0 4712
dea01258 4713rtx
4714gen_cpat_const (rtx * ops)
4715{
4716 unsigned char dst[16];
4717 int i, offset, shift, isize;
4718 if (GET_CODE (ops[3]) != CONST_INT
4719 || GET_CODE (ops[2]) != CONST_INT
4720 || (GET_CODE (ops[1]) != CONST_INT
4721 && GET_CODE (ops[1]) != REG))
4722 return 0;
4723 if (GET_CODE (ops[1]) == REG
4724 && (!REG_POINTER (ops[1])
4725 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4726 return 0;
644459d0 4727
4728 for (i = 0; i < 16; i++)
dea01258 4729 dst[i] = i + 16;
4730 isize = INTVAL (ops[3]);
4731 if (isize == 1)
4732 shift = 3;
4733 else if (isize == 2)
4734 shift = 2;
4735 else
4736 shift = 0;
4737 offset = (INTVAL (ops[2]) +
4738 (GET_CODE (ops[1]) ==
4739 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4740 for (i = 0; i < isize; i++)
4741 dst[offset + i] = i + shift;
4742 return array_to_constant (TImode, dst);
644459d0 4743}
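
/* Worked example (added commentary): for a 4-byte insert at byte
   offset 4 (ops[3] == 4, ops[2] + ops[1] == 4) the loop above leaves

     dst = { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }

   which, used as a shufb control, copies bytes 4..7 of the result from
   the first source (the new scalar) and every other byte from the
   second source (the original quadword), matching what the cwd
   instruction would produce.  */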
4744
4745/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4746 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4747 than 16 bytes, the value is repeated across the rest of the array. */
4748void
4749constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4750{
4751 HOST_WIDE_INT val;
4752 int i, j, first;
4753
4754 memset (arr, 0, 16);
4755 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4756 if (GET_CODE (x) == CONST_INT
4757 || (GET_CODE (x) == CONST_DOUBLE
4758 && (mode == SFmode || mode == DFmode)))
4759 {
4760 gcc_assert (mode != VOIDmode && mode != BLKmode);
4761
4762 if (GET_CODE (x) == CONST_DOUBLE)
4763 val = const_double_to_hwint (x);
4764 else
4765 val = INTVAL (x);
4766 first = GET_MODE_SIZE (mode) - 1;
4767 for (i = first; i >= 0; i--)
4768 {
4769 arr[i] = val & 0xff;
4770 val >>= 8;
4771 }
4772 /* Splat the constant across the whole array. */
4773 for (j = 0, i = first + 1; i < 16; i++)
4774 {
4775 arr[i] = arr[j];
4776 j = (j == first) ? 0 : j + 1;
4777 }
4778 }
4779 else if (GET_CODE (x) == CONST_DOUBLE)
4780 {
4781 val = CONST_DOUBLE_LOW (x);
4782 for (i = 15; i >= 8; i--)
4783 {
4784 arr[i] = val & 0xff;
4785 val >>= 8;
4786 }
4787 val = CONST_DOUBLE_HIGH (x);
4788 for (i = 7; i >= 0; i--)
4789 {
4790 arr[i] = val & 0xff;
4791 val >>= 8;
4792 }
4793 }
4794 else if (GET_CODE (x) == CONST_VECTOR)
4795 {
4796 int units;
4797 rtx elt;
4798 mode = GET_MODE_INNER (mode);
4799 units = CONST_VECTOR_NUNITS (x);
4800 for (i = 0; i < units; i++)
4801 {
4802 elt = CONST_VECTOR_ELT (x, i);
4803 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4804 {
4805 if (GET_CODE (elt) == CONST_DOUBLE)
4806 val = const_double_to_hwint (elt);
4807 else
4808 val = INTVAL (elt);
4809 first = GET_MODE_SIZE (mode) - 1;
4810 if (first + i * GET_MODE_SIZE (mode) > 16)
4811 abort ();
4812 for (j = first; j >= 0; j--)
4813 {
4814 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4815 val >>= 8;
4816 }
4817 }
4818 }
4819 }
4820 else
4821 gcc_unreachable();
4822}
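
/* Worked example (added commentary): for the SImode constant 0x01020304
   the integer branch above fills arr[0..3] with { 0x01, 0x02, 0x03,
   0x04 } and the splat loop then repeats that pattern, leaving

     arr = { 01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04 }

   so a scalar constant and the corresponding splatted vector constant
   share one byte image.  */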
4823
4824/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4825 smaller than 16 bytes, use the bytes that would represent that value
4826 in a register, e.g., for QImode return the value of arr[3]. */
4827rtx
4828array_to_constant (enum machine_mode mode, unsigned char arr[16])
4829{
4830 enum machine_mode inner_mode;
4831 rtvec v;
4832 int units, size, i, j, k;
4833 HOST_WIDE_INT val;
4834
4835 if (GET_MODE_CLASS (mode) == MODE_INT
4836 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4837 {
4838 j = GET_MODE_SIZE (mode);
4839 i = j < 4 ? 4 - j : 0;
4840 for (val = 0; i < j; i++)
4841 val = (val << 8) | arr[i];
4842 val = trunc_int_for_mode (val, mode);
4843 return GEN_INT (val);
4844 }
4845
4846 if (mode == TImode)
4847 {
4848 HOST_WIDE_INT high;
4849 for (i = high = 0; i < 8; i++)
4850 high = (high << 8) | arr[i];
4851 for (i = 8, val = 0; i < 16; i++)
4852 val = (val << 8) | arr[i];
4853 return immed_double_const (val, high, TImode);
4854 }
4855 if (mode == SFmode)
4856 {
4857 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4858 val = trunc_int_for_mode (val, SImode);
171b6d22 4859 return hwint_to_const_double (SFmode, val);
644459d0 4860 }
4861 if (mode == DFmode)
4862 {
1f915911 4863 for (i = 0, val = 0; i < 8; i++)
4864 val = (val << 8) | arr[i];
171b6d22 4865 return hwint_to_const_double (DFmode, val);
644459d0 4866 }
4867
4868 if (!VECTOR_MODE_P (mode))
4869 abort ();
4870
4871 units = GET_MODE_NUNITS (mode);
4872 size = GET_MODE_UNIT_SIZE (mode);
4873 inner_mode = GET_MODE_INNER (mode);
4874 v = rtvec_alloc (units);
4875
4876 for (k = i = 0; i < units; ++i)
4877 {
4878 val = 0;
4879 for (j = 0; j < size; j++, k++)
4880 val = (val << 8) | arr[k];
4881
4882 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4883 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4884 else
4885 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4886 }
4887 if (k > 16)
4888 abort ();
4889
4890 return gen_rtx_CONST_VECTOR (mode, v);
4891}
4892
4893static void
4894reloc_diagnostic (rtx x)
4895{
4896 tree loc_decl, decl = 0;
4897 const char *msg;
4898 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4899 return;
4900
4901 if (GET_CODE (x) == SYMBOL_REF)
4902 decl = SYMBOL_REF_DECL (x);
4903 else if (GET_CODE (x) == CONST
4904 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4905 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4906
4907 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4908 if (decl && !DECL_P (decl))
4909 decl = 0;
4910
4911 /* We use last_assemble_variable_decl to get line information. It's
4912 not always going to be right and might not even be close, but will
4913 be right for the more common cases. */
5df189be 4914 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4915 loc_decl = decl;
4916 else
4917 loc_decl = last_assemble_variable_decl;
4918
4919 /* The decl could be a string constant. */
4920 if (decl && DECL_P (decl))
4921 msg = "%Jcreating run-time relocation for %qD";
4922 else
4923 msg = "creating run-time relocation";
4924
99369027 4925 if (TARGET_WARN_RELOC)
644459d0 4926 warning (0, msg, loc_decl, decl);
99369027 4927 else
4928 error (msg, loc_decl, decl);
644459d0 4929}
4930
4931/* Hook into assemble_integer so we can generate an error for run-time
4932 relocations. The SPU ABI disallows them. */
4933static bool
4934spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4935{
4936 /* By default run-time relocations aren't supported, but we allow them
4937 in case users support them in their own run-time loader, and we provide
4938 a warning for those users that don't. */
4939 if ((GET_CODE (x) == SYMBOL_REF)
4940 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4941 reloc_diagnostic (x);
4942
4943 return default_assemble_integer (x, size, aligned_p);
4944}
4945
4946static void
4947spu_asm_globalize_label (FILE * file, const char *name)
4948{
4949 fputs ("\t.global\t", file);
4950 assemble_name (file, name);
4951 fputs ("\n", file);
4952}
4953
4954static bool
f529eb25 4955spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4956 bool speed ATTRIBUTE_UNUSED)
644459d0 4957{
4958 enum machine_mode mode = GET_MODE (x);
4959 int cost = COSTS_N_INSNS (2);
4960
4961 /* Folding to a CONST_VECTOR will use extra space but there might
4962 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4963 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4964 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4965 because this cost will only be compared against a single insn.
4966 if (code == CONST_VECTOR)
4967 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4968 */
4969
4970 /* Use defaults for float operations. Not accurate but good enough. */
4971 if (mode == DFmode)
4972 {
4973 *total = COSTS_N_INSNS (13);
4974 return true;
4975 }
4976 if (mode == SFmode)
4977 {
4978 *total = COSTS_N_INSNS (6);
4979 return true;
4980 }
4981 switch (code)
4982 {
4983 case CONST_INT:
4984 if (satisfies_constraint_K (x))
4985 *total = 0;
4986 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4987 *total = COSTS_N_INSNS (1);
4988 else
4989 *total = COSTS_N_INSNS (3);
4990 return true;
4991
4992 case CONST:
4993 *total = COSTS_N_INSNS (3);
4994 return true;
4995
4996 case LABEL_REF:
4997 case SYMBOL_REF:
4998 *total = COSTS_N_INSNS (0);
4999 return true;
5000
5001 case CONST_DOUBLE:
5002 *total = COSTS_N_INSNS (5);
5003 return true;
5004
5005 case FLOAT_EXTEND:
5006 case FLOAT_TRUNCATE:
5007 case FLOAT:
5008 case UNSIGNED_FLOAT:
5009 case FIX:
5010 case UNSIGNED_FIX:
5011 *total = COSTS_N_INSNS (7);
5012 return true;
5013
5014 case PLUS:
5015 if (mode == TImode)
5016 {
5017 *total = COSTS_N_INSNS (9);
5018 return true;
5019 }
5020 break;
5021
5022 case MULT:
5023 cost =
5024 GET_CODE (XEXP (x, 0)) ==
5025 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5026 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5027 {
5028 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5029 {
5030 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5031 cost = COSTS_N_INSNS (14);
5032 if ((val & 0xffff) == 0)
5033 cost = COSTS_N_INSNS (9);
5034 else if (val > 0 && val < 0x10000)
5035 cost = COSTS_N_INSNS (11);
5036 }
5037 }
5038 *total = cost;
5039 return true;
5040 case DIV:
5041 case UDIV:
5042 case MOD:
5043 case UMOD:
5044 *total = COSTS_N_INSNS (20);
5045 return true;
5046 case ROTATE:
5047 case ROTATERT:
5048 case ASHIFT:
5049 case ASHIFTRT:
5050 case LSHIFTRT:
5051 *total = COSTS_N_INSNS (4);
5052 return true;
5053 case UNSPEC:
5054 if (XINT (x, 1) == UNSPEC_CONVERT)
5055 *total = COSTS_N_INSNS (0);
5056 else
5057 *total = COSTS_N_INSNS (4);
5058 return true;
5059 }
5060 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5061 if (GET_MODE_CLASS (mode) == MODE_INT
5062 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5063 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5064 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5065 *total = cost;
5066 return true;
5067}
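
/* Example (added commentary): a DImode PLUS does not hit the TImode
   special case above, so it falls through to the final scaling step and
   costs COSTS_N_INSNS (2) * (8/4) * (8/4), i.e. four times the base
   SImode cost, reflecting the extra instructions needed for multi-word
   arithmetic.  */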
5068
1bd43494 5069static enum machine_mode
5070spu_unwind_word_mode (void)
644459d0 5071{
1bd43494 5072 return SImode;
644459d0 5073}
5074
5075/* Decide whether we can make a sibling call to a function. DECL is the
5076 declaration of the function being targeted by the call and EXP is the
5077 CALL_EXPR representing the call. */
5078static bool
5079spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5080{
5081 return decl && !TARGET_LARGE_MEM;
5082}
5083
5084/* We need to correctly update the back chain pointer and the Available
5085 Stack Size (which is in the second slot of the sp register).  */
5086void
5087spu_allocate_stack (rtx op0, rtx op1)
5088{
5089 HOST_WIDE_INT v;
5090 rtx chain = gen_reg_rtx (V4SImode);
5091 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5092 rtx sp = gen_reg_rtx (V4SImode);
5093 rtx splatted = gen_reg_rtx (V4SImode);
5094 rtx pat = gen_reg_rtx (TImode);
5095
5096 /* copy the back chain so we can save it back again. */
5097 emit_move_insn (chain, stack_bot);
5098
5099 op1 = force_reg (SImode, op1);
5100
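  /* Added commentary: the TImode constant below is the shufb control
     { 00 01 02 03 } repeated four times.  It splats the SImode size in
     op1 from its preferred slot into all four word slots, so the vector
     subtract updates the stack pointer (slot 0) and the Available Stack
     Size (slot 1) in a single operation.  */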
5101 v = 0x1020300010203ll;
5102 emit_move_insn (pat, immed_double_const (v, v, TImode));
5103 emit_insn (gen_shufb (splatted, op1, op1, pat));
5104
5105 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5106 emit_insn (gen_subv4si3 (sp, sp, splatted));
5107
5108 if (flag_stack_check)
5109 {
5110 rtx avail = gen_reg_rtx(SImode);
5111 rtx result = gen_reg_rtx(SImode);
5112 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5113 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5114 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5115 }
5116
5117 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5118
5119 emit_move_insn (stack_bot, chain);
5120
5121 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5122}
5123
5124void
5125spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5126{
5127 static unsigned char arr[16] =
5128 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5129 rtx temp = gen_reg_rtx (SImode);
5130 rtx temp2 = gen_reg_rtx (SImode);
5131 rtx temp3 = gen_reg_rtx (V4SImode);
5132 rtx temp4 = gen_reg_rtx (V4SImode);
5133 rtx pat = gen_reg_rtx (TImode);
5134 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5135
5136 /* Restore the backchain from the first word, sp from the second. */
5137 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5138 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5139
5140 emit_move_insn (pat, array_to_constant (TImode, arr));
5141
5142 /* Compute Available Stack Size for sp */
5143 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5144 emit_insn (gen_shufb (temp3, temp, temp, pat));
5145
5146 /* Compute Available Stack Size for back chain */
5147 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5148 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5149 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5150
5151 emit_insn (gen_addv4si3 (sp, sp, temp3));
5152 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5153}
5154
5155static void
5156spu_init_libfuncs (void)
5157{
5158 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5159 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5160 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5161 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5162 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5163 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5164 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5165 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5166 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5167 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5168 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5169
5170 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5171 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5172
5173 set_optab_libfunc (smul_optab, TImode, "__multi3");
5174 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5175 set_optab_libfunc (smod_optab, TImode, "__modti3");
5176 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5177 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5178 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5179}
5180
5181/* Make a subreg, stripping any existing subreg. We could possibly just
5182 call simplify_subreg, but in this case we know what we want. */
5183rtx
5184spu_gen_subreg (enum machine_mode mode, rtx x)
5185{
5186 if (GET_CODE (x) == SUBREG)
5187 x = SUBREG_REG (x);
5188 if (GET_MODE (x) == mode)
5189 return x;
5190 return gen_rtx_SUBREG (mode, x, 0);
5191}
5192
5193static bool
fb80456a 5194spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5195{
5196 return (TYPE_MODE (type) == BLKmode
5197 && ((type) == 0
5198 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5199 || int_size_in_bytes (type) >
5200 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5201}
5202\f
5203/* Create the built-in types and functions */
5204
c2233b46 5205enum spu_function_code
5206{
5207#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5208#include "spu-builtins.def"
5209#undef DEF_BUILTIN
5210 NUM_SPU_BUILTINS
5211};
5212
5213extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5214
644459d0 5215struct spu_builtin_description spu_builtins[] = {
5216#define DEF_BUILTIN(fcode, icode, name, type, params) \
5217 {fcode, icode, name, type, params, NULL_TREE},
5218#include "spu-builtins.def"
5219#undef DEF_BUILTIN
5220};
5221
5222static void
5223spu_init_builtins (void)
5224{
5225 struct spu_builtin_description *d;
5226 unsigned int i;
5227
5228 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5229 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5230 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5231 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5232 V4SF_type_node = build_vector_type (float_type_node, 4);
5233 V2DF_type_node = build_vector_type (double_type_node, 2);
5234
5235 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5236 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5237 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5238 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5239
c4ecce0c 5240 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5241
5242 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5243 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5244 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5245 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5246 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5247 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5248 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5249 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5250 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5251 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5252 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5253 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5254
5255 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5256 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5257 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5258 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5259 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5260 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5261 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5262 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5263
5264 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5265 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5266
5267 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5268
5269 spu_builtin_types[SPU_BTI_PTR] =
5270 build_pointer_type (build_qualified_type
5271 (void_type_node,
5272 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5273
5274 /* For each builtin we build a new prototype. The tree code will make
5275 sure nodes are shared. */
5276 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5277 {
5278 tree p;
5279 char name[64]; /* build_function will make a copy. */
5280 int parm;
5281
5282 if (d->name == 0)
5283 continue;
5284
5dfbd18f 5285 /* Find last parm. */
644459d0 5286 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5287 ;
644459d0 5288
5289 p = void_list_node;
5290 while (parm > 1)
5291 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5292
5293 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5294
5295 sprintf (name, "__builtin_%s", d->name);
5296 d->fndecl =
5297 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5298 NULL, NULL_TREE);
a76866d3 5299 if (d->fcode == SPU_MASK_FOR_LOAD)
5300 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5301
5302 /* These builtins don't throw. */
5303 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5304 }
5305}
5306
cf31d486 5307void
5308spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5309{
5310 static unsigned char arr[16] =
5311 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5312
5313 rtx temp = gen_reg_rtx (Pmode);
5314 rtx temp2 = gen_reg_rtx (V4SImode);
5315 rtx temp3 = gen_reg_rtx (V4SImode);
5316 rtx pat = gen_reg_rtx (TImode);
5317 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5318
5319 emit_move_insn (pat, array_to_constant (TImode, arr));
5320
5321 /* Restore the sp. */
5322 emit_move_insn (temp, op1);
5323 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5324
5325 /* Compute available stack size for sp. */
5326 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5327 emit_insn (gen_shufb (temp3, temp, temp, pat));
5328
5329 emit_insn (gen_addv4si3 (sp, sp, temp3));
5330 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5331}
5332
644459d0 5333int
5334spu_safe_dma (HOST_WIDE_INT channel)
5335{
006e4b96 5336 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5337}
5338
5339void
5340spu_builtin_splats (rtx ops[])
5341{
5342 enum machine_mode mode = GET_MODE (ops[0]);
5343 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5344 {
5345 unsigned char arr[16];
5346 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5347 emit_move_insn (ops[0], array_to_constant (mode, arr));
5348 }
644459d0 5349 else
5350 {
5351 rtx reg = gen_reg_rtx (TImode);
5352 rtx shuf;
5353 if (GET_CODE (ops[1]) != REG
5354 && GET_CODE (ops[1]) != SUBREG)
5355 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5356 switch (mode)
5357 {
5358 case V2DImode:
5359 case V2DFmode:
5360 shuf =
5361 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5362 TImode);
5363 break;
5364 case V4SImode:
5365 case V4SFmode:
5366 shuf =
5367 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5368 TImode);
5369 break;
5370 case V8HImode:
5371 shuf =
5372 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5373 TImode);
5374 break;
5375 case V16QImode:
5376 shuf =
5377 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5378 TImode);
5379 break;
5380 default:
5381 abort ();
5382 }
5383 emit_move_insn (reg, shuf);
5384 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5385 }
5386}
5387
5388void
5389spu_builtin_extract (rtx ops[])
5390{
5391 enum machine_mode mode;
5392 rtx rot, from, tmp;
5393
5394 mode = GET_MODE (ops[1]);
5395
5396 if (GET_CODE (ops[2]) == CONST_INT)
5397 {
5398 switch (mode)
5399 {
5400 case V16QImode:
5401 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5402 break;
5403 case V8HImode:
5404 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5405 break;
5406 case V4SFmode:
5407 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5408 break;
5409 case V4SImode:
5410 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5411 break;
5412 case V2DImode:
5413 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5414 break;
5415 case V2DFmode:
5416 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5417 break;
5418 default:
5419 abort ();
5420 }
5421 return;
5422 }
5423
5424 from = spu_gen_subreg (TImode, ops[1]);
5425 rot = gen_reg_rtx (TImode);
5426 tmp = gen_reg_rtx (SImode);
5427
5428 switch (mode)
5429 {
5430 case V16QImode:
5431 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5432 break;
5433 case V8HImode:
5434 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5435 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5436 break;
5437 case V4SFmode:
5438 case V4SImode:
5439 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5440 break;
5441 case V2DImode:
5442 case V2DFmode:
5443 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5444 break;
5445 default:
5446 abort ();
5447 }
5448 emit_insn (gen_rotqby_ti (rot, from, tmp));
5449
5450 emit_insn (gen_spu_convert (ops[0], rot));
5451}
5452
5453void
5454spu_builtin_insert (rtx ops[])
5455{
5456 enum machine_mode mode = GET_MODE (ops[0]);
5457 enum machine_mode imode = GET_MODE_INNER (mode);
5458 rtx mask = gen_reg_rtx (TImode);
5459 rtx offset;
5460
5461 if (GET_CODE (ops[3]) == CONST_INT)
5462 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5463 else
5464 {
5465 offset = gen_reg_rtx (SImode);
5466 emit_insn (gen_mulsi3
5467 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5468 }
5469 emit_insn (gen_cpat
5470 (mask, stack_pointer_rtx, offset,
5471 GEN_INT (GET_MODE_SIZE (imode))));
5472 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5473}
5474
5475void
5476spu_builtin_promote (rtx ops[])
5477{
5478 enum machine_mode mode, imode;
5479 rtx rot, from, offset;
5480 HOST_WIDE_INT pos;
5481
5482 mode = GET_MODE (ops[0]);
5483 imode = GET_MODE_INNER (mode);
5484
5485 from = gen_reg_rtx (TImode);
5486 rot = spu_gen_subreg (TImode, ops[0]);
5487
5488 emit_insn (gen_spu_convert (from, ops[1]));
5489
5490 if (GET_CODE (ops[2]) == CONST_INT)
5491 {
5492 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5493 if (GET_MODE_SIZE (imode) < 4)
5494 pos += 4 - GET_MODE_SIZE (imode);
5495 offset = GEN_INT (pos & 15);
5496 }
5497 else
5498 {
5499 offset = gen_reg_rtx (SImode);
5500 switch (mode)
5501 {
5502 case V16QImode:
5503 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5504 break;
5505 case V8HImode:
5506 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5507 emit_insn (gen_addsi3 (offset, offset, offset));
5508 break;
5509 case V4SFmode:
5510 case V4SImode:
5511 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5512 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5513 break;
5514 case V2DImode:
5515 case V2DFmode:
5516 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5517 break;
5518 default:
5519 abort ();
5520 }
5521 }
5522 emit_insn (gen_rotqby_ti (rot, from, offset));
5523}
5524
5525void
5526spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5527{
5528 rtx shuf = gen_reg_rtx (V4SImode);
5529 rtx insn = gen_reg_rtx (V4SImode);
5530 rtx shufc;
5531 rtx insnc;
5532 rtx mem;
5533
5534 fnaddr = force_reg (SImode, fnaddr);
5535 cxt = force_reg (SImode, cxt);
5536
5537 if (TARGET_LARGE_MEM)
5538 {
5539 rtx rotl = gen_reg_rtx (V4SImode);
5540 rtx mask = gen_reg_rtx (V4SImode);
5541 rtx bi = gen_reg_rtx (SImode);
5542 unsigned char shufa[16] = {
5543 2, 3, 0, 1, 18, 19, 16, 17,
5544 0, 1, 2, 3, 16, 17, 18, 19
5545 };
5546 unsigned char insna[16] = {
5547 0x41, 0, 0, 79,
5548 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5549 0x60, 0x80, 0, 79,
5550 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5551 };
5552
5553 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5554 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5555
5556 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5557 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5558 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5559 emit_insn (gen_selb (insn, insnc, rotl, mask));
5560
5561 mem = memory_address (Pmode, tramp);
5562 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5563
5564 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5565 mem = memory_address (Pmode, plus_constant (tramp, 16));
5566 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5567 }
5568 else
5569 {
5570 rtx scxt = gen_reg_rtx (SImode);
5571 rtx sfnaddr = gen_reg_rtx (SImode);
5572 unsigned char insna[16] = {
5573 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5574 0x30, 0, 0, 0,
5575 0, 0, 0, 0,
5576 0, 0, 0, 0
5577 };
5578
5579 shufc = gen_reg_rtx (TImode);
5580 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5581
 5582      /* By or'ing all of cxt with the ila opcode, we are assuming that
 5583         cxt fits in 18 bits and that its last 4 bits are zero.  This
 5584         will be true if the stack pointer is initialized to 0x3fff0 at
 5585         program start; otherwise the ila instruction will be garbage. */
5586
5587 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5588 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5589 emit_insn (gen_cpat
5590 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5591 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5592 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5593
5594 mem = memory_address (Pmode, tramp);
5595 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5596
5597 }
5598 emit_insn (gen_sync ());
5599}
5600
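/* Sign extend OPS[1] into the wider mode of OPS[0].  A sign register is
   computed from the source, and a shufb pattern is built that keeps the
   source bytes in the low part of the result and fills the high bytes
   from the sign register.  */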
5601void
5602spu_expand_sign_extend (rtx ops[])
5603{
5604 unsigned char arr[16];
5605 rtx pat = gen_reg_rtx (TImode);
5606 rtx sign, c;
5607 int i, last;
5608 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5609 if (GET_MODE (ops[1]) == QImode)
5610 {
5611 sign = gen_reg_rtx (HImode);
5612 emit_insn (gen_extendqihi2 (sign, ops[1]));
5613 for (i = 0; i < 16; i++)
5614 arr[i] = 0x12;
5615 arr[last] = 0x13;
5616 }
5617 else
5618 {
5619 for (i = 0; i < 16; i++)
5620 arr[i] = 0x10;
5621 switch (GET_MODE (ops[1]))
5622 {
5623 case HImode:
5624 sign = gen_reg_rtx (SImode);
5625 emit_insn (gen_extendhisi2 (sign, ops[1]));
5626 arr[last] = 0x03;
5627 arr[last - 1] = 0x02;
5628 break;
5629 case SImode:
5630 sign = gen_reg_rtx (SImode);
5631 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5632 for (i = 0; i < 4; i++)
5633 arr[last - i] = 3 - i;
5634 break;
5635 case DImode:
5636 sign = gen_reg_rtx (SImode);
5637 c = gen_reg_rtx (SImode);
5638 emit_insn (gen_spu_convert (c, ops[1]));
5639 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5640 for (i = 0; i < 8; i++)
5641 arr[last - i] = 7 - i;
5642 break;
5643 default:
5644 abort ();
5645 }
5646 }
5647 emit_move_insn (pat, array_to_constant (TImode, arr));
5648 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5649}
5650
 5651/* Expand vector initialization.  If there are any constant parts,
 5652   load the constant parts first.  Then load any non-constant parts.  */
5653void
5654spu_expand_vector_init (rtx target, rtx vals)
5655{
5656 enum machine_mode mode = GET_MODE (target);
5657 int n_elts = GET_MODE_NUNITS (mode);
5658 int n_var = 0;
5659 bool all_same = true;
790c536c 5660 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5661 int i;
5662
5663 first = XVECEXP (vals, 0, 0);
5664 for (i = 0; i < n_elts; ++i)
5665 {
5666 x = XVECEXP (vals, 0, i);
e442af0b 5667 if (!(CONST_INT_P (x)
5668 || GET_CODE (x) == CONST_DOUBLE
5669 || GET_CODE (x) == CONST_FIXED))
644459d0 5670 ++n_var;
5671 else
5672 {
5673 if (first_constant == NULL_RTX)
5674 first_constant = x;
5675 }
5676 if (i > 0 && !rtx_equal_p (x, first))
5677 all_same = false;
5678 }
5679
5680 /* if all elements are the same, use splats to repeat elements */
5681 if (all_same)
5682 {
5683 if (!CONSTANT_P (first)
5684 && !register_operand (first, GET_MODE (x)))
5685 first = force_reg (GET_MODE (first), first);
5686 emit_insn (gen_spu_splats (target, first));
5687 return;
5688 }
5689
5690 /* load constant parts */
5691 if (n_var != n_elts)
5692 {
5693 if (n_var == 0)
5694 {
5695 emit_move_insn (target,
5696 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5697 }
5698 else
5699 {
5700 rtx constant_parts_rtx = copy_rtx (vals);
5701
5702 gcc_assert (first_constant != NULL_RTX);
 5703	      /* Fill empty slots with the first constant; this increases
 5704	         our chance of using splats in the recursive call below.  */
5705 for (i = 0; i < n_elts; ++i)
e442af0b 5706 {
5707 x = XVECEXP (constant_parts_rtx, 0, i);
5708 if (!(CONST_INT_P (x)
5709 || GET_CODE (x) == CONST_DOUBLE
5710 || GET_CODE (x) == CONST_FIXED))
5711 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5712 }
644459d0 5713
5714 spu_expand_vector_init (target, constant_parts_rtx);
5715 }
5716 }
5717
5718 /* load variable parts */
5719 if (n_var != 0)
5720 {
5721 rtx insert_operands[4];
5722
5723 insert_operands[0] = target;
5724 insert_operands[2] = target;
5725 for (i = 0; i < n_elts; ++i)
5726 {
5727 x = XVECEXP (vals, 0, i);
e442af0b 5728 if (!(CONST_INT_P (x)
5729 || GET_CODE (x) == CONST_DOUBLE
5730 || GET_CODE (x) == CONST_FIXED))
644459d0 5731 {
5732 if (!register_operand (x, GET_MODE (x)))
5733 x = force_reg (GET_MODE (x), x);
5734 insert_operands[1] = x;
5735 insert_operands[3] = GEN_INT (i);
5736 spu_builtin_insert (insert_operands);
5737 }
5738 }
5739 }
5740}
6352eedf 5741
5474166e 5742/* Return the insn index of the vector compare instruction for the given
 5743   CODE, DEST_MODE and OP_MODE.  Return -1 if a valid insn is not available.  */
5744
5745static int
5746get_vec_cmp_insn (enum rtx_code code,
5747 enum machine_mode dest_mode,
5748 enum machine_mode op_mode)
5749
5750{
5751 switch (code)
5752 {
5753 case EQ:
5754 if (dest_mode == V16QImode && op_mode == V16QImode)
5755 return CODE_FOR_ceq_v16qi;
5756 if (dest_mode == V8HImode && op_mode == V8HImode)
5757 return CODE_FOR_ceq_v8hi;
5758 if (dest_mode == V4SImode && op_mode == V4SImode)
5759 return CODE_FOR_ceq_v4si;
5760 if (dest_mode == V4SImode && op_mode == V4SFmode)
5761 return CODE_FOR_ceq_v4sf;
5762 if (dest_mode == V2DImode && op_mode == V2DFmode)
5763 return CODE_FOR_ceq_v2df;
5764 break;
5765 case GT:
5766 if (dest_mode == V16QImode && op_mode == V16QImode)
5767 return CODE_FOR_cgt_v16qi;
5768 if (dest_mode == V8HImode && op_mode == V8HImode)
5769 return CODE_FOR_cgt_v8hi;
5770 if (dest_mode == V4SImode && op_mode == V4SImode)
5771 return CODE_FOR_cgt_v4si;
5772 if (dest_mode == V4SImode && op_mode == V4SFmode)
5773 return CODE_FOR_cgt_v4sf;
5774 if (dest_mode == V2DImode && op_mode == V2DFmode)
5775 return CODE_FOR_cgt_v2df;
5776 break;
5777 case GTU:
5778 if (dest_mode == V16QImode && op_mode == V16QImode)
5779 return CODE_FOR_clgt_v16qi;
5780 if (dest_mode == V8HImode && op_mode == V8HImode)
5781 return CODE_FOR_clgt_v8hi;
5782 if (dest_mode == V4SImode && op_mode == V4SImode)
5783 return CODE_FOR_clgt_v4si;
5784 break;
5785 default:
5786 break;
5787 }
5788 return -1;
5789}
5790
5791/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5792 DMODE is expected destination mode. This is a recursive function. */
5793
5794static rtx
5795spu_emit_vector_compare (enum rtx_code rcode,
5796 rtx op0, rtx op1,
5797 enum machine_mode dmode)
5798{
5799 int vec_cmp_insn;
5800 rtx mask;
5801 enum machine_mode dest_mode;
5802 enum machine_mode op_mode = GET_MODE (op1);
5803
5804 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5805
 5806  /* Single-precision floating point vector compare instructions use
 5807     V4SImode as the destination; double-precision ones use V2DImode.
 5808     Move the destination to the appropriate mode later.  */
5809 if (dmode == V4SFmode)
5810 dest_mode = V4SImode;
5811 else if (dmode == V2DFmode)
5812 dest_mode = V2DImode;
5813 else
5814 dest_mode = dmode;
5815
5816 mask = gen_reg_rtx (dest_mode);
5817 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5818
5819 if (vec_cmp_insn == -1)
5820 {
5821 bool swap_operands = false;
5822 bool try_again = false;
5823 switch (rcode)
5824 {
5825 case LT:
5826 rcode = GT;
5827 swap_operands = true;
5828 try_again = true;
5829 break;
5830 case LTU:
5831 rcode = GTU;
5832 swap_operands = true;
5833 try_again = true;
5834 break;
5835 case NE:
5836 /* Treat A != B as ~(A==B). */
5837 {
5838 enum insn_code nor_code;
5839 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5840 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5841 gcc_assert (nor_code != CODE_FOR_nothing);
5842 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5843 if (dmode != dest_mode)
5844 {
5845 rtx temp = gen_reg_rtx (dest_mode);
5846 convert_move (temp, mask, 0);
5847 return temp;
5848 }
5849 return mask;
5850 }
5851 break;
5852 case GE:
5853 case GEU:
5854 case LE:
5855 case LEU:
5856 /* Try GT/GTU/LT/LTU OR EQ */
5857 {
5858 rtx c_rtx, eq_rtx;
5859 enum insn_code ior_code;
5860 enum rtx_code new_code;
5861
5862 switch (rcode)
5863 {
5864 case GE: new_code = GT; break;
5865 case GEU: new_code = GTU; break;
5866 case LE: new_code = LT; break;
5867 case LEU: new_code = LTU; break;
5868 default:
5869 gcc_unreachable ();
5870 }
5871
5872 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5873 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5874
99bdde56 5875 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5876 gcc_assert (ior_code != CODE_FOR_nothing);
5877 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5878 if (dmode != dest_mode)
5879 {
5880 rtx temp = gen_reg_rtx (dest_mode);
5881 convert_move (temp, mask, 0);
5882 return temp;
5883 }
5884 return mask;
5885 }
5886 break;
5887 default:
5888 gcc_unreachable ();
5889 }
5890
5891 /* You only get two chances. */
5892 if (try_again)
5893 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5894
5895 gcc_assert (vec_cmp_insn != -1);
5896
5897 if (swap_operands)
5898 {
5899 rtx tmp;
5900 tmp = op0;
5901 op0 = op1;
5902 op1 = tmp;
5903 }
5904 }
5905
5906 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5907 if (dmode != dest_mode)
5908 {
5909 rtx temp = gen_reg_rtx (dest_mode);
5910 convert_move (temp, mask, 0);
5911 return temp;
5912 }
5913 return mask;
5914}
5915
5916
5917/* Emit vector conditional expression.
5918 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
 5919   CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
5920
5921int
5922spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5923 rtx cond, rtx cc_op0, rtx cc_op1)
5924{
5925 enum machine_mode dest_mode = GET_MODE (dest);
5926 enum rtx_code rcode = GET_CODE (cond);
5927 rtx mask;
5928
5929 /* Get the vector mask for the given relational operations. */
5930 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5931
 5932 emit_insn (gen_selb (dest, op2, op1, mask));
5933
5934 return 1;
5935}
5936
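/* Force OP into a register of mode MODE.  Constants (and BLKmode operands)
   are converted with convert_to_mode, registers of the same size are
   reinterpreted with a subreg, and differently sized registers go through
   spu_convert.  */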
6352eedf 5937static rtx
5938spu_force_reg (enum machine_mode mode, rtx op)
5939{
5940 rtx x, r;
5941 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5942 {
5943 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5944 || GET_MODE (op) == BLKmode)
5945 return force_reg (mode, convert_to_mode (mode, op, 0));
5946 abort ();
5947 }
5948
5949 r = force_reg (GET_MODE (op), op);
5950 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5951 {
5952 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5953 if (x)
5954 return x;
5955 }
5956
5957 x = gen_reg_rtx (mode);
5958 emit_insn (gen_spu_convert (x, r));
5959 return x;
5960}
5961
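/* Check that operand OP of builtin D satisfies parameter type P.  For
   immediate parameters this reports an error when the value is outside
   the allowed range and warns when low-order bits that the instruction
   ignores are set.  */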
5962static void
5963spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5964{
5965 HOST_WIDE_INT v = 0;
5966 int lsbits;
5967 /* Check the range of immediate operands. */
5968 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5969 {
5970 int range = p - SPU_BTI_7;
5df189be 5971
5972 if (!CONSTANT_P (op))
6352eedf 5973 error ("%s expects an integer literal in the range [%d, %d].",
5974 d->name,
5975 spu_builtin_range[range].low, spu_builtin_range[range].high);
5976
5977 if (GET_CODE (op) == CONST
5978 && (GET_CODE (XEXP (op, 0)) == PLUS
5979 || GET_CODE (XEXP (op, 0)) == MINUS))
5980 {
5981 v = INTVAL (XEXP (XEXP (op, 0), 1));
5982 op = XEXP (XEXP (op, 0), 0);
5983 }
5984 else if (GET_CODE (op) == CONST_INT)
5985 v = INTVAL (op);
5df189be 5986 else if (GET_CODE (op) == CONST_VECTOR
5987 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5988 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5989
5990 /* The default for v is 0 which is valid in every range. */
5991 if (v < spu_builtin_range[range].low
5992 || v > spu_builtin_range[range].high)
5993 error ("%s expects an integer literal in the range [%d, %d]. ("
5994 HOST_WIDE_INT_PRINT_DEC ")",
5995 d->name,
5996 spu_builtin_range[range].low, spu_builtin_range[range].high,
5997 v);
6352eedf 5998
5999 switch (p)
6000 {
6001 case SPU_BTI_S10_4:
6002 lsbits = 4;
6003 break;
6004 case SPU_BTI_U16_2:
 6005	  /* This is only used by lqa and stqa.  Even though the insns
 6006	     encode 16 bits of the address (all but the 2 least
 6007	     significant), only 14 bits are used because the address is
 6008	     masked to be 16-byte aligned. */
6009 lsbits = 4;
6010 break;
6011 case SPU_BTI_S16_2:
6012 /* This is used for lqr and stqr. */
6013 lsbits = 2;
6014 break;
6015 default:
6016 lsbits = 0;
6017 }
6018
6019 if (GET_CODE (op) == LABEL_REF
6020 || (GET_CODE (op) == SYMBOL_REF
6021 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6022 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6023 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6024 d->name);
6025 }
6026}
6027
6028
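/* Expand the arguments of builtin call EXP into OPS[], putting TARGET in
   OPS[0] when D returns a value.  Return the number of operands that
   were filled in.  */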
70ca06f8 6029static int
5df189be 6030expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6031 rtx target, rtx ops[])
6032{
bc620c5c 6033 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6034 int i = 0, a;
6352eedf 6035
6036 /* Expand the arguments into rtl. */
6037
6038 if (d->parm[0] != SPU_BTI_VOID)
6039 ops[i++] = target;
6040
70ca06f8 6041 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6042 {
5df189be 6043 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6044 if (arg == 0)
6045 abort ();
b9c74b4d 6046 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6047 }
70ca06f8 6048
6049 /* The insn pattern may have additional operands (SCRATCH).
6050 Return the number of actual non-SCRATCH operands. */
6051 gcc_assert (i <= insn_data[icode].n_operands);
6052 return i;
6352eedf 6053}
6054
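/* Expand a call EXP to the builtin described by D.  The arguments are
   expanded, range-checked and coerced to the modes of the insn pattern
   before the pattern is emitted; the result rtx is returned.  */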
6055static rtx
6056spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6057 tree exp, rtx target)
6352eedf 6058{
6059 rtx pat;
6060 rtx ops[8];
bc620c5c 6061 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6062 enum machine_mode mode, tmode;
6063 int i, p;
70ca06f8 6064 int n_operands;
6352eedf 6065 tree return_type;
6066
6067 /* Set up ops[] with values from arglist. */
70ca06f8 6068 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6069
6070 /* Handle the target operand which must be operand 0. */
6071 i = 0;
6072 if (d->parm[0] != SPU_BTI_VOID)
6073 {
6074
 6075      /* We prefer the mode specified for the match_operand; otherwise
 6076         use the mode from the builtin function prototype. */
6077 tmode = insn_data[d->icode].operand[0].mode;
6078 if (tmode == VOIDmode)
6079 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6080
 6081      /* Try to use target, because not using it can lead to extra copies,
 6082         and when we are using all of the registers extra copies lead
 6083         to extra spills. */
6084 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6085 ops[0] = target;
6086 else
6087 target = ops[0] = gen_reg_rtx (tmode);
6088
6089 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6090 abort ();
6091
6092 i++;
6093 }
6094
a76866d3 6095 if (d->fcode == SPU_MASK_FOR_LOAD)
6096 {
6097 enum machine_mode mode = insn_data[icode].operand[1].mode;
6098 tree arg;
6099 rtx addr, op, pat;
6100
6101 /* get addr */
5df189be 6102 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 6103 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6104 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6105 addr = memory_address (mode, op);
6106
6107 /* negate addr */
6108 op = gen_reg_rtx (GET_MODE (addr));
6109 emit_insn (gen_rtx_SET (VOIDmode, op,
6110 gen_rtx_NEG (GET_MODE (addr), addr)));
6111 op = gen_rtx_MEM (mode, op);
6112
6113 pat = GEN_FCN (icode) (target, op);
6114 if (!pat)
6115 return 0;
6116 emit_insn (pat);
6117 return target;
6118 }
6119
6352eedf 6120  /* Ignore align_hint, but still expand its args in case they have
 6121     side effects. */
6122 if (icode == CODE_FOR_spu_align_hint)
6123 return 0;
6124
6125 /* Handle the rest of the operands. */
70ca06f8 6126 for (p = 1; i < n_operands; i++, p++)
6352eedf 6127 {
6128 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6129 mode = insn_data[d->icode].operand[i].mode;
6130 else
6131 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6132
6133 /* mode can be VOIDmode here for labels */
6134
6135 /* For specific intrinsics with an immediate operand, e.g.,
6136 si_ai(), we sometimes need to convert the scalar argument to a
6137 vector argument by splatting the scalar. */
6138 if (VECTOR_MODE_P (mode)
6139 && (GET_CODE (ops[i]) == CONST_INT
6140 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6141 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6142 {
6143 if (GET_CODE (ops[i]) == CONST_INT)
6144 ops[i] = spu_const (mode, INTVAL (ops[i]));
6145 else
6146 {
6147 rtx reg = gen_reg_rtx (mode);
6148 enum machine_mode imode = GET_MODE_INNER (mode);
6149 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6150 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6151 if (imode != GET_MODE (ops[i]))
6152 ops[i] = convert_to_mode (imode, ops[i],
6153 TYPE_UNSIGNED (spu_builtin_types
6154 [d->parm[i]]));
6155 emit_insn (gen_spu_splats (reg, ops[i]));
6156 ops[i] = reg;
6157 }
6158 }
6159
5df189be 6160 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6161
6352eedf 6162 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6163 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6164 }
6165
70ca06f8 6166 switch (n_operands)
6352eedf 6167 {
6168 case 0:
6169 pat = GEN_FCN (icode) (0);
6170 break;
6171 case 1:
6172 pat = GEN_FCN (icode) (ops[0]);
6173 break;
6174 case 2:
6175 pat = GEN_FCN (icode) (ops[0], ops[1]);
6176 break;
6177 case 3:
6178 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6179 break;
6180 case 4:
6181 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6182 break;
6183 case 5:
6184 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6185 break;
6186 case 6:
6187 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6188 break;
6189 default:
6190 abort ();
6191 }
6192
6193 if (!pat)
6194 abort ();
6195
6196 if (d->type == B_CALL || d->type == B_BISLED)
6197 emit_call_insn (pat);
6198 else if (d->type == B_JUMP)
6199 {
6200 emit_jump_insn (pat);
6201 emit_barrier ();
6202 }
6203 else
6204 emit_insn (pat);
6205
6206 return_type = spu_builtin_types[d->parm[0]];
6207 if (d->parm[0] != SPU_BTI_VOID
6208 && GET_MODE (target) != TYPE_MODE (return_type))
6209 {
 6210      /* target is the return value.  It should always have the mode of
 6211         the builtin function prototype. */
6212 target = spu_force_reg (TYPE_MODE (return_type), target);
6213 }
6214
6215 return target;
6216}
6217
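/* Expand a call EXP to an SPU builtin.  Look up the builtin's descriptor
   by function code and hand the expansion off to spu_expand_builtin_1.  */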
6218rtx
6219spu_expand_builtin (tree exp,
6220 rtx target,
6221 rtx subtarget ATTRIBUTE_UNUSED,
6222 enum machine_mode mode ATTRIBUTE_UNUSED,
6223 int ignore ATTRIBUTE_UNUSED)
6224{
5df189be 6225 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6226 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6227 struct spu_builtin_description *d;
6228
6229 if (fcode < NUM_SPU_BUILTINS)
6230 {
6231 d = &spu_builtins[fcode];
6232
5df189be 6233 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6234 }
6235 abort ();
6236}
6237
e99f512d 6238/* Implement targetm.vectorize.builtin_mul_widen_even. */
6239static tree
6240spu_builtin_mul_widen_even (tree type)
6241{
e99f512d 6242 switch (TYPE_MODE (type))
6243 {
6244 case V8HImode:
6245 if (TYPE_UNSIGNED (type))
6246 return spu_builtins[SPU_MULE_0].fndecl;
6247 else
6248 return spu_builtins[SPU_MULE_1].fndecl;
6249 break;
6250 default:
6251 return NULL_TREE;
6252 }
6253}
6254
6255/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6256static tree
6257spu_builtin_mul_widen_odd (tree type)
6258{
6259 switch (TYPE_MODE (type))
6260 {
6261 case V8HImode:
6262 if (TYPE_UNSIGNED (type))
6263 return spu_builtins[SPU_MULO_1].fndecl;
6264 else
6265 return spu_builtins[SPU_MULO_0].fndecl;
6266 break;
6267 default:
6268 return NULL_TREE;
6269 }
6270}
6271
a76866d3 6272/* Implement targetm.vectorize.builtin_mask_for_load. */
6273static tree
6274spu_builtin_mask_for_load (void)
6275{
6276 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6277 gcc_assert (d);
6278 return d->fndecl;
6279}
5df189be 6280
a28df51d 6281/* Implement targetm.vectorize.builtin_vectorization_cost. */
6282static int
6283spu_builtin_vectorization_cost (bool runtime_test)
6284{
 6285  /* If the branch of the runtime test is taken - i.e., the vectorized
6286 version is skipped - this incurs a misprediction cost (because the
6287 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6288 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6289 when the vectorized version is executed. */
6290 if (runtime_test)
6291 return -19;
6292 else
6293 return 0;
6294}
6295
0e87db76 6296/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6297   after applying N iterations.  This routine does not determine
 6298   how many iterations are required to reach the desired alignment. */
6299
6300static bool
a9f1838b 6301spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6302{
6303 if (is_packed)
6304 return false;
6305
6306 /* All other types are naturally aligned. */
6307 return true;
6308}
6309
a0515226 6310/* Implement targetm.vectorize.builtin_vec_perm. */
6311tree
6312spu_builtin_vec_perm (tree type, tree *mask_element_type)
6313{
6314 struct spu_builtin_description *d;
6315
6316 *mask_element_type = unsigned_char_type_node;
6317
6318 switch (TYPE_MODE (type))
6319 {
6320 case V16QImode:
6321 if (TYPE_UNSIGNED (type))
6322 d = &spu_builtins[SPU_SHUFFLE_0];
6323 else
6324 d = &spu_builtins[SPU_SHUFFLE_1];
6325 break;
6326
6327 case V8HImode:
6328 if (TYPE_UNSIGNED (type))
6329 d = &spu_builtins[SPU_SHUFFLE_2];
6330 else
6331 d = &spu_builtins[SPU_SHUFFLE_3];
6332 break;
6333
6334 case V4SImode:
6335 if (TYPE_UNSIGNED (type))
6336 d = &spu_builtins[SPU_SHUFFLE_4];
6337 else
6338 d = &spu_builtins[SPU_SHUFFLE_5];
6339 break;
6340
6341 case V2DImode:
6342 if (TYPE_UNSIGNED (type))
6343 d = &spu_builtins[SPU_SHUFFLE_6];
6344 else
6345 d = &spu_builtins[SPU_SHUFFLE_7];
6346 break;
6347
6348 case V4SFmode:
6349 d = &spu_builtins[SPU_SHUFFLE_8];
6350 break;
6351
6352 case V2DFmode:
6353 d = &spu_builtins[SPU_SHUFFLE_9];
6354 break;
6355
6356 default:
6357 return NULL_TREE;
6358 }
6359
6360 gcc_assert (d);
6361 return d->fndecl;
6362}
6363
d52fd16a 6364/* Count the total number of instructions in each pipe and return the
6365 maximum, which is used as the Minimum Iteration Interval (MII)
 6366   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
 6367   -2 indicates an instruction that can go in either pipe0 or pipe1. */
6368static int
6369spu_sms_res_mii (struct ddg *g)
6370{
6371 int i;
6372 unsigned t[4] = {0, 0, 0, 0};
6373
6374 for (i = 0; i < g->num_nodes; i++)
6375 {
6376 rtx insn = g->nodes[i].insn;
6377 int p = get_pipe (insn) + 2;
6378
6379 assert (p >= 0);
6380 assert (p < 4);
6381
6382 t[p]++;
6383 if (dump_file && INSN_P (insn))
6384 fprintf (dump_file, "i%d %s %d %d\n",
6385 INSN_UID (insn),
6386 insn_data[INSN_CODE(insn)].name,
6387 p, t[p]);
6388 }
6389 if (dump_file)
6390 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6391
6392 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6393}
6394
6395
5df189be 6396void
6397spu_init_expanders (void)
6398{
 6399  /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128-bit
 6400     aligned when frame_pointer_needed is true.  We don't know that
 6401     until we're expanding the prologue. */
6402 if (cfun)
6403 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 6404}
6405
6406static enum machine_mode
6407spu_libgcc_cmp_return_mode (void)
6408{
6409
 6410/* For the SPU, word mode is TImode, so it is better to use SImode
 6411   for compare returns. */
6412 return SImode;
6413}
6414
6415static enum machine_mode
6416spu_libgcc_shift_count_mode (void)
6417{
 6418/* For the SPU, word mode is TImode, so it is better to use SImode
 6419   for shift counts. */
6420 return SImode;
6421}
5a976006 6422
6423/* An early place to adjust some flags after GCC has finished processing
 6424   them.  */
6425static void
6426asm_file_start (void)
6427{
6428 /* Variable tracking should be run after all optimizations which
 6429     change the order of insns.  It also needs a valid CFG. */
6430 spu_flag_var_tracking = flag_var_tracking;
6431 flag_var_tracking = 0;
6432
6433 default_file_start ();
6434}
6435
a08dfd55 6436/* Implement targetm.section_type_flags. */
6437static unsigned int
6438spu_section_type_flags (tree decl, const char *name, int reloc)
6439{
6440 /* .toe needs to have type @nobits. */
6441 if (strcmp (name, ".toe") == 0)
6442 return SECTION_BSS;
6443 return default_section_type_flags (decl, name, reloc);
6444}
c2233b46 6445
56c7bfc2 6446/* Generate a constant or register which contains 2^SCALE. We assume
6447 the result is valid for MODE. Currently, MODE must be V4SFmode and
6448 SCALE must be SImode. */
6449rtx
6450spu_gen_exp2 (enum machine_mode mode, rtx scale)
6451{
6452 gcc_assert (mode == V4SFmode);
6453 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6454 if (GET_CODE (scale) != CONST_INT)
6455 {
6456 /* unsigned int exp = (127 + scale) << 23;
6457 __vector float m = (__vector float) spu_splats (exp); */
6458 rtx reg = force_reg (SImode, scale);
6459 rtx exp = gen_reg_rtx (SImode);
6460 rtx mul = gen_reg_rtx (mode);
6461 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6462 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6463 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6464 return mul;
6465 }
6466 else
6467 {
6468 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6469 unsigned char arr[16];
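      /* Build the splatted single-precision value 2^SCALE byte by byte:
	 the biased exponent EXP occupies bits 30..23 of each 32-bit word,
	 so byte 0 gets EXP >> 1, byte 1 gets the low exponent bit shifted
	 into its top position, and the mantissa bytes are zero.  */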
6470 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6471 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6472 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6473 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6474 return array_to_constant (mode, arr);
6475 }
6476}
6477
c2233b46 6478#include "gt-spu.h"