cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
644459d0 51#include "machmode.h"
75a70cf9 52#include "gimple.h"
644459d0 53#include "tm-constrs.h"
d52fd16a 54#include "ddg.h"
5a976006 55#include "sbitmap.h"
56#include "timevar.h"
57#include "df.h"
6352eedf 58
59/* Builtin types, data and prototypes. */
c2233b46 60
61enum spu_builtin_type_index
62{
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112};
113
114#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
6352eedf 127struct spu_builtin_range
128{
129 int low, high;
130};
131
132static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145};
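/* Note: the _2 and _4 suffixes mark immediates that the instruction shifts
   left by 2 or 4 bits before use, which is why the ranges above are wider
   than the raw field width suggests. */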
146
644459d0 147\f
148/* Target specific attribute specifications. */
149char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151/* Prototypes and external defs. */
152static void spu_init_builtins (void);
153static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 155static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
644459d0 156static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
157static rtx get_pic_reg (void);
158static int need_to_save_reg (int regno, int saving);
159static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
160static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
161static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
162 rtx scratch);
163static void emit_nop_for_insn (rtx insn);
164static bool insn_clobbers_hbr (rtx insn);
165static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 166 int distance, sbitmap blocks);
5474166e 167static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
168 enum machine_mode dmode);
644459d0 169static rtx get_branch_target (rtx branch);
644459d0 170static void spu_machine_dependent_reorg (void);
171static int spu_sched_issue_rate (void);
172static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
173 int can_issue_more);
174static int get_pipe (rtx insn);
644459d0 175static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 176static void spu_sched_init_global (FILE *, int, int);
177static void spu_sched_init (FILE *, int, int);
178static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 179static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
180 int flags,
181 unsigned char *no_add_attrs);
182static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185static int spu_naked_function_p (tree func);
fb80456a 186static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
187 const_tree type, unsigned char named);
644459d0 188static tree spu_build_builtin_va_list (void);
8a58ed0a 189static void spu_va_start (tree, rtx);
75a70cf9 190static tree spu_gimplify_va_arg_expr (tree valist, tree type,
191 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 192static int store_with_one_insn_p (rtx mem);
644459d0 193static int mem_is_padded_component_ref (rtx x);
9d98604b 194static int reg_aligned_for_addr (rtx x);
644459d0 195static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
196static void spu_asm_globalize_label (FILE * file, const char *name);
197static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 198 int *total, bool speed);
644459d0 199static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
200static void spu_init_libfuncs (void);
fb80456a 201static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 202static void fix_range (const char *);
69ced2d6 203static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 204static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
e99f512d 205static tree spu_builtin_mul_widen_even (tree);
206static tree spu_builtin_mul_widen_odd (tree);
a76866d3 207static tree spu_builtin_mask_for_load (void);
a28df51d 208static int spu_builtin_vectorization_cost (bool);
a9f1838b 209static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 210static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 211static int spu_sms_res_mii (struct ddg *g);
5a976006 212static void asm_file_start (void);
a08dfd55 213static unsigned int spu_section_type_flags (tree, const char *, int);
9d98604b 214static rtx spu_expand_load (rtx, rtx, rtx, int);
644459d0 215
216extern const char *reg_names[];
644459d0 217
5474166e 218/* Which instruction set architecture to use. */
219int spu_arch;
 220/* Which cpu we are tuning for. */
221int spu_tune;
222
5a976006 223/* The hardware requires 8 insns between a hint and the branch it
 224 affects. This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
 227 for the compiler to allow up to 2 nops to be emitted. The nops are
228 inserted in pairs, so we round down. */
229int spu_hint_dist = (8*4) - (2*4);
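/* With the default of 2 nops allowed: (8 insns * 4 bytes) - (2 nops * 4 bytes)
   = 24 bytes. */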
230
231/* Determines whether we run variable tracking in machine dependent
232 reorganization. */
233static int spu_flag_var_tracking;
234
644459d0 235enum spu_immediate {
236 SPU_NONE,
237 SPU_IL,
238 SPU_ILA,
239 SPU_ILH,
240 SPU_ILHU,
241 SPU_ORI,
242 SPU_ORHI,
243 SPU_ORBI,
99369027 244 SPU_IOHL
644459d0 245};
dea01258 246enum immediate_class
247{
248 IC_POOL, /* constant pool */
249 IC_IL1, /* one il* instruction */
250 IC_IL2, /* both ilhu and iohl instructions */
251 IC_IL1s, /* one il* instruction */
252 IC_IL2s, /* both ilhu and iohl instructions */
253 IC_FSMBI, /* the fsmbi instruction */
254 IC_CPAT, /* one of the c*d instructions */
5df189be 255 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 256};
644459d0 257
258static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
259static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 260static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
261static enum immediate_class classify_immediate (rtx op,
262 enum machine_mode mode);
644459d0 263
1bd43494 264static enum machine_mode spu_unwind_word_mode (void);
265
ea32e033 266static enum machine_mode
267spu_libgcc_cmp_return_mode (void);
268
269static enum machine_mode
270spu_libgcc_shift_count_mode (void);
ef51d1e3 271\f
272/* Table of machine attributes. */
273static const struct attribute_spec spu_attribute_table[] =
274{
275 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
276 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
277 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
278 { NULL, 0, 0, false, false, false, NULL }
279};
644459d0 280\f
281/* TARGET overrides. */
282
283#undef TARGET_INIT_BUILTINS
284#define TARGET_INIT_BUILTINS spu_init_builtins
285
644459d0 286#undef TARGET_EXPAND_BUILTIN
287#define TARGET_EXPAND_BUILTIN spu_expand_builtin
288
1bd43494 289#undef TARGET_UNWIND_WORD_MODE
290#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 291
41e3a0c7 292#undef TARGET_LEGITIMIZE_ADDRESS
293#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
294
644459d0 295/* The .8byte directive doesn't seem to work well for a 32 bit
296 architecture. */
297#undef TARGET_ASM_UNALIGNED_DI_OP
298#define TARGET_ASM_UNALIGNED_DI_OP NULL
299
300#undef TARGET_RTX_COSTS
301#define TARGET_RTX_COSTS spu_rtx_costs
302
303#undef TARGET_ADDRESS_COST
f529eb25 304#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 305
306#undef TARGET_SCHED_ISSUE_RATE
307#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
308
5a976006 309#undef TARGET_SCHED_INIT_GLOBAL
310#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
311
312#undef TARGET_SCHED_INIT
313#define TARGET_SCHED_INIT spu_sched_init
314
644459d0 315#undef TARGET_SCHED_VARIABLE_ISSUE
316#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
317
5a976006 318#undef TARGET_SCHED_REORDER
319#define TARGET_SCHED_REORDER spu_sched_reorder
320
321#undef TARGET_SCHED_REORDER2
322#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 323
324#undef TARGET_SCHED_ADJUST_COST
325#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
326
644459d0 327#undef TARGET_ATTRIBUTE_TABLE
328#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
329
330#undef TARGET_ASM_INTEGER
331#define TARGET_ASM_INTEGER spu_assemble_integer
332
333#undef TARGET_SCALAR_MODE_SUPPORTED_P
334#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
335
336#undef TARGET_VECTOR_MODE_SUPPORTED_P
337#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
338
339#undef TARGET_FUNCTION_OK_FOR_SIBCALL
340#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
341
342#undef TARGET_ASM_GLOBALIZE_LABEL
343#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
344
345#undef TARGET_PASS_BY_REFERENCE
346#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
347
348#undef TARGET_MUST_PASS_IN_STACK
349#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
350
351#undef TARGET_BUILD_BUILTIN_VA_LIST
352#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
353
8a58ed0a 354#undef TARGET_EXPAND_BUILTIN_VA_START
355#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
356
644459d0 357#undef TARGET_SETUP_INCOMING_VARARGS
358#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
359
360#undef TARGET_MACHINE_DEPENDENT_REORG
361#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
362
363#undef TARGET_GIMPLIFY_VA_ARG_EXPR
364#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
365
366#undef TARGET_DEFAULT_TARGET_FLAGS
367#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
368
369#undef TARGET_INIT_LIBFUNCS
370#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
371
372#undef TARGET_RETURN_IN_MEMORY
373#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
374
69ced2d6 375#undef TARGET_ENCODE_SECTION_INFO
376#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
377
e99f512d 378#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
379#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
380
381#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
382#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
383
a76866d3 384#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
385#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
386
a28df51d 387#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
388#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
389
0e87db76 390#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
391#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
392
a0515226 393#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
394#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
395
ea32e033 396#undef TARGET_LIBGCC_CMP_RETURN_MODE
397#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
398
399#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
400#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
401
d52fd16a 402#undef TARGET_SCHED_SMS_RES_MII
403#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
404
5a976006 405#undef TARGET_ASM_FILE_START
406#define TARGET_ASM_FILE_START asm_file_start
407
a08dfd55 408#undef TARGET_SECTION_TYPE_FLAGS
409#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
410
fd50b071 411#undef TARGET_LEGITIMATE_ADDRESS_P
412#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
413
644459d0 414struct gcc_target targetm = TARGET_INITIALIZER;
415
5df189be 416void
417spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
418{
5df189be 419 /* Override some of the default param values. With so many registers
420 larger values are better for these params. */
421 MAX_PENDING_LIST_LENGTH = 128;
422
423 /* With so many registers this is better on by default. */
424 flag_rename_registers = 1;
425}
426
644459d0 427/* Sometimes certain combinations of command options do not make sense
428 on a particular target machine. You can define a macro
429 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
430 executed once just after all the command options have been parsed. */
431void
432spu_override_options (void)
433{
14d408d9 434 /* Small loops will be completely peeled at -O3. For SPU it is more important
435 to keep code small by default. */
436 if (!flag_unroll_loops && !flag_peel_loops
437 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
438 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
439
644459d0 440 flag_omit_frame_pointer = 1;
441
5a976006 442 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 443 if (align_functions < 8)
444 align_functions = 8;
c7b91b14 445
5a976006 446 spu_hint_dist = 8*4 - spu_max_nops*4;
447 if (spu_hint_dist < 0)
448 spu_hint_dist = 0;
449
c7b91b14 450 if (spu_fixed_range_string)
451 fix_range (spu_fixed_range_string);
5474166e 452
453 /* Determine processor architectural level. */
454 if (spu_arch_string)
455 {
456 if (strcmp (&spu_arch_string[0], "cell") == 0)
457 spu_arch = PROCESSOR_CELL;
458 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
459 spu_arch = PROCESSOR_CELLEDP;
460 else
461 error ("Unknown architecture '%s'", &spu_arch_string[0]);
462 }
463
464 /* Determine processor to tune for. */
465 if (spu_tune_string)
466 {
467 if (strcmp (&spu_tune_string[0], "cell") == 0)
468 spu_tune = PROCESSOR_CELL;
469 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
470 spu_tune = PROCESSOR_CELLEDP;
471 else
 472 error ("Unknown tuning '%s'", &spu_tune_string[0]);
473 }
98bbec1e 474
13684256 475 /* Change defaults according to the processor architecture. */
476 if (spu_arch == PROCESSOR_CELLEDP)
477 {
478 /* If no command line option has been otherwise specified, change
479 the default to -mno-safe-hints on celledp -- only the original
480 Cell/B.E. processors require this workaround. */
481 if (!(target_flags_explicit & MASK_SAFE_HINTS))
482 target_flags &= ~MASK_SAFE_HINTS;
483 }
484
98bbec1e 485 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 486}
487\f
488/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
489 struct attribute_spec.handler. */
490
644459d0 491/* True if MODE is valid for the target. By "valid", we mean able to
492 be manipulated in non-trivial ways. In particular, this means all
493 the arithmetic is supported. */
494static bool
495spu_scalar_mode_supported_p (enum machine_mode mode)
496{
497 switch (mode)
498 {
499 case QImode:
500 case HImode:
501 case SImode:
502 case SFmode:
503 case DImode:
504 case TImode:
505 case DFmode:
506 return true;
507
508 default:
509 return false;
510 }
511}
512
513/* Similarly for vector modes. "Supported" here is less strict. At
514 least some operations are supported; need to check optabs or builtins
515 for further details. */
516static bool
517spu_vector_mode_supported_p (enum machine_mode mode)
518{
519 switch (mode)
520 {
521 case V16QImode:
522 case V8HImode:
523 case V4SImode:
524 case V2DImode:
525 case V4SFmode:
526 case V2DFmode:
527 return true;
528
529 default:
530 return false;
531 }
532}
533
534/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
535 least significant bytes of the outer mode. This function returns
 536 TRUE for the SUBREGs where this is correct. */
537int
538valid_subreg (rtx op)
539{
540 enum machine_mode om = GET_MODE (op);
541 enum machine_mode im = GET_MODE (SUBREG_REG (op));
542 return om != VOIDmode && im != VOIDmode
543 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 544 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
545 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 546}
547
 548/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 549 and adjust the start offset. */
644459d0 550static rtx
551adjust_operand (rtx op, HOST_WIDE_INT * start)
552{
553 enum machine_mode mode;
554 int op_size;
38aca5eb 555 /* Strip any paradoxical SUBREG. */
556 if (GET_CODE (op) == SUBREG
557 && (GET_MODE_BITSIZE (GET_MODE (op))
558 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 559 {
560 if (start)
561 *start -=
562 GET_MODE_BITSIZE (GET_MODE (op)) -
563 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
564 op = SUBREG_REG (op);
565 }
 566 /* If it is smaller than SI, widen it to SImode (via a SUBREG below if needed). */
567 op_size = GET_MODE_BITSIZE (GET_MODE (op));
568 if (op_size < 32)
569 {
570 if (start)
571 *start += 32 - op_size;
572 op_size = 32;
573 }
574 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
575 mode = mode_for_size (op_size, MODE_INT, 0);
576 if (mode != GET_MODE (op))
577 op = gen_rtx_SUBREG (mode, op, 0);
578 return op;
579}
580
581void
582spu_expand_extv (rtx ops[], int unsignedp)
583{
9d98604b 584 rtx dst = ops[0], src = ops[1];
644459d0 585 HOST_WIDE_INT width = INTVAL (ops[2]);
586 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 587 HOST_WIDE_INT align_mask;
588 rtx s0, s1, mask, r0;
644459d0 589
9d98604b 590 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 591
9d98604b 592 if (MEM_P (src))
644459d0 593 {
9d98604b 594 /* First, determine if we need 1 TImode load or 2. We need only 1
595 if the bits being extracted do not cross the alignment boundary
596 as determined by the MEM and its address. */
597
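      /* E.g. for a 128-bit aligned MEM, -MEM_ALIGN (src) clears the low 7 bits,
         so the test below checks whether the first and last bit of the field
         fall within the same 16-byte quadword. */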
598 align_mask = -MEM_ALIGN (src);
599 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 600 {
9d98604b 601 /* Alignment is sufficient for 1 load. */
602 s0 = gen_reg_rtx (TImode);
603 r0 = spu_expand_load (s0, 0, src, start / 8);
604 start &= 7;
605 if (r0)
606 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 607 }
9d98604b 608 else
609 {
610 /* Need 2 loads. */
611 s0 = gen_reg_rtx (TImode);
612 s1 = gen_reg_rtx (TImode);
613 r0 = spu_expand_load (s0, s1, src, start / 8);
614 start &= 7;
615
616 gcc_assert (start + width <= 128);
617 if (r0)
618 {
619 rtx r1 = gen_reg_rtx (SImode);
620 mask = gen_reg_rtx (TImode);
621 emit_move_insn (mask, GEN_INT (-1));
622 emit_insn (gen_rotqby_ti (s0, s0, r0));
623 emit_insn (gen_rotqby_ti (s1, s1, r0));
624 if (GET_CODE (r0) == CONST_INT)
625 r1 = GEN_INT (INTVAL (r0) & 15);
626 else
627 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
628 emit_insn (gen_shlqby_ti (mask, mask, r1));
629 emit_insn (gen_selb (s0, s1, s0, mask));
630 }
631 }
632
633 }
634 else if (GET_CODE (src) == SUBREG)
635 {
636 rtx r = SUBREG_REG (src);
637 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
638 s0 = gen_reg_rtx (TImode);
639 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
640 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
641 else
642 emit_move_insn (s0, src);
643 }
644 else
645 {
646 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
647 s0 = gen_reg_rtx (TImode);
648 emit_move_insn (s0, src);
644459d0 649 }
650
9d98604b 651 /* Now s0 is TImode and contains the bits to extract at start. */
652
653 if (start)
654 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
655
656 if (128 - width)
644459d0 657 {
9d98604b 658 tree c = build_int_cst (NULL_TREE, 128 - width);
659 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 660 }
661
9d98604b 662 emit_move_insn (dst, s0);
644459d0 663}
664
665void
666spu_expand_insv (rtx ops[])
667{
668 HOST_WIDE_INT width = INTVAL (ops[1]);
669 HOST_WIDE_INT start = INTVAL (ops[2]);
670 HOST_WIDE_INT maskbits;
671 enum machine_mode dst_mode, src_mode;
672 rtx dst = ops[0], src = ops[3];
673 int dst_size, src_size;
674 rtx mask;
675 rtx shift_reg;
676 int shift;
677
678
679 if (GET_CODE (ops[0]) == MEM)
680 dst = gen_reg_rtx (TImode);
681 else
682 dst = adjust_operand (dst, &start);
683 dst_mode = GET_MODE (dst);
684 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
685
686 if (CONSTANT_P (src))
687 {
688 enum machine_mode m =
689 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
690 src = force_reg (m, convert_to_mode (m, src, 0));
691 }
692 src = adjust_operand (src, 0);
693 src_mode = GET_MODE (src);
694 src_size = GET_MODE_BITSIZE (GET_MODE (src));
695
696 mask = gen_reg_rtx (dst_mode);
697 shift_reg = gen_reg_rtx (dst_mode);
698 shift = dst_size - start - width;
699
700 /* It's not safe to use subreg here because the compiler assumes
701 that the SUBREG_REG is right justified in the SUBREG. */
702 convert_move (shift_reg, src, 1);
703
704 if (shift > 0)
705 {
706 switch (dst_mode)
707 {
708 case SImode:
709 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
710 break;
711 case DImode:
712 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
713 break;
714 case TImode:
715 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
716 break;
717 default:
718 abort ();
719 }
720 }
721 else if (shift < 0)
722 abort ();
723
724 switch (dst_size)
725 {
726 case 32:
727 maskbits = (-1ll << (32 - width - start));
728 if (start)
729 maskbits += (1ll << (32 - start));
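      /* E.g. start 8, width 8: (-1ll << 16) + (1ll << 24) == 0x00ff0000,
         an 8-bit field starting 8 bits from the most significant bit. */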
730 emit_move_insn (mask, GEN_INT (maskbits));
731 break;
732 case 64:
733 maskbits = (-1ll << (64 - width - start));
734 if (start)
735 maskbits += (1ll << (64 - start));
736 emit_move_insn (mask, GEN_INT (maskbits));
737 break;
738 case 128:
739 {
740 unsigned char arr[16];
741 int i = start / 8;
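	/* Build a byte-array mask whose bits cover exactly the field
	   [start, start + width). */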
742 memset (arr, 0, sizeof (arr));
743 arr[i] = 0xff >> (start & 7);
744 for (i++; i <= (start + width - 1) / 8; i++)
745 arr[i] = 0xff;
746 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
747 emit_move_insn (mask, array_to_constant (TImode, arr));
748 }
749 break;
750 default:
751 abort ();
752 }
753 if (GET_CODE (ops[0]) == MEM)
754 {
644459d0 755 rtx low = gen_reg_rtx (SImode);
644459d0 756 rtx rotl = gen_reg_rtx (SImode);
757 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 758 rtx addr;
759 rtx addr0;
760 rtx addr1;
644459d0 761 rtx mem;
762
9d98604b 763 addr = force_reg (Pmode, XEXP (ops[0], 0));
764 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 765 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
766 emit_insn (gen_negsi2 (rotl, low));
767 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
768 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 769 mem = change_address (ops[0], TImode, addr0);
644459d0 770 set_mem_alias_set (mem, 0);
771 emit_move_insn (dst, mem);
772 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 773 if (start + width > MEM_ALIGN (ops[0]))
774 {
775 rtx shl = gen_reg_rtx (SImode);
776 rtx mask1 = gen_reg_rtx (TImode);
777 rtx dst1 = gen_reg_rtx (TImode);
778 rtx mem1;
9d98604b 779 addr1 = plus_constant (addr, 16);
780 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 781 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
782 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 783 mem1 = change_address (ops[0], TImode, addr1);
644459d0 784 set_mem_alias_set (mem1, 0);
785 emit_move_insn (dst1, mem1);
786 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
787 emit_move_insn (mem1, dst1);
788 }
9d98604b 789 emit_move_insn (mem, dst);
644459d0 790 }
791 else
71cd778d 792 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 793}
794
795
796int
797spu_expand_block_move (rtx ops[])
798{
799 HOST_WIDE_INT bytes, align, offset;
800 rtx src, dst, sreg, dreg, target;
801 int i;
802 if (GET_CODE (ops[2]) != CONST_INT
803 || GET_CODE (ops[3]) != CONST_INT
48eb4342 804 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 805 return 0;
806
807 bytes = INTVAL (ops[2]);
808 align = INTVAL (ops[3]);
809
810 if (bytes <= 0)
811 return 1;
812
813 dst = ops[0];
814 src = ops[1];
815
816 if (align == 16)
817 {
818 for (offset = 0; offset + 16 <= bytes; offset += 16)
819 {
820 dst = adjust_address (ops[0], V16QImode, offset);
821 src = adjust_address (ops[1], V16QImode, offset);
822 emit_move_insn (dst, src);
823 }
824 if (offset < bytes)
825 {
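	  /* Copy the remaining 1..15 bytes: build a mask that is 0xff for each
	     byte still to be copied, then use selb to merge the source bytes
	     into the destination quadword. */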
826 rtx mask;
827 unsigned char arr[16] = { 0 };
828 for (i = 0; i < bytes - offset; i++)
829 arr[i] = 0xff;
830 dst = adjust_address (ops[0], V16QImode, offset);
831 src = adjust_address (ops[1], V16QImode, offset);
832 mask = gen_reg_rtx (V16QImode);
833 sreg = gen_reg_rtx (V16QImode);
834 dreg = gen_reg_rtx (V16QImode);
835 target = gen_reg_rtx (V16QImode);
836 emit_move_insn (mask, array_to_constant (V16QImode, arr));
837 emit_move_insn (dreg, dst);
838 emit_move_insn (sreg, src);
839 emit_insn (gen_selb (target, dreg, sreg, mask));
840 emit_move_insn (dst, target);
841 }
842 return 1;
843 }
844 return 0;
845}
846
847enum spu_comp_code
848{ SPU_EQ, SPU_GT, SPU_GTU };
849
5474166e 850int spu_comp_icode[12][3] = {
851 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
852 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
853 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
854 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
855 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
856 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
857 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
858 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
859 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
860 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
861 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
862 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 863};
864
865/* Generate a compare for CODE. Return a brand-new rtx that represents
866 the result of the compare. GCC can figure this out too if we don't
867 provide all variations of compares, but GCC always wants to use
868 WORD_MODE, we can generate better code in most cases if we do it
869 ourselves. */
870void
74f4459c 871spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 872{
873 int reverse_compare = 0;
874 int reverse_test = 0;
5d70b918 875 rtx compare_result, eq_result;
876 rtx comp_rtx, eq_rtx;
644459d0 877 enum machine_mode comp_mode;
878 enum machine_mode op_mode;
b9c74b4d 879 enum spu_comp_code scode, eq_code;
880 enum insn_code ior_code;
74f4459c 881 enum rtx_code code = GET_CODE (cmp);
882 rtx op0 = XEXP (cmp, 0);
883 rtx op1 = XEXP (cmp, 1);
644459d0 884 int index;
5d70b918 885 int eq_test = 0;
644459d0 886
74f4459c 887 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 888 and so on, to keep the constant in operand 1. */
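  /* E.g. (x >= 10) becomes (x > 9) and (x < 10) becomes (x <= 9). */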
74f4459c 889 if (GET_CODE (op1) == CONST_INT)
644459d0 890 {
74f4459c 891 HOST_WIDE_INT val = INTVAL (op1) - 1;
892 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 893 switch (code)
894 {
895 case GE:
74f4459c 896 op1 = GEN_INT (val);
644459d0 897 code = GT;
898 break;
899 case LT:
74f4459c 900 op1 = GEN_INT (val);
644459d0 901 code = LE;
902 break;
903 case GEU:
74f4459c 904 op1 = GEN_INT (val);
644459d0 905 code = GTU;
906 break;
907 case LTU:
74f4459c 908 op1 = GEN_INT (val);
644459d0 909 code = LEU;
910 break;
911 default:
912 break;
913 }
914 }
915
5d70b918 916 comp_mode = SImode;
74f4459c 917 op_mode = GET_MODE (op0);
5d70b918 918
644459d0 919 switch (code)
920 {
921 case GE:
644459d0 922 scode = SPU_GT;
07027691 923 if (HONOR_NANS (op_mode))
5d70b918 924 {
925 reverse_compare = 0;
926 reverse_test = 0;
927 eq_test = 1;
928 eq_code = SPU_EQ;
929 }
930 else
931 {
932 reverse_compare = 1;
933 reverse_test = 1;
934 }
644459d0 935 break;
936 case LE:
644459d0 937 scode = SPU_GT;
07027691 938 if (HONOR_NANS (op_mode))
5d70b918 939 {
940 reverse_compare = 1;
941 reverse_test = 0;
942 eq_test = 1;
943 eq_code = SPU_EQ;
944 }
945 else
946 {
947 reverse_compare = 0;
948 reverse_test = 1;
949 }
644459d0 950 break;
951 case LT:
952 reverse_compare = 1;
953 reverse_test = 0;
954 scode = SPU_GT;
955 break;
956 case GEU:
957 reverse_compare = 1;
958 reverse_test = 1;
959 scode = SPU_GTU;
960 break;
961 case LEU:
962 reverse_compare = 0;
963 reverse_test = 1;
964 scode = SPU_GTU;
965 break;
966 case LTU:
967 reverse_compare = 1;
968 reverse_test = 0;
969 scode = SPU_GTU;
970 break;
971 case NE:
972 reverse_compare = 0;
973 reverse_test = 1;
974 scode = SPU_EQ;
975 break;
976
977 case EQ:
978 scode = SPU_EQ;
979 break;
980 case GT:
981 scode = SPU_GT;
982 break;
983 case GTU:
984 scode = SPU_GTU;
985 break;
986 default:
987 scode = SPU_EQ;
988 break;
989 }
990
644459d0 991 switch (op_mode)
992 {
993 case QImode:
994 index = 0;
995 comp_mode = QImode;
996 break;
997 case HImode:
998 index = 1;
999 comp_mode = HImode;
1000 break;
1001 case SImode:
1002 index = 2;
1003 break;
1004 case DImode:
1005 index = 3;
1006 break;
1007 case TImode:
1008 index = 4;
1009 break;
1010 case SFmode:
1011 index = 5;
1012 break;
1013 case DFmode:
1014 index = 6;
1015 break;
1016 case V16QImode:
5474166e 1017 index = 7;
1018 comp_mode = op_mode;
1019 break;
644459d0 1020 case V8HImode:
5474166e 1021 index = 8;
1022 comp_mode = op_mode;
1023 break;
644459d0 1024 case V4SImode:
5474166e 1025 index = 9;
1026 comp_mode = op_mode;
1027 break;
644459d0 1028 case V4SFmode:
5474166e 1029 index = 10;
1030 comp_mode = V4SImode;
1031 break;
644459d0 1032 case V2DFmode:
5474166e 1033 index = 11;
1034 comp_mode = V2DImode;
644459d0 1035 break;
5474166e 1036 case V2DImode:
644459d0 1037 default:
1038 abort ();
1039 }
1040
74f4459c 1041 if (GET_MODE (op1) == DFmode
07027691 1042 && (scode != SPU_GT && scode != SPU_EQ))
1043 abort ();
644459d0 1044
74f4459c 1045 if (is_set == 0 && op1 == const0_rtx
1046 && (GET_MODE (op0) == SImode
1047 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1048 {
1049 /* Don't need to set a register with the result when we are
1050 comparing against zero and branching. */
1051 reverse_test = !reverse_test;
74f4459c 1052 compare_result = op0;
644459d0 1053 }
1054 else
1055 {
1056 compare_result = gen_reg_rtx (comp_mode);
1057
1058 if (reverse_compare)
1059 {
74f4459c 1060 rtx t = op1;
1061 op1 = op0;
1062 op0 = t;
644459d0 1063 }
1064
1065 if (spu_comp_icode[index][scode] == 0)
1066 abort ();
1067
1068 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1069 (op0, op_mode))
1070 op0 = force_reg (op_mode, op0);
644459d0 1071 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1072 (op1, op_mode))
1073 op1 = force_reg (op_mode, op1);
644459d0 1074 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1075 op0, op1);
644459d0 1076 if (comp_rtx == 0)
1077 abort ();
1078 emit_insn (comp_rtx);
1079
5d70b918 1080 if (eq_test)
1081 {
1082 eq_result = gen_reg_rtx (comp_mode);
1083 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1084 op0, op1);
5d70b918 1085 if (eq_rtx == 0)
1086 abort ();
1087 emit_insn (eq_rtx);
1088 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1089 gcc_assert (ior_code != CODE_FOR_nothing);
1090 emit_insn (GEN_FCN (ior_code)
1091 (compare_result, compare_result, eq_result));
1092 }
644459d0 1093 }
1094
1095 if (is_set == 0)
1096 {
1097 rtx bcomp;
1098 rtx loc_ref;
1099
1100 /* We don't have branch on QI compare insns, so we convert the
1101 QI compare result to a HI result. */
1102 if (comp_mode == QImode)
1103 {
1104 rtx old_res = compare_result;
1105 compare_result = gen_reg_rtx (HImode);
1106 comp_mode = HImode;
1107 emit_insn (gen_extendqihi2 (compare_result, old_res));
1108 }
1109
1110 if (reverse_test)
1111 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1112 else
1113 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1114
74f4459c 1115 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1116 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1117 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1118 loc_ref, pc_rtx)));
1119 }
1120 else if (is_set == 2)
1121 {
74f4459c 1122 rtx target = operands[0];
644459d0 1123 int compare_size = GET_MODE_BITSIZE (comp_mode);
1124 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1125 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1126 rtx select_mask;
1127 rtx op_t = operands[2];
1128 rtx op_f = operands[3];
1129
1130 /* The result of the comparison can be SI, HI or QI mode. Create a
1131 mask based on that result. */
1132 if (target_size > compare_size)
1133 {
1134 select_mask = gen_reg_rtx (mode);
1135 emit_insn (gen_extend_compare (select_mask, compare_result));
1136 }
1137 else if (target_size < compare_size)
1138 select_mask =
1139 gen_rtx_SUBREG (mode, compare_result,
1140 (compare_size - target_size) / BITS_PER_UNIT);
1141 else if (comp_mode != mode)
1142 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1143 else
1144 select_mask = compare_result;
1145
1146 if (GET_MODE (target) != GET_MODE (op_t)
1147 || GET_MODE (target) != GET_MODE (op_f))
1148 abort ();
1149
1150 if (reverse_test)
1151 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1152 else
1153 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1154 }
1155 else
1156 {
74f4459c 1157 rtx target = operands[0];
644459d0 1158 if (reverse_test)
1159 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1160 gen_rtx_NOT (comp_mode, compare_result)));
1161 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1162 emit_insn (gen_extendhisi2 (target, compare_result));
1163 else if (GET_MODE (target) == SImode
1164 && GET_MODE (compare_result) == QImode)
1165 emit_insn (gen_extend_compare (target, compare_result));
1166 else
1167 emit_move_insn (target, compare_result);
1168 }
1169}
1170
1171HOST_WIDE_INT
1172const_double_to_hwint (rtx x)
1173{
1174 HOST_WIDE_INT val;
1175 REAL_VALUE_TYPE rv;
1176 if (GET_MODE (x) == SFmode)
1177 {
1178 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1179 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1180 }
1181 else if (GET_MODE (x) == DFmode)
1182 {
1183 long l[2];
1184 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1185 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1186 val = l[0];
1187 val = (val << 32) | (l[1] & 0xffffffff);
1188 }
1189 else
1190 abort ();
1191 return val;
1192}
1193
1194rtx
1195hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1196{
1197 long tv[2];
1198 REAL_VALUE_TYPE rv;
1199 gcc_assert (mode == SFmode || mode == DFmode);
1200
1201 if (mode == SFmode)
1202 tv[0] = (v << 32) >> 32;
1203 else if (mode == DFmode)
1204 {
1205 tv[1] = (v << 32) >> 32;
1206 tv[0] = v >> 32;
1207 }
1208 real_from_target (&rv, tv, mode);
1209 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1210}
1211
1212void
1213print_operand_address (FILE * file, register rtx addr)
1214{
1215 rtx reg;
1216 rtx offset;
1217
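  /* An (and X -16) address denotes the aligned quadword containing X;
     print the underlying address X itself. */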
e04cf423 1218 if (GET_CODE (addr) == AND
1219 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1220 && INTVAL (XEXP (addr, 1)) == -16)
1221 addr = XEXP (addr, 0);
1222
644459d0 1223 switch (GET_CODE (addr))
1224 {
1225 case REG:
1226 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1227 break;
1228
1229 case PLUS:
1230 reg = XEXP (addr, 0);
1231 offset = XEXP (addr, 1);
1232 if (GET_CODE (offset) == REG)
1233 {
1234 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1235 reg_names[REGNO (offset)]);
1236 }
1237 else if (GET_CODE (offset) == CONST_INT)
1238 {
1239 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1240 INTVAL (offset), reg_names[REGNO (reg)]);
1241 }
1242 else
1243 abort ();
1244 break;
1245
1246 case CONST:
1247 case LABEL_REF:
1248 case SYMBOL_REF:
1249 case CONST_INT:
1250 output_addr_const (file, addr);
1251 break;
1252
1253 default:
1254 debug_rtx (addr);
1255 abort ();
1256 }
1257}
1258
1259void
1260print_operand (FILE * file, rtx x, int code)
1261{
1262 enum machine_mode mode = GET_MODE (x);
1263 HOST_WIDE_INT val;
1264 unsigned char arr[16];
1265 int xcode = GET_CODE (x);
dea01258 1266 int i, info;
644459d0 1267 if (GET_MODE (x) == VOIDmode)
1268 switch (code)
1269 {
644459d0 1270 case 'L': /* 128 bits, signed */
1271 case 'm': /* 128 bits, signed */
1272 case 'T': /* 128 bits, signed */
1273 case 't': /* 128 bits, signed */
1274 mode = TImode;
1275 break;
644459d0 1276 case 'K': /* 64 bits, signed */
1277 case 'k': /* 64 bits, signed */
1278 case 'D': /* 64 bits, signed */
1279 case 'd': /* 64 bits, signed */
1280 mode = DImode;
1281 break;
644459d0 1282 case 'J': /* 32 bits, signed */
1283 case 'j': /* 32 bits, signed */
1284 case 's': /* 32 bits, signed */
1285 case 'S': /* 32 bits, signed */
1286 mode = SImode;
1287 break;
1288 }
1289 switch (code)
1290 {
1291
1292 case 'j': /* 32 bits, signed */
1293 case 'k': /* 64 bits, signed */
1294 case 'm': /* 128 bits, signed */
1295 if (xcode == CONST_INT
1296 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1297 {
1298 gcc_assert (logical_immediate_p (x, mode));
1299 constant_to_array (mode, x, arr);
1300 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1301 val = trunc_int_for_mode (val, SImode);
1302 switch (which_logical_immediate (val))
1303 {
1304 case SPU_ORI:
1305 break;
1306 case SPU_ORHI:
1307 fprintf (file, "h");
1308 break;
1309 case SPU_ORBI:
1310 fprintf (file, "b");
1311 break;
1312 default:
1313 gcc_unreachable();
1314 }
1315 }
1316 else
1317 gcc_unreachable();
1318 return;
1319
1320 case 'J': /* 32 bits, signed */
1321 case 'K': /* 64 bits, signed */
1322 case 'L': /* 128 bits, signed */
1323 if (xcode == CONST_INT
1324 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1325 {
1326 gcc_assert (logical_immediate_p (x, mode)
1327 || iohl_immediate_p (x, mode));
1328 constant_to_array (mode, x, arr);
1329 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1330 val = trunc_int_for_mode (val, SImode);
1331 switch (which_logical_immediate (val))
1332 {
1333 case SPU_ORI:
1334 case SPU_IOHL:
1335 break;
1336 case SPU_ORHI:
1337 val = trunc_int_for_mode (val, HImode);
1338 break;
1339 case SPU_ORBI:
1340 val = trunc_int_for_mode (val, QImode);
1341 break;
1342 default:
1343 gcc_unreachable();
1344 }
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1346 }
1347 else
1348 gcc_unreachable();
1349 return;
1350
1351 case 't': /* 128 bits, signed */
1352 case 'd': /* 64 bits, signed */
1353 case 's': /* 32 bits, signed */
dea01258 1354 if (CONSTANT_P (x))
644459d0 1355 {
dea01258 1356 enum immediate_class c = classify_immediate (x, mode);
1357 switch (c)
1358 {
1359 case IC_IL1:
1360 constant_to_array (mode, x, arr);
1361 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1362 val = trunc_int_for_mode (val, SImode);
1363 switch (which_immediate_load (val))
1364 {
1365 case SPU_IL:
1366 break;
1367 case SPU_ILA:
1368 fprintf (file, "a");
1369 break;
1370 case SPU_ILH:
1371 fprintf (file, "h");
1372 break;
1373 case SPU_ILHU:
1374 fprintf (file, "hu");
1375 break;
1376 default:
1377 gcc_unreachable ();
1378 }
1379 break;
1380 case IC_CPAT:
1381 constant_to_array (mode, x, arr);
1382 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1383 if (info == 1)
1384 fprintf (file, "b");
1385 else if (info == 2)
1386 fprintf (file, "h");
1387 else if (info == 4)
1388 fprintf (file, "w");
1389 else if (info == 8)
1390 fprintf (file, "d");
1391 break;
1392 case IC_IL1s:
1393 if (xcode == CONST_VECTOR)
1394 {
1395 x = CONST_VECTOR_ELT (x, 0);
1396 xcode = GET_CODE (x);
1397 }
1398 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1399 fprintf (file, "a");
1400 else if (xcode == HIGH)
1401 fprintf (file, "hu");
1402 break;
1403 case IC_FSMBI:
5df189be 1404 case IC_FSMBI2:
dea01258 1405 case IC_IL2:
1406 case IC_IL2s:
1407 case IC_POOL:
1408 abort ();
1409 }
644459d0 1410 }
644459d0 1411 else
1412 gcc_unreachable ();
1413 return;
1414
1415 case 'T': /* 128 bits, signed */
1416 case 'D': /* 64 bits, signed */
1417 case 'S': /* 32 bits, signed */
dea01258 1418 if (CONSTANT_P (x))
644459d0 1419 {
dea01258 1420 enum immediate_class c = classify_immediate (x, mode);
1421 switch (c)
644459d0 1422 {
dea01258 1423 case IC_IL1:
1424 constant_to_array (mode, x, arr);
1425 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1426 val = trunc_int_for_mode (val, SImode);
1427 switch (which_immediate_load (val))
1428 {
1429 case SPU_IL:
1430 case SPU_ILA:
1431 break;
1432 case SPU_ILH:
1433 case SPU_ILHU:
1434 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1435 break;
1436 default:
1437 gcc_unreachable ();
1438 }
1439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1440 break;
1441 case IC_FSMBI:
1442 constant_to_array (mode, x, arr);
1443 val = 0;
1444 for (i = 0; i < 16; i++)
1445 {
1446 val <<= 1;
1447 val |= arr[i] & 1;
1448 }
1449 print_operand (file, GEN_INT (val), 0);
1450 break;
1451 case IC_CPAT:
1452 constant_to_array (mode, x, arr);
1453 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1454 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1455 break;
dea01258 1456 case IC_IL1s:
dea01258 1457 if (xcode == HIGH)
5df189be 1458 x = XEXP (x, 0);
1459 if (GET_CODE (x) == CONST_VECTOR)
1460 x = CONST_VECTOR_ELT (x, 0);
1461 output_addr_const (file, x);
1462 if (xcode == HIGH)
1463 fprintf (file, "@h");
644459d0 1464 break;
dea01258 1465 case IC_IL2:
1466 case IC_IL2s:
5df189be 1467 case IC_FSMBI2:
dea01258 1468 case IC_POOL:
1469 abort ();
644459d0 1470 }
c8befdb9 1471 }
644459d0 1472 else
1473 gcc_unreachable ();
1474 return;
1475
644459d0 1476 case 'C':
1477 if (xcode == CONST_INT)
1478 {
 1479 /* Only the 4 least significant bits are relevant for generating
1480 control word instructions. */
1481 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1482 return;
1483 }
1484 break;
1485
1486 case 'M': /* print code for c*d */
1487 if (GET_CODE (x) == CONST_INT)
1488 switch (INTVAL (x))
1489 {
1490 case 1:
1491 fprintf (file, "b");
1492 break;
1493 case 2:
1494 fprintf (file, "h");
1495 break;
1496 case 4:
1497 fprintf (file, "w");
1498 break;
1499 case 8:
1500 fprintf (file, "d");
1501 break;
1502 default:
1503 gcc_unreachable();
1504 }
1505 else
1506 gcc_unreachable();
1507 return;
1508
1509 case 'N': /* Negate the operand */
1510 if (xcode == CONST_INT)
1511 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1512 else if (xcode == CONST_VECTOR)
1513 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1514 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1515 return;
1516
1517 case 'I': /* enable/disable interrupts */
1518 if (xcode == CONST_INT)
1519 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1520 return;
1521
1522 case 'b': /* branch modifiers */
1523 if (xcode == REG)
1524 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1525 else if (COMPARISON_P (x))
1526 fprintf (file, "%s", xcode == NE ? "n" : "");
1527 return;
1528
1529 case 'i': /* indirect call */
1530 if (xcode == MEM)
1531 {
1532 if (GET_CODE (XEXP (x, 0)) == REG)
1533 /* Used in indirect function calls. */
1534 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1535 else
1536 output_address (XEXP (x, 0));
1537 }
1538 return;
1539
1540 case 'p': /* load/store */
1541 if (xcode == MEM)
1542 {
1543 x = XEXP (x, 0);
1544 xcode = GET_CODE (x);
1545 }
e04cf423 1546 if (xcode == AND)
1547 {
1548 x = XEXP (x, 0);
1549 xcode = GET_CODE (x);
1550 }
644459d0 1551 if (xcode == REG)
1552 fprintf (file, "d");
1553 else if (xcode == CONST_INT)
1554 fprintf (file, "a");
1555 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1556 fprintf (file, "r");
1557 else if (xcode == PLUS || xcode == LO_SUM)
1558 {
1559 if (GET_CODE (XEXP (x, 1)) == REG)
1560 fprintf (file, "x");
1561 else
1562 fprintf (file, "d");
1563 }
1564 return;
1565
5df189be 1566 case 'e':
1567 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1568 val &= 0x7;
1569 output_addr_const (file, GEN_INT (val));
1570 return;
1571
1572 case 'f':
1573 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1574 val &= 0x1f;
1575 output_addr_const (file, GEN_INT (val));
1576 return;
1577
1578 case 'g':
1579 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1580 val &= 0x3f;
1581 output_addr_const (file, GEN_INT (val));
1582 return;
1583
1584 case 'h':
1585 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1586 val = (val >> 3) & 0x1f;
1587 output_addr_const (file, GEN_INT (val));
1588 return;
1589
1590 case 'E':
1591 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1592 val = -val;
1593 val &= 0x7;
1594 output_addr_const (file, GEN_INT (val));
1595 return;
1596
1597 case 'F':
1598 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1599 val = -val;
1600 val &= 0x1f;
1601 output_addr_const (file, GEN_INT (val));
1602 return;
1603
1604 case 'G':
1605 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1606 val = -val;
1607 val &= 0x3f;
1608 output_addr_const (file, GEN_INT (val));
1609 return;
1610
1611 case 'H':
1612 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1613 val = -(val & -8ll);
1614 val = (val >> 3) & 0x1f;
1615 output_addr_const (file, GEN_INT (val));
1616 return;
1617
56c7bfc2 1618 case 'v':
1619 case 'w':
1620 constant_to_array (mode, x, arr);
1621 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1622 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1623 return;
1624
644459d0 1625 case 0:
1626 if (xcode == REG)
1627 fprintf (file, "%s", reg_names[REGNO (x)]);
1628 else if (xcode == MEM)
1629 output_address (XEXP (x, 0));
1630 else if (xcode == CONST_VECTOR)
dea01258 1631 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1632 else
1633 output_addr_const (file, x);
1634 return;
1635
f6a0d06f 1636 /* unused letters
56c7bfc2 1637 o qr u yz
5df189be 1638 AB OPQR UVWXYZ */
644459d0 1639 default:
1640 output_operand_lossage ("invalid %%xn code");
1641 }
1642 gcc_unreachable ();
1643}
1644
1645extern char call_used_regs[];
644459d0 1646
1647/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1648 caller saved register. For leaf functions it is more efficient to
1649 use a volatile register because we won't need to save and restore the
1650 pic register. This routine is only valid after register allocation
1651 is completed, so we can pick an unused register. */
1652static rtx
1653get_pic_reg (void)
1654{
1655 rtx pic_reg = pic_offset_table_rtx;
1656 if (!reload_completed && !reload_in_progress)
1657 abort ();
1658 return pic_reg;
1659}
1660
5df189be 1661/* Split constant addresses to handle cases that are too large.
1662 Add in the pic register when in PIC mode.
1663 Split immediates that require more than 1 instruction. */
dea01258 1664int
1665spu_split_immediate (rtx * ops)
c8befdb9 1666{
dea01258 1667 enum machine_mode mode = GET_MODE (ops[0]);
1668 enum immediate_class c = classify_immediate (ops[1], mode);
1669
1670 switch (c)
c8befdb9 1671 {
dea01258 1672 case IC_IL2:
1673 {
1674 unsigned char arrhi[16];
1675 unsigned char arrlo[16];
98bbec1e 1676 rtx to, temp, hi, lo;
dea01258 1677 int i;
98bbec1e 1678 enum machine_mode imode = mode;
1679 /* We need to do reals as ints because the constant used in the
1680 IOR might not be a legitimate real constant. */
1681 imode = int_mode_for_mode (mode);
dea01258 1682 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1683 if (imode != mode)
1684 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1685 else
1686 to = ops[0];
1687 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1688 for (i = 0; i < 16; i += 4)
1689 {
1690 arrlo[i + 2] = arrhi[i + 2];
1691 arrlo[i + 3] = arrhi[i + 3];
1692 arrlo[i + 0] = arrlo[i + 1] = 0;
1693 arrhi[i + 2] = arrhi[i + 3] = 0;
1694 }
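	/* arrhi now holds only the high halfword of each word and arrlo only
	   the low halfword; the move below loads the high parts and the IOR
	   merges in the low parts. */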
98bbec1e 1695 hi = array_to_constant (imode, arrhi);
1696 lo = array_to_constant (imode, arrlo);
1697 emit_move_insn (temp, hi);
dea01258 1698 emit_insn (gen_rtx_SET
98bbec1e 1699 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1700 return 1;
1701 }
5df189be 1702 case IC_FSMBI2:
1703 {
1704 unsigned char arr_fsmbi[16];
1705 unsigned char arr_andbi[16];
1706 rtx to, reg_fsmbi, reg_and;
1707 int i;
1708 enum machine_mode imode = mode;
1709 /* We need to do reals as ints because the constant used in the
1710 * AND might not be a legitimate real constant. */
1711 imode = int_mode_for_mode (mode);
1712 constant_to_array (mode, ops[1], arr_fsmbi);
1713 if (imode != mode)
1714 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1715 else
1716 to = ops[0];
1717 for (i = 0; i < 16; i++)
1718 if (arr_fsmbi[i] != 0)
1719 {
1720 arr_andbi[0] = arr_fsmbi[i];
1721 arr_fsmbi[i] = 0xff;
1722 }
1723 for (i = 1; i < 16; i++)
1724 arr_andbi[i] = arr_andbi[0];
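	/* The fsmbi constant sets the selected byte positions to 0xff; ANDing
	   with the replicated byte value then reproduces the original
	   constant. */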
1725 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1726 reg_and = array_to_constant (imode, arr_andbi);
1727 emit_move_insn (to, reg_fsmbi);
1728 emit_insn (gen_rtx_SET
1729 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1730 return 1;
1731 }
dea01258 1732 case IC_POOL:
1733 if (reload_in_progress || reload_completed)
1734 {
1735 rtx mem = force_const_mem (mode, ops[1]);
1736 if (TARGET_LARGE_MEM)
1737 {
1738 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1739 emit_move_insn (addr, XEXP (mem, 0));
1740 mem = replace_equiv_address (mem, addr);
1741 }
1742 emit_move_insn (ops[0], mem);
1743 return 1;
1744 }
1745 break;
1746 case IC_IL1s:
1747 case IC_IL2s:
1748 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1749 {
1750 if (c == IC_IL2s)
1751 {
5df189be 1752 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1753 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1754 }
1755 else if (flag_pic)
1756 emit_insn (gen_pic (ops[0], ops[1]));
1757 if (flag_pic)
1758 {
1759 rtx pic_reg = get_pic_reg ();
1760 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1761 crtl->uses_pic_offset_table = 1;
dea01258 1762 }
1763 return flag_pic || c == IC_IL2s;
1764 }
1765 break;
1766 case IC_IL1:
1767 case IC_FSMBI:
1768 case IC_CPAT:
1769 break;
c8befdb9 1770 }
dea01258 1771 return 0;
c8befdb9 1772}
1773
644459d0 1774/* SAVING is TRUE when we are generating the actual load and store
1775 instructions for REGNO. When determining the size of the stack
 1776 needed for saving registers we must allocate enough space for the
1777 worst case, because we don't always have the information early enough
1778 to not allocate it. But we can at least eliminate the actual loads
1779 and stores during the prologue/epilogue. */
1780static int
1781need_to_save_reg (int regno, int saving)
1782{
3072d30e 1783 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1784 return 1;
1785 if (flag_pic
1786 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1787 && (!saving || crtl->uses_pic_offset_table)
644459d0 1788 && (!saving
3072d30e 1789 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1790 return 1;
1791 return 0;
1792}
1793
1794/* This function is only correct starting with local register
1795 allocation */
1796int
1797spu_saved_regs_size (void)
1798{
1799 int reg_save_size = 0;
1800 int regno;
1801
1802 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1803 if (need_to_save_reg (regno, 0))
1804 reg_save_size += 0x10;
1805 return reg_save_size;
1806}
1807
1808static rtx
1809frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1810{
1811 rtx reg = gen_rtx_REG (V4SImode, regno);
1812 rtx mem =
1813 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1814 return emit_insn (gen_movv4si (mem, reg));
1815}
1816
1817static rtx
1818frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1819{
1820 rtx reg = gen_rtx_REG (V4SImode, regno);
1821 rtx mem =
1822 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1823 return emit_insn (gen_movv4si (reg, mem));
1824}
1825
1826/* This happens after reload, so we need to expand it. */
1827static rtx
1828frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1829{
1830 rtx insn;
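  /* If the immediate fits the add-immediate constraint, add it directly;
     otherwise materialize it in the scratch register first. */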
1831 if (satisfies_constraint_K (GEN_INT (imm)))
1832 {
1833 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1834 }
1835 else
1836 {
3072d30e 1837 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1838 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1839 if (REGNO (src) == REGNO (scratch))
1840 abort ();
1841 }
644459d0 1842 return insn;
1843}
1844
1845/* Return nonzero if this function is known to have a null epilogue. */
1846
1847int
1848direct_return (void)
1849{
1850 if (reload_completed)
1851 {
1852 if (cfun->static_chain_decl == 0
1853 && (spu_saved_regs_size ()
1854 + get_frame_size ()
abe32cce 1855 + crtl->outgoing_args_size
1856 + crtl->args.pretend_args_size == 0)
644459d0 1857 && current_function_is_leaf)
1858 return 1;
1859 }
1860 return 0;
1861}
1862
1863/*
1864 The stack frame looks like this:
1865 +-------------+
1866 | incoming |
a8e019fa 1867 | args |
1868 AP -> +-------------+
644459d0 1869 | $lr save |
1870 +-------------+
1871 prev SP | back chain |
1872 +-------------+
1873 | var args |
abe32cce 1874 | reg save | crtl->args.pretend_args_size bytes
644459d0 1875 +-------------+
1876 | ... |
1877 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1878 FP -> +-------------+
644459d0 1879 | ... |
a8e019fa 1880 | vars | get_frame_size() bytes
1881 HFP -> +-------------+
644459d0 1882 | ... |
1883 | outgoing |
abe32cce 1884 | args | crtl->outgoing_args_size bytes
644459d0 1885 +-------------+
1886 | $lr of next |
1887 | frame |
1888 +-------------+
a8e019fa 1889 | back chain |
1890 SP -> +-------------+
644459d0 1891
1892*/
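/* spu_expand_prologue below performs the whole allocation with a single
   adjustment of $sp and stores the previous $sp as the back chain at the
   new $sp. */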
1893void
1894spu_expand_prologue (void)
1895{
1896 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1897 HOST_WIDE_INT total_size;
1898 HOST_WIDE_INT saved_regs_size;
1899 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1900 rtx scratch_reg_0, scratch_reg_1;
1901 rtx insn, real;
1902
1903 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1904 the "toplevel" insn chain. */
1905 emit_note (NOTE_INSN_DELETED);
1906
1907 if (flag_pic && optimize == 0)
18d50ae6 1908 crtl->uses_pic_offset_table = 1;
644459d0 1909
1910 if (spu_naked_function_p (current_function_decl))
1911 return;
1912
1913 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1914 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1915
1916 saved_regs_size = spu_saved_regs_size ();
1917 total_size = size + saved_regs_size
abe32cce 1918 + crtl->outgoing_args_size
1919 + crtl->args.pretend_args_size;
644459d0 1920
1921 if (!current_function_is_leaf
18d50ae6 1922 || cfun->calls_alloca || total_size > 0)
644459d0 1923 total_size += STACK_POINTER_OFFSET;
1924
1925 /* Save this first because code after this might use the link
1926 register as a scratch register. */
1927 if (!current_function_is_leaf)
1928 {
1929 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1930 RTX_FRAME_RELATED_P (insn) = 1;
1931 }
1932
1933 if (total_size > 0)
1934 {
abe32cce 1935 offset = -crtl->args.pretend_args_size;
644459d0 1936 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1937 if (need_to_save_reg (regno, 1))
1938 {
1939 offset -= 16;
1940 insn = frame_emit_store (regno, sp_reg, offset);
1941 RTX_FRAME_RELATED_P (insn) = 1;
1942 }
1943 }
1944
18d50ae6 1945 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1946 {
1947 rtx pic_reg = get_pic_reg ();
1948 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1949 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1950 }
1951
1952 if (total_size > 0)
1953 {
1954 if (flag_stack_check)
1955 {
d819917f 1956 /* We compare against total_size-1 because
644459d0 1957 ($sp >= total_size) <=> ($sp > total_size-1) */
1958 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1959 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1960 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1961 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1962 {
1963 emit_move_insn (scratch_v4si, size_v4si);
1964 size_v4si = scratch_v4si;
1965 }
1966 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1967 emit_insn (gen_vec_extractv4si
1968 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1969 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1970 }
1971
1972 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1973 the value of the previous $sp because we save it as the back
1974 chain. */
1975 if (total_size <= 2000)
1976 {
1977 /* In this case we save the back chain first. */
1978 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1979 insn =
1980 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1981 }
644459d0 1982 else
1983 {
1984 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1985 insn =
1986 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1987 }
1988 RTX_FRAME_RELATED_P (insn) = 1;
1989 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1990 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1991
1992 if (total_size > 2000)
1993 {
1994 /* Save the back chain ptr */
1995 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1996 }
1997
1998 if (frame_pointer_needed)
1999 {
2000 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2001 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2002 + crtl->outgoing_args_size;
644459d0 2003 /* Set the new frame_pointer */
d8dfeb55 2004 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2005 RTX_FRAME_RELATED_P (insn) = 1;
2006 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2007 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2008 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2009 }
2010 }
2011
2012 emit_note (NOTE_INSN_DELETED);
2013}
2014
2015void
2016spu_expand_epilogue (bool sibcall_p)
2017{
2018 int size = get_frame_size (), offset, regno;
2019 HOST_WIDE_INT saved_regs_size, total_size;
2020 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2021 rtx jump, scratch_reg_0;
2022
2023 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2024 the "toplevel" insn chain. */
2025 emit_note (NOTE_INSN_DELETED);
2026
2027 if (spu_naked_function_p (current_function_decl))
2028 return;
2029
2030 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2031
2032 saved_regs_size = spu_saved_regs_size ();
2033 total_size = size + saved_regs_size
abe32cce 2034 + crtl->outgoing_args_size
2035 + crtl->args.pretend_args_size;
644459d0 2036
2037 if (!current_function_is_leaf
18d50ae6 2038 || cfun->calls_alloca || total_size > 0)
644459d0 2039 total_size += STACK_POINTER_OFFSET;
2040
2041 if (total_size > 0)
2042 {
18d50ae6 2043 if (cfun->calls_alloca)
644459d0 2044 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2045 else
2046 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2047
2048
2049 if (saved_regs_size > 0)
2050 {
abe32cce 2051 offset = -crtl->args.pretend_args_size;
644459d0 2052 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2053 if (need_to_save_reg (regno, 1))
2054 {
2055 offset -= 0x10;
2056 frame_emit_load (regno, sp_reg, offset);
2057 }
2058 }
2059 }
2060
2061 if (!current_function_is_leaf)
2062 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2063
2064 if (!sibcall_p)
2065 {
18b42941 2066 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2067 jump = emit_jump_insn (gen__return ());
2068 emit_barrier_after (jump);
2069 }
2070
2071 emit_note (NOTE_INSN_DELETED);
2072}
2073
2074rtx
2075spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2076{
2077 if (count != 0)
2078 return 0;
2079 /* This is inefficient because it ends up copying to a save-register
2080 which then gets saved even though $lr has already been saved. But
2081 it does generate better code for leaf functions and we don't need
2082 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2083 used for __builtin_return_address anyway, so maybe we don't care if
2084 it's inefficient. */
2085 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2086}
2087\f
2088
2089/* Given VAL, generate a constant appropriate for MODE.
2090 If MODE is a vector mode, every element will be VAL.
2091 For TImode, VAL will be zero extended to 128 bits. */
2092rtx
2093spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2094{
2095 rtx inner;
2096 rtvec v;
2097 int units, i;
2098
2099 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2100 || GET_MODE_CLASS (mode) == MODE_FLOAT
2101 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2102 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2103
2104 if (GET_MODE_CLASS (mode) == MODE_INT)
2105 return immed_double_const (val, 0, mode);
2106
2107 /* val is the bit representation of the float */
2108 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2109 return hwint_to_const_double (mode, val);
2110
2111 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2112 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2113 else
2114 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2115
2116 units = GET_MODE_NUNITS (mode);
2117
2118 v = rtvec_alloc (units);
2119
2120 for (i = 0; i < units; ++i)
2121 RTVEC_ELT (v, i) = inner;
2122
2123 return gen_rtx_CONST_VECTOR (mode, v);
2124}
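/* For instance, spu_const (V4SImode, 1) builds the vector {1, 1, 1, 1},
   and spu_const (SFmode, 0x3f800000) yields 1.0f, because for float
   modes VAL is taken as the bit image of the value.  */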
644459d0 2125
5474166e 2126/* Create a MODE vector constant from 4 ints. */
2127rtx
2128spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2129{
2130 unsigned char arr[16];
2131 arr[0] = (a >> 24) & 0xff;
2132 arr[1] = (a >> 16) & 0xff;
2133 arr[2] = (a >> 8) & 0xff;
2134 arr[3] = (a >> 0) & 0xff;
2135 arr[4] = (b >> 24) & 0xff;
2136 arr[5] = (b >> 16) & 0xff;
2137 arr[6] = (b >> 8) & 0xff;
2138 arr[7] = (b >> 0) & 0xff;
2139 arr[8] = (c >> 24) & 0xff;
2140 arr[9] = (c >> 16) & 0xff;
2141 arr[10] = (c >> 8) & 0xff;
2142 arr[11] = (c >> 0) & 0xff;
2143 arr[12] = (d >> 24) & 0xff;
2144 arr[13] = (d >> 16) & 0xff;
2145 arr[14] = (d >> 8) & 0xff;
2146 arr[15] = (d >> 0) & 0xff;
2147 return array_to_constant(mode, arr);
2148}
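/* Usage sketch: spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) lays the four words out most significant
   byte first, so the 16-byte image is simply the bytes 0x00 through
   0x0f in order.  */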
5a976006 2149\f
2150/* Branch hint support.  */
5474166e 2151
644459d0 2152/* An array of these is used to propagate hints to predecessor blocks. */
2153struct spu_bb_info
2154{
5a976006 2155 rtx prop_jump; /* propagated from another block */
2156 int bb_index; /* the original block. */
644459d0 2157};
5a976006 2158static struct spu_bb_info *spu_bb_info;
644459d0 2159
5a976006 2160#define STOP_HINT_P(INSN) \
2161 (GET_CODE(INSN) == CALL_INSN \
2162 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2163 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2164
2165/* 1 when RTX is a hinted branch or its target. We keep track of
2166 what has been hinted so the safe-hint code can test it easily. */
2167#define HINTED_P(RTX) \
2168 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2169
2170/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2171#define SCHED_ON_EVEN_P(RTX) \
2172 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2173
2174/* Emit a nop for INSN such that the two will dual issue. This assumes
2175 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2176 We check for TImode to handle a MULTI1 insn which has dual issued its
2177 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2178 ADDR_VEC insns. */
2179static void
2180emit_nop_for_insn (rtx insn)
644459d0 2181{
5a976006 2182 int p;
2183 rtx new_insn;
2184 p = get_pipe (insn);
2185 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2186 new_insn = emit_insn_after (gen_lnop (), insn);
2187 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2188 {
5a976006 2189 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2190 PUT_MODE (new_insn, TImode);
2191 PUT_MODE (insn, VOIDmode);
2192 }
2193 else
2194 new_insn = emit_insn_after (gen_lnop (), insn);
2195 recog_memoized (new_insn);
2196}
2197
2198/* Insert nops in basic blocks to meet dual issue alignment
2199 requirements. Also make sure hbrp and hint instructions are at least
2200 one cycle apart, possibly inserting a nop. */
2201static void
2202pad_bb(void)
2203{
2204 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2205 int length;
2206 int addr;
2207
2208 /* This sets up INSN_ADDRESSES. */
2209 shorten_branches (get_insns ());
2210
2211 /* Keep track of length added by nops. */
2212 length = 0;
2213
2214 prev_insn = 0;
2215 insn = get_insns ();
2216 if (!active_insn_p (insn))
2217 insn = next_active_insn (insn);
2218 for (; insn; insn = next_insn)
2219 {
2220 next_insn = next_active_insn (insn);
2221 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2222 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2223 {
5a976006 2224 if (hbr_insn)
2225 {
2226 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2227 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2228 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2229 || (a1 - a0 == 4))
2230 {
2231 prev_insn = emit_insn_before (gen_lnop (), insn);
2232 PUT_MODE (prev_insn, GET_MODE (insn));
2233 PUT_MODE (insn, TImode);
2234 length += 4;
2235 }
2236 }
2237 hbr_insn = insn;
2238 }
2239 if (INSN_CODE (insn) == CODE_FOR_blockage)
2240 {
2241 if (GET_MODE (insn) == TImode)
2242 PUT_MODE (next_insn, TImode);
2243 insn = next_insn;
2244 next_insn = next_active_insn (insn);
2245 }
2246 addr = INSN_ADDRESSES (INSN_UID (insn));
2247 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2248 {
2249 if (((addr + length) & 7) != 0)
2250 {
2251 emit_nop_for_insn (prev_insn);
2252 length += 4;
2253 }
644459d0 2254 }
5a976006 2255 else if (GET_MODE (insn) == TImode
2256 && ((next_insn && GET_MODE (next_insn) != TImode)
2257 || get_attr_type (insn) == TYPE_MULTI0)
2258 && ((addr + length) & 7) != 0)
2259 {
2260 /* prev_insn will always be set because the first insn is
2261 always 8-byte aligned. */
2262 emit_nop_for_insn (prev_insn);
2263 length += 4;
2264 }
2265 prev_insn = insn;
644459d0 2266 }
644459d0 2267}
2268
5a976006 2269\f
2270/* Routines for branch hints. */
2271
644459d0 2272static void
5a976006 2273spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2274 int distance, sbitmap blocks)
644459d0 2275{
5a976006 2276 rtx branch_label = 0;
2277 rtx hint;
2278 rtx insn;
2279 rtx table;
644459d0 2280
2281 if (before == 0 || branch == 0 || target == 0)
2282 return;
2283
5a976006 2284 /* While scheduling we require hints to be no further than 600 bytes from
 2285 the branch, so we need to enforce that here too.  */
644459d0 2286 if (distance > 600)
2287 return;
2288
5a976006 2289 /* If BEFORE is a basic block note, emit the hint after it instead. */
2290 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2291 before = NEXT_INSN (before);
644459d0 2292
2293 branch_label = gen_label_rtx ();
2294 LABEL_NUSES (branch_label)++;
2295 LABEL_PRESERVE_P (branch_label) = 1;
2296 insn = emit_label_before (branch_label, branch);
2297 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2298 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2299
2300 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2301 recog_memoized (hint);
2302 HINTED_P (branch) = 1;
644459d0 2303
5a976006 2304 if (GET_CODE (target) == LABEL_REF)
2305 HINTED_P (XEXP (target, 0)) = 1;
2306 else if (tablejump_p (branch, 0, &table))
644459d0 2307 {
5a976006 2308 rtvec vec;
2309 int j;
2310 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2311 vec = XVEC (PATTERN (table), 0);
2312 else
2313 vec = XVEC (PATTERN (table), 1);
2314 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2315 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2316 }
5a976006 2317
2318 if (distance >= 588)
644459d0 2319 {
5a976006 2320 /* Make sure the hint isn't scheduled any earlier than this point,
2321 which could make it too far for the branch offset to fit */
2322 recog_memoized (emit_insn_before (gen_blockage (), hint));
2323 }
2324 else if (distance <= 8 * 4)
2325 {
2326 /* To guarantee at least 8 insns between the hint and branch we
2327 insert nops. */
2328 int d;
2329 for (d = distance; d < 8 * 4; d += 4)
2330 {
2331 insn =
2332 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2333 recog_memoized (insn);
2334 }
2335
2336 /* Make sure any nops inserted aren't scheduled before the hint. */
2337 recog_memoized (emit_insn_after (gen_blockage (), hint));
2338
2339 /* Make sure any nops inserted aren't scheduled after the call. */
2340 if (CALL_P (branch) && distance < 8 * 4)
2341 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2342 }
644459d0 2343}
2344
2345/* Returns 0 if we don't want a hint for this branch. Otherwise return
2346 the rtx for the branch target. */
2347static rtx
2348get_branch_target (rtx branch)
2349{
2350 if (GET_CODE (branch) == JUMP_INSN)
2351 {
2352 rtx set, src;
2353
2354 /* Return statements */
2355 if (GET_CODE (PATTERN (branch)) == RETURN)
2356 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2357
2358 /* jump table */
2359 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2360 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2361 return 0;
2362
2363 set = single_set (branch);
2364 src = SET_SRC (set);
2365 if (GET_CODE (SET_DEST (set)) != PC)
2366 abort ();
2367
2368 if (GET_CODE (src) == IF_THEN_ELSE)
2369 {
2370 rtx lab = 0;
2371 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2372 if (note)
2373 {
2374 /* If the more probable case is not a fall through, then
2375 try a branch hint. */
2376 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2377 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2378 && GET_CODE (XEXP (src, 1)) != PC)
2379 lab = XEXP (src, 1);
2380 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2381 && GET_CODE (XEXP (src, 2)) != PC)
2382 lab = XEXP (src, 2);
2383 }
2384 if (lab)
2385 {
2386 if (GET_CODE (lab) == RETURN)
2387 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2388 return lab;
2389 }
2390 return 0;
2391 }
2392
2393 return src;
2394 }
2395 else if (GET_CODE (branch) == CALL_INSN)
2396 {
2397 rtx call;
2398 /* All of our call patterns are in a PARALLEL and the CALL is
2399 the first pattern in the PARALLEL. */
2400 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2401 abort ();
2402 call = XVECEXP (PATTERN (branch), 0, 0);
2403 if (GET_CODE (call) == SET)
2404 call = SET_SRC (call);
2405 if (GET_CODE (call) != CALL)
2406 abort ();
2407 return XEXP (XEXP (call, 0), 0);
2408 }
2409 return 0;
2410}
2411
5a976006 2412/* The special $hbr register is used to prevent the insn scheduler from
2413 moving hbr insns across instructions which invalidate them. It
2414 should only be used in a clobber, and this function searches for
2415 insns which clobber it. */
2416static bool
2417insn_clobbers_hbr (rtx insn)
2418{
2419 if (INSN_P (insn)
2420 && GET_CODE (PATTERN (insn)) == PARALLEL)
2421 {
2422 rtx parallel = PATTERN (insn);
2423 rtx clobber;
2424 int j;
2425 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2426 {
2427 clobber = XVECEXP (parallel, 0, j);
2428 if (GET_CODE (clobber) == CLOBBER
2429 && GET_CODE (XEXP (clobber, 0)) == REG
2430 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2431 return 1;
2432 }
2433 }
2434 return 0;
2435}
2436
2437/* Search up to 32 insns starting at FIRST:
2438 - at any kind of hinted branch, just return
2439 - at any unconditional branch in the first 15 insns, just return
2440 - at a call or indirect branch, after the first 15 insns, force it to
2441 an even address and return
2442 - at any unconditional branch, after the first 15 insns, force it to
2443 an even address.
2444 At the end of the search, insert an hbrp within 4 insns of FIRST,
2445 and an hbrp within 16 instructions of FIRST.
2446 */
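/* In byte terms: if FIRST is at address A, the first hbrp goes before
   the insn that crosses A + 16 (4 insns) and the second before the
   insn that crosses A + 56 (the 14-insn test below, which leaves room
   for the up-to-2 insns the first hbrp may add).  */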
644459d0 2447static void
5a976006 2448insert_hbrp_for_ilb_runout (rtx first)
644459d0 2449{
5a976006 2450 rtx insn, before_4 = 0, before_16 = 0;
2451 int addr = 0, length, first_addr = -1;
2452 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2453 int insert_lnop_after = 0;
2454 for (insn = first; insn; insn = NEXT_INSN (insn))
2455 if (INSN_P (insn))
2456 {
2457 if (first_addr == -1)
2458 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2459 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2460 length = get_attr_length (insn);
2461
2462 if (before_4 == 0 && addr + length >= 4 * 4)
2463 before_4 = insn;
2464 /* We test for 14 instructions because the first hbrp will add
2465 up to 2 instructions. */
2466 if (before_16 == 0 && addr + length >= 14 * 4)
2467 before_16 = insn;
2468
2469 if (INSN_CODE (insn) == CODE_FOR_hbr)
2470 {
2471 /* Make sure an hbrp is at least 2 cycles away from a hint.
2472 Insert an lnop after the hbrp when necessary. */
2473 if (before_4 == 0 && addr > 0)
2474 {
2475 before_4 = insn;
2476 insert_lnop_after |= 1;
2477 }
2478 else if (before_4 && addr <= 4 * 4)
2479 insert_lnop_after |= 1;
2480 if (before_16 == 0 && addr > 10 * 4)
2481 {
2482 before_16 = insn;
2483 insert_lnop_after |= 2;
2484 }
2485 else if (before_16 && addr <= 14 * 4)
2486 insert_lnop_after |= 2;
2487 }
644459d0 2488
5a976006 2489 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2490 {
2491 if (addr < hbrp_addr0)
2492 hbrp_addr0 = addr;
2493 else if (addr < hbrp_addr1)
2494 hbrp_addr1 = addr;
2495 }
644459d0 2496
5a976006 2497 if (CALL_P (insn) || JUMP_P (insn))
2498 {
2499 if (HINTED_P (insn))
2500 return;
2501
2502 /* Any branch after the first 15 insns should be on an even
2503 address to avoid a special case branch. There might be
2504 some nops and/or hbrps inserted, so we test after 10
2505 insns. */
2506 if (addr > 10 * 4)
2507 SCHED_ON_EVEN_P (insn) = 1;
2508 }
644459d0 2509
5a976006 2510 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2511 return;
2512
2513
2514 if (addr + length >= 32 * 4)
644459d0 2515 {
5a976006 2516 gcc_assert (before_4 && before_16);
2517 if (hbrp_addr0 > 4 * 4)
644459d0 2518 {
5a976006 2519 insn =
2520 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2521 recog_memoized (insn);
2522 INSN_ADDRESSES_NEW (insn,
2523 INSN_ADDRESSES (INSN_UID (before_4)));
2524 PUT_MODE (insn, GET_MODE (before_4));
2525 PUT_MODE (before_4, TImode);
2526 if (insert_lnop_after & 1)
644459d0 2527 {
5a976006 2528 insn = emit_insn_before (gen_lnop (), before_4);
2529 recog_memoized (insn);
2530 INSN_ADDRESSES_NEW (insn,
2531 INSN_ADDRESSES (INSN_UID (before_4)));
2532 PUT_MODE (insn, TImode);
644459d0 2533 }
644459d0 2534 }
5a976006 2535 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2536 && hbrp_addr1 > 16 * 4)
644459d0 2537 {
5a976006 2538 insn =
2539 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2540 recog_memoized (insn);
2541 INSN_ADDRESSES_NEW (insn,
2542 INSN_ADDRESSES (INSN_UID (before_16)));
2543 PUT_MODE (insn, GET_MODE (before_16));
2544 PUT_MODE (before_16, TImode);
2545 if (insert_lnop_after & 2)
644459d0 2546 {
5a976006 2547 insn = emit_insn_before (gen_lnop (), before_16);
2548 recog_memoized (insn);
2549 INSN_ADDRESSES_NEW (insn,
2550 INSN_ADDRESSES (INSN_UID
2551 (before_16)));
2552 PUT_MODE (insn, TImode);
644459d0 2553 }
2554 }
5a976006 2555 return;
644459d0 2556 }
644459d0 2557 }
5a976006 2558 else if (BARRIER_P (insn))
2559 return;
644459d0 2560
644459d0 2561}
5a976006 2562
2563/* The SPU might hang when it executes 48 inline instructions after a
2564 hinted branch jumps to its hinted target. The beginning of a
2565 function and the return from a call might have been hinted, and must
2566 be handled as well. To prevent a hang we insert 2 hbrps. The first
2567 should be within 6 insns of the branch target. The second should be
2568 within 22 insns of the branch target. When determining if hbrps are
2569 necessary, we look for only 32 inline instructions, because up to
 2570 12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2571 new hbrps, we insert them within 4 and 16 insns of the target. */
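/* The arithmetic behind the 32-insn window: 32 inline insns plus up
   to 12 nops plus 4 hbrps is exactly the 48-instruction run the
   hardware is sensitive to.  */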
644459d0 2572static void
5a976006 2573insert_hbrp (void)
644459d0 2574{
5a976006 2575 rtx insn;
2576 if (TARGET_SAFE_HINTS)
644459d0 2577 {
5a976006 2578 shorten_branches (get_insns ());
2579 /* Insert hbrp at beginning of function */
2580 insn = next_active_insn (get_insns ());
2581 if (insn)
2582 insert_hbrp_for_ilb_runout (insn);
2583 /* Insert hbrp after hinted targets. */
2584 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2585 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2586 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2587 }
644459d0 2588}
2589
5a976006 2590static int in_spu_reorg;
2591
2592/* Insert branch hints. There are no branch optimizations after this
2593 pass, so it's safe to set our branch hints now. */
644459d0 2594static void
5a976006 2595spu_machine_dependent_reorg (void)
644459d0 2596{
5a976006 2597 sbitmap blocks;
2598 basic_block bb;
2599 rtx branch, insn;
2600 rtx branch_target = 0;
2601 int branch_addr = 0, insn_addr, required_dist = 0;
2602 int i;
2603 unsigned int j;
644459d0 2604
5a976006 2605 if (!TARGET_BRANCH_HINTS || optimize == 0)
2606 {
2607 /* We still do it for unoptimized code because an external
2608 function might have hinted a call or return. */
2609 insert_hbrp ();
2610 pad_bb ();
2611 return;
2612 }
644459d0 2613
5a976006 2614 blocks = sbitmap_alloc (last_basic_block);
2615 sbitmap_zero (blocks);
644459d0 2616
5a976006 2617 in_spu_reorg = 1;
2618 compute_bb_for_insn ();
2619
2620 compact_blocks ();
2621
2622 spu_bb_info =
2623 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2624 sizeof (struct spu_bb_info));
2625
2626 /* We need exact insn addresses and lengths. */
2627 shorten_branches (get_insns ());
2628
2629 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2630 {
5a976006 2631 bb = BASIC_BLOCK (i);
2632 branch = 0;
2633 if (spu_bb_info[i].prop_jump)
644459d0 2634 {
5a976006 2635 branch = spu_bb_info[i].prop_jump;
2636 branch_target = get_branch_target (branch);
2637 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2638 required_dist = spu_hint_dist;
2639 }
2640 /* Search from end of a block to beginning. In this loop, find
2641 jumps which need a branch hint and emit the hint only when:
2642 - it's an indirect branch and we're at the insn which sets
2643 the register
2644 - we're at an insn that will invalidate the hint. e.g., a
2645 call, another hint insn, inline asm that clobbers $hbr, and
2646 some inlined operations (divmodsi4). Don't consider jumps
2647 because they are only at the end of a block and are
2648 considered when we are deciding whether to propagate
2649 - we're getting too far away from the branch. The hbr insns
2650 only have a signed 10 bit offset
2651 We go back as far as possible so the branch will be considered
2652 for propagation when we get to the beginning of the block. */
2653 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2654 {
2655 if (INSN_P (insn))
2656 {
2657 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2658 if (branch
2659 && ((GET_CODE (branch_target) == REG
2660 && set_of (branch_target, insn) != NULL_RTX)
2661 || insn_clobbers_hbr (insn)
2662 || branch_addr - insn_addr > 600))
2663 {
2664 rtx next = NEXT_INSN (insn);
2665 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2666 if (insn != BB_END (bb)
2667 && branch_addr - next_addr >= required_dist)
2668 {
2669 if (dump_file)
2670 fprintf (dump_file,
2671 "hint for %i in block %i before %i\n",
2672 INSN_UID (branch), bb->index,
2673 INSN_UID (next));
2674 spu_emit_branch_hint (next, branch, branch_target,
2675 branch_addr - next_addr, blocks);
2676 }
2677 branch = 0;
2678 }
2679
2680 /* JUMP_P will only be true at the end of a block. When
2681 branch is already set it means we've previously decided
2682 to propagate a hint for that branch into this block. */
2683 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2684 {
2685 branch = 0;
2686 if ((branch_target = get_branch_target (insn)))
2687 {
2688 branch = insn;
2689 branch_addr = insn_addr;
2690 required_dist = spu_hint_dist;
2691 }
2692 }
2693 }
2694 if (insn == BB_HEAD (bb))
2695 break;
2696 }
2697
2698 if (branch)
2699 {
2700 /* If we haven't emitted a hint for this branch yet, it might
2701 be profitable to emit it in one of the predecessor blocks,
2702 especially for loops. */
2703 rtx bbend;
2704 basic_block prev = 0, prop = 0, prev2 = 0;
2705 int loop_exit = 0, simple_loop = 0;
2706 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2707
2708 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2709 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2710 prev = EDGE_PRED (bb, j)->src;
2711 else
2712 prev2 = EDGE_PRED (bb, j)->src;
2713
2714 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2715 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2716 loop_exit = 1;
2717 else if (EDGE_SUCC (bb, j)->dest == bb)
2718 simple_loop = 1;
2719
2720 /* If this branch is a loop exit then propagate to previous
2721 fallthru block. This catches the cases when it is a simple
2722 loop or when there is an initial branch into the loop. */
2723 if (prev && (loop_exit || simple_loop)
2724 && prev->loop_depth <= bb->loop_depth)
2725 prop = prev;
2726
2727 /* If there is only one adjacent predecessor, don't propagate
2728 outside this loop. This loop_depth test isn't perfect, but
2729 I'm not sure the loop_father member is valid at this point. */
2730 else if (prev && single_pred_p (bb)
2731 && prev->loop_depth == bb->loop_depth)
2732 prop = prev;
2733
2734 /* If this is the JOIN block of a simple IF-THEN then
2735 propagate the hint to the HEADER block. */
2736 else if (prev && prev2
2737 && EDGE_COUNT (bb->preds) == 2
2738 && EDGE_COUNT (prev->preds) == 1
2739 && EDGE_PRED (prev, 0)->src == prev2
2740 && prev2->loop_depth == bb->loop_depth
2741 && GET_CODE (branch_target) != REG)
2742 prop = prev;
2743
2744 /* Don't propagate when:
2745 - this is a simple loop and the hint would be too far
2746 - this is not a simple loop and there are 16 insns in
2747 this block already
2748 - the predecessor block ends in a branch that will be
2749 hinted
2750 - the predecessor block ends in an insn that invalidates
2751 the hint */
2752 if (prop
2753 && prop->index >= 0
2754 && (bbend = BB_END (prop))
2755 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2756 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2757 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2758 {
2759 if (dump_file)
2760 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2761 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2762 bb->index, prop->index, bb->loop_depth,
2763 INSN_UID (branch), loop_exit, simple_loop,
2764 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2765
2766 spu_bb_info[prop->index].prop_jump = branch;
2767 spu_bb_info[prop->index].bb_index = i;
2768 }
2769 else if (branch_addr - next_addr >= required_dist)
2770 {
2771 if (dump_file)
2772 fprintf (dump_file, "hint for %i in block %i before %i\n",
2773 INSN_UID (branch), bb->index,
2774 INSN_UID (NEXT_INSN (insn)));
2775 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2776 branch_addr - next_addr, blocks);
2777 }
2778 branch = 0;
644459d0 2779 }
644459d0 2780 }
5a976006 2781 free (spu_bb_info);
644459d0 2782
5a976006 2783 if (!sbitmap_empty_p (blocks))
2784 find_many_sub_basic_blocks (blocks);
2785
2786 /* We have to schedule to make sure alignment is ok. */
2787 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2788
2789 /* The hints need to be scheduled, so call it again. */
2790 schedule_insns ();
2791
2792 insert_hbrp ();
2793
2794 pad_bb ();
2795
8f1d58ad 2796 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2797 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2798 {
2799 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2800 between its branch label and the branch.  We don't move the
2801 label because GCC expects it at the beginning of the block. */
2802 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2803 rtx label_ref = XVECEXP (unspec, 0, 0);
2804 rtx label = XEXP (label_ref, 0);
2805 rtx branch;
2806 int offset = 0;
2807 for (branch = NEXT_INSN (label);
2808 !JUMP_P (branch) && !CALL_P (branch);
2809 branch = NEXT_INSN (branch))
2810 if (NONJUMP_INSN_P (branch))
2811 offset += get_attr_length (branch);
2812 if (offset > 0)
2813 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2814 }
5a976006 2815
2816 if (spu_flag_var_tracking)
644459d0 2817 {
5a976006 2818 df_analyze ();
2819 timevar_push (TV_VAR_TRACKING);
2820 variable_tracking_main ();
2821 timevar_pop (TV_VAR_TRACKING);
2822 df_finish_pass (false);
644459d0 2823 }
5a976006 2824
2825 free_bb_for_insn ();
2826
2827 in_spu_reorg = 0;
644459d0 2828}
2829\f
2830
2831/* Insn scheduling routines, primarily for dual issue. */
2832static int
2833spu_sched_issue_rate (void)
2834{
2835 return 2;
2836}
2837
2838static int
5a976006 2839uses_ls_unit(rtx insn)
644459d0 2840{
5a976006 2841 rtx set = single_set (insn);
2842 if (set != 0
2843 && (GET_CODE (SET_DEST (set)) == MEM
2844 || GET_CODE (SET_SRC (set)) == MEM))
2845 return 1;
2846 return 0;
644459d0 2847}
2848
2849static int
2850get_pipe (rtx insn)
2851{
2852 enum attr_type t;
2853 /* Handle inline asm */
2854 if (INSN_CODE (insn) == -1)
2855 return -1;
2856 t = get_attr_type (insn);
2857 switch (t)
2858 {
2859 case TYPE_CONVERT:
2860 return -2;
2861 case TYPE_MULTI0:
2862 return -1;
2863
2864 case TYPE_FX2:
2865 case TYPE_FX3:
2866 case TYPE_SPR:
2867 case TYPE_NOP:
2868 case TYPE_FXB:
2869 case TYPE_FPD:
2870 case TYPE_FP6:
2871 case TYPE_FP7:
644459d0 2872 return 0;
2873
2874 case TYPE_LNOP:
2875 case TYPE_SHUF:
2876 case TYPE_LOAD:
2877 case TYPE_STORE:
2878 case TYPE_BR:
2879 case TYPE_MULTI1:
2880 case TYPE_HBR:
5a976006 2881 case TYPE_IPREFETCH:
644459d0 2882 return 1;
2883 default:
2884 abort ();
2885 }
2886}
2887
5a976006 2888
2889/* haifa-sched.c has a static variable that keeps track of the current
2890 cycle. It is passed to spu_sched_reorder, and we record it here for
2891 use by spu_sched_variable_issue. It won't be accurate if the
2892 scheduler updates its clock_var between the two calls. */
2893static int clock_var;
2894
2895/* This is used to keep track of insn alignment. Set to 0 at the
2896 beginning of each block and increased by the "length" attr of each
2897 insn scheduled. */
2898static int spu_sched_length;
2899
2900/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2901 ready list appropriately in spu_sched_reorder(). */
2902static int pipe0_clock;
2903static int pipe1_clock;
2904
2905static int prev_clock_var;
2906
2907static int prev_priority;
2908
2909/* The SPU needs to load the next ilb sometime during the execution of
2910 the previous ilb. There is a potential conflict if every cycle has a
2911 load or store. To avoid the conflict we make sure the load/store
2912 unit is free for at least one cycle during the execution of insns in
2913 the previous ilb. */
2914static int spu_ls_first;
2915static int prev_ls_clock;
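/* Concretely, spu_sched_reorder below emits an hbrp instead of yet
   another load/store once spu_sched_length - spu_ls_first reaches
   4 * 15 bytes, i.e. when every slot of the last 15 insns has used the
   load/store unit.  */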
2916
2917static void
2918spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2919 int max_ready ATTRIBUTE_UNUSED)
2920{
2921 spu_sched_length = 0;
2922}
2923
2924static void
2925spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2926 int max_ready ATTRIBUTE_UNUSED)
2927{
2928 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2929 {
2930 /* When any block might be at least 8-byte aligned, assume they
2931 will all be at least 8-byte aligned to make sure dual issue
2932 works out correctly. */
2933 spu_sched_length = 0;
2934 }
2935 spu_ls_first = INT_MAX;
2936 clock_var = -1;
2937 prev_ls_clock = -1;
2938 pipe0_clock = -1;
2939 pipe1_clock = -1;
2940 prev_clock_var = -1;
2941 prev_priority = -1;
2942}
2943
644459d0 2944static int
5a976006 2945spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2946 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2947{
5a976006 2948 int len;
2949 int p;
644459d0 2950 if (GET_CODE (PATTERN (insn)) == USE
2951 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2952 || (len = get_attr_length (insn)) == 0)
2953 return more;
2954
2955 spu_sched_length += len;
2956
2957 /* Reset on inline asm */
2958 if (INSN_CODE (insn) == -1)
2959 {
2960 spu_ls_first = INT_MAX;
2961 pipe0_clock = -1;
2962 pipe1_clock = -1;
2963 return 0;
2964 }
2965 p = get_pipe (insn);
2966 if (p == 0)
2967 pipe0_clock = clock_var;
2968 else
2969 pipe1_clock = clock_var;
2970
2971 if (in_spu_reorg)
2972 {
2973 if (clock_var - prev_ls_clock > 1
2974 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2975 spu_ls_first = INT_MAX;
2976 if (uses_ls_unit (insn))
2977 {
2978 if (spu_ls_first == INT_MAX)
2979 spu_ls_first = spu_sched_length;
2980 prev_ls_clock = clock_var;
2981 }
2982
2983 /* The scheduler hasn't inserted the nop, but we will later on.
2984 Include those nops in spu_sched_length. */
2985 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2986 spu_sched_length += 4;
2987 prev_clock_var = clock_var;
2988
2989 /* more is -1 when called from spu_sched_reorder for new insns
2990 that don't have INSN_PRIORITY */
2991 if (more >= 0)
2992 prev_priority = INSN_PRIORITY (insn);
2993 }
2994
2995 /* Always try issuing more insns.  spu_sched_reorder will decide
2996 when the cycle should be advanced. */
2997 return 1;
2998}
2999
3000/* This function is called for both TARGET_SCHED_REORDER and
3001 TARGET_SCHED_REORDER2. */
3002static int
3003spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3004 rtx *ready, int *nreadyp, int clock)
3005{
3006 int i, nready = *nreadyp;
3007 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3008 rtx insn;
3009
3010 clock_var = clock;
3011
3012 if (nready <= 0 || pipe1_clock >= clock)
3013 return 0;
3014
3015 /* Find any rtl insns that don't generate assembly insns and schedule
3016 them first. */
3017 for (i = nready - 1; i >= 0; i--)
3018 {
3019 insn = ready[i];
3020 if (INSN_CODE (insn) == -1
3021 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3022 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3023 {
3024 ready[i] = ready[nready - 1];
3025 ready[nready - 1] = insn;
3026 return 1;
3027 }
3028 }
3029
3030 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3031 for (i = 0; i < nready; i++)
3032 if (INSN_CODE (ready[i]) != -1)
3033 {
3034 insn = ready[i];
3035 switch (get_attr_type (insn))
3036 {
3037 default:
3038 case TYPE_MULTI0:
3039 case TYPE_CONVERT:
3040 case TYPE_FX2:
3041 case TYPE_FX3:
3042 case TYPE_SPR:
3043 case TYPE_NOP:
3044 case TYPE_FXB:
3045 case TYPE_FPD:
3046 case TYPE_FP6:
3047 case TYPE_FP7:
3048 pipe_0 = i;
3049 break;
3050 case TYPE_LOAD:
3051 case TYPE_STORE:
3052 pipe_ls = i;
3053 case TYPE_LNOP:
3054 case TYPE_SHUF:
3055 case TYPE_BR:
3056 case TYPE_MULTI1:
3057 case TYPE_HBR:
3058 pipe_1 = i;
3059 break;
3060 case TYPE_IPREFETCH:
3061 pipe_hbrp = i;
3062 break;
3063 }
3064 }
3065
3066 /* In the first scheduling phase, schedule loads and stores together
3067 to increase the chance they will get merged during postreload CSE. */
3068 if (!reload_completed && pipe_ls >= 0)
3069 {
3070 insn = ready[pipe_ls];
3071 ready[pipe_ls] = ready[nready - 1];
3072 ready[nready - 1] = insn;
3073 return 1;
3074 }
3075
3076 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3077 if (pipe_hbrp >= 0)
3078 pipe_1 = pipe_hbrp;
3079
3080 /* When we have loads/stores in every cycle of the last 15 insns and
3081 we are about to schedule another load/store, emit an hbrp insn
3082 instead. */
3083 if (in_spu_reorg
3084 && spu_sched_length - spu_ls_first >= 4 * 15
3085 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3086 {
3087 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3088 recog_memoized (insn);
3089 if (pipe0_clock < clock)
3090 PUT_MODE (insn, TImode);
3091 spu_sched_variable_issue (file, verbose, insn, -1);
3092 return 0;
3093 }
3094
3095 /* In general, we want to emit nops to increase dual issue, but dual
3096 issue isn't faster when one of the insns could be scheduled later
3097 without affecting the critical path.  We look at INSN_PRIORITY to
3098 make a good guess, but it isn't perfect so -mdual-nops=n can be
3099 used to affect it. */
3100 if (in_spu_reorg && spu_dual_nops < 10)
3101 {
3102 /* When we are at an even address and we are not issuing nops to
 3103 improve scheduling, then we need to advance the cycle. */
3104 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3105 && (spu_dual_nops == 0
3106 || (pipe_1 != -1
3107 && prev_priority >
3108 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3109 return 0;
3110
3111 /* When at an odd address, schedule the highest priority insn
3112 without considering pipeline. */
3113 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3114 && (spu_dual_nops == 0
3115 || (prev_priority >
3116 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3117 return 1;
3118 }
3119
3120
3121 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3122 pipe0 insn in the ready list, schedule it. */
3123 if (pipe0_clock < clock && pipe_0 >= 0)
3124 schedule_i = pipe_0;
3125
3126 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3127 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3128 else
3129 schedule_i = pipe_1;
3130
3131 if (schedule_i > -1)
3132 {
3133 insn = ready[schedule_i];
3134 ready[schedule_i] = ready[nready - 1];
3135 ready[nready - 1] = insn;
3136 return 1;
3137 }
3138 return 0;
644459d0 3139}
3140
3141/* INSN is dependent on DEP_INSN. */
3142static int
5a976006 3143spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3144{
5a976006 3145 rtx set;
3146
3147 /* The blockage pattern is used to prevent instructions from being
3148 moved across it and has no cost. */
3149 if (INSN_CODE (insn) == CODE_FOR_blockage
3150 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3151 return 0;
3152
9d98604b 3153 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3154 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3155 return 0;
3156
3157 /* Make sure hbrps are spread out. */
3158 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3159 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3160 return 8;
3161
3162 /* Make sure hints and hbrps are 2 cycles apart. */
3163 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3164 || INSN_CODE (insn) == CODE_FOR_hbr)
3165 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3166 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3167 return 2;
3168
3169 /* An hbrp has no real dependency on other insns. */
3170 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3171 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3172 return 0;
3173
3174 /* Assuming that it is unlikely an argument register will be used in
3175 the first cycle of the called function, we reduce the cost for
3176 slightly better scheduling of dep_insn. When not hinted, the
3177 mispredicted branch would hide the cost as well. */
3178 if (CALL_P (insn))
3179 {
3180 rtx target = get_branch_target (insn);
3181 if (GET_CODE (target) != REG || !set_of (target, insn))
3182 return cost - 2;
3183 return cost;
3184 }
3185
3186 /* And when returning from a function, let's assume the return values
3187 are completed sooner too. */
3188 if (CALL_P (dep_insn))
644459d0 3189 return cost - 2;
5a976006 3190
3191 /* Make sure an instruction that loads from the back chain is scheduled
3192 away from the return instruction so a hint is more likely to get
3193 issued. */
3194 if (INSN_CODE (insn) == CODE_FOR__return
3195 && (set = single_set (dep_insn))
3196 && GET_CODE (SET_DEST (set)) == REG
3197 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3198 return 20;
3199
644459d0 3200 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3201 scheduler makes every insn in a block anti-dependent on the final
3202 jump_insn. We adjust here so higher cost insns will get scheduled
3203 earlier. */
5a976006 3204 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3205 return insn_cost (dep_insn) - 3;
5a976006 3206
644459d0 3207 return cost;
3208}
3209\f
3210/* Create a CONST_DOUBLE from a string. */
3211struct rtx_def *
3212spu_float_const (const char *string, enum machine_mode mode)
3213{
3214 REAL_VALUE_TYPE value;
3215 value = REAL_VALUE_ATOF (string, mode);
3216 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3217}
3218
644459d0 3219int
3220spu_constant_address_p (rtx x)
3221{
3222 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3223 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3224 || GET_CODE (x) == HIGH);
3225}
3226
3227static enum spu_immediate
3228which_immediate_load (HOST_WIDE_INT val)
3229{
3230 gcc_assert (val == trunc_int_for_mode (val, SImode));
3231
3232 if (val >= -0x8000 && val <= 0x7fff)
3233 return SPU_IL;
3234 if (val >= 0 && val <= 0x3ffff)
3235 return SPU_ILA;
3236 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3237 return SPU_ILH;
3238 if ((val & 0xffff) == 0)
3239 return SPU_ILHU;
3240
3241 return SPU_NONE;
3242}
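/* Illustrative mappings for the checks above: 0x7fff is within the
   signed 16-bit range, so SPU_IL; 0x00012345 is too big for il but
   fits 18 bits, so SPU_ILA; 0x12341234 repeats its halfword, so
   SPU_ILH; 0x12340000 has a zero low halfword, so SPU_ILHU.  */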
3243
dea01258 3244/* Return true when OP can be loaded by one of the il instructions, or
3245 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3246int
3247immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3248{
3249 if (CONSTANT_P (op))
3250 {
3251 enum immediate_class c = classify_immediate (op, mode);
5df189be 3252 return c == IC_IL1 || c == IC_IL1s
3072d30e 3253 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3254 }
3255 return 0;
3256}
3257
3258/* Return true if the first SIZE bytes of ARR form a constant that can be
3259 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3260 represent the size and offset of the instruction to use. */
3261static int
3262cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3263{
3264 int cpat, run, i, start;
3265 cpat = 1;
3266 run = 0;
3267 start = -1;
3268 for (i = 0; i < size && cpat; i++)
3269 if (arr[i] != i+16)
3270 {
3271 if (!run)
3272 {
3273 start = i;
3274 if (arr[i] == 3)
3275 run = 1;
3276 else if (arr[i] == 2 && arr[i+1] == 3)
3277 run = 2;
3278 else if (arr[i] == 0)
3279 {
3280 while (arr[i+run] == run && i+run < 16)
3281 run++;
3282 if (run != 4 && run != 8)
3283 cpat = 0;
3284 }
3285 else
3286 cpat = 0;
3287 if ((i & (run-1)) != 0)
3288 cpat = 0;
3289 i += run;
3290 }
3291 else
3292 cpat = 0;
3293 }
b01a6dc3 3294 if (cpat && (run || size < 16))
dea01258 3295 {
3296 if (run == 0)
3297 run = 1;
3298 if (prun)
3299 *prun = run;
3300 if (pstart)
3301 *pstart = start == -1 ? 16-run : start;
3302 return 1;
3303 }
3304 return 0;
3305}
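/* Example of an accepted pattern: an ARR equal to i+16 everywhere
   except bytes 4..7, which hold 0, 1, 2, 3, corresponds to a word
   insert (cwd) at offset 4; the routine reports *PRUN = 4 and
   *PSTART = 4.  */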
3306
3307/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3308 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3309static enum immediate_class
3310classify_immediate (rtx op, enum machine_mode mode)
644459d0 3311{
3312 HOST_WIDE_INT val;
3313 unsigned char arr[16];
5df189be 3314 int i, j, repeated, fsmbi, repeat;
dea01258 3315
3316 gcc_assert (CONSTANT_P (op));
3317
644459d0 3318 if (GET_MODE (op) != VOIDmode)
3319 mode = GET_MODE (op);
3320
dea01258 3321 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3322 if (!flag_pic
3323 && mode == V4SImode
dea01258 3324 && GET_CODE (op) == CONST_VECTOR
3325 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3326 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3327 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3328 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3329 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3330 op = CONST_VECTOR_ELT (op, 0);
644459d0 3331
dea01258 3332 switch (GET_CODE (op))
3333 {
3334 case SYMBOL_REF:
3335 case LABEL_REF:
3336 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3337
dea01258 3338 case CONST:
0cfc65d4 3339 /* We can never know if the resulting address fits in 18 bits and can be
3340 loaded with ila. For now, assume the address will not overflow if
3341 the displacement is "small" (fits 'K' constraint). */
3342 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3343 {
3344 rtx sym = XEXP (XEXP (op, 0), 0);
3345 rtx cst = XEXP (XEXP (op, 0), 1);
3346
3347 if (GET_CODE (sym) == SYMBOL_REF
3348 && GET_CODE (cst) == CONST_INT
3349 && satisfies_constraint_K (cst))
3350 return IC_IL1s;
3351 }
3352 return IC_IL2s;
644459d0 3353
dea01258 3354 case HIGH:
3355 return IC_IL1s;
3356
3357 case CONST_VECTOR:
3358 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3359 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3360 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3361 return IC_POOL;
3362 /* Fall through. */
3363
3364 case CONST_INT:
3365 case CONST_DOUBLE:
3366 constant_to_array (mode, op, arr);
644459d0 3367
dea01258 3368 /* Check that each 4-byte slot is identical. */
3369 repeated = 1;
3370 for (i = 4; i < 16; i += 4)
3371 for (j = 0; j < 4; j++)
3372 if (arr[j] != arr[i + j])
3373 repeated = 0;
3374
3375 if (repeated)
3376 {
3377 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3378 val = trunc_int_for_mode (val, SImode);
3379
3380 if (which_immediate_load (val) != SPU_NONE)
3381 return IC_IL1;
3382 }
3383
3384 /* Any mode of 2 bytes or smaller can be loaded with an il
3385 instruction. */
3386 gcc_assert (GET_MODE_SIZE (mode) > 2);
3387
3388 fsmbi = 1;
5df189be 3389 repeat = 0;
dea01258 3390 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3391 if (arr[i] != 0 && repeat == 0)
3392 repeat = arr[i];
3393 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3394 fsmbi = 0;
3395 if (fsmbi)
5df189be 3396 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3397
3398 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3399 return IC_CPAT;
3400
3401 if (repeated)
3402 return IC_IL2;
3403
3404 return IC_POOL;
3405 default:
3406 break;
3407 }
3408 gcc_unreachable ();
644459d0 3409}
3410
3411static enum spu_immediate
3412which_logical_immediate (HOST_WIDE_INT val)
3413{
3414 gcc_assert (val == trunc_int_for_mode (val, SImode));
3415
3416 if (val >= -0x200 && val <= 0x1ff)
3417 return SPU_ORI;
3418 if (val >= 0 && val <= 0xffff)
3419 return SPU_IOHL;
3420 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3421 {
3422 val = trunc_int_for_mode (val, HImode);
3423 if (val >= -0x200 && val <= 0x1ff)
3424 return SPU_ORHI;
3425 if ((val & 0xff) == ((val >> 8) & 0xff))
3426 {
3427 val = trunc_int_for_mode (val, QImode);
3428 if (val >= -0x200 && val <= 0x1ff)
3429 return SPU_ORBI;
3430 }
3431 }
3432 return SPU_NONE;
3433}
3434
5df189be 3435/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3436 CONST_DOUBLEs. */
3437static int
3438const_vector_immediate_p (rtx x)
3439{
3440 int i;
3441 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3442 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3443 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3444 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3445 return 0;
3446 return 1;
3447}
3448
644459d0 3449int
3450logical_immediate_p (rtx op, enum machine_mode mode)
3451{
3452 HOST_WIDE_INT val;
3453 unsigned char arr[16];
3454 int i, j;
3455
3456 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3457 || GET_CODE (op) == CONST_VECTOR);
3458
5df189be 3459 if (GET_CODE (op) == CONST_VECTOR
3460 && !const_vector_immediate_p (op))
3461 return 0;
3462
644459d0 3463 if (GET_MODE (op) != VOIDmode)
3464 mode = GET_MODE (op);
3465
3466 constant_to_array (mode, op, arr);
3467
3468 /* Check that bytes are repeated. */
3469 for (i = 4; i < 16; i += 4)
3470 for (j = 0; j < 4; j++)
3471 if (arr[j] != arr[i + j])
3472 return 0;
3473
3474 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3475 val = trunc_int_for_mode (val, SImode);
3476
3477 i = which_logical_immediate (val);
3478 return i != SPU_NONE && i != SPU_IOHL;
3479}
3480
3481int
3482iohl_immediate_p (rtx op, enum machine_mode mode)
3483{
3484 HOST_WIDE_INT val;
3485 unsigned char arr[16];
3486 int i, j;
3487
3488 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3489 || GET_CODE (op) == CONST_VECTOR);
3490
5df189be 3491 if (GET_CODE (op) == CONST_VECTOR
3492 && !const_vector_immediate_p (op))
3493 return 0;
3494
644459d0 3495 if (GET_MODE (op) != VOIDmode)
3496 mode = GET_MODE (op);
3497
3498 constant_to_array (mode, op, arr);
3499
3500 /* Check that bytes are repeated. */
3501 for (i = 4; i < 16; i += 4)
3502 for (j = 0; j < 4; j++)
3503 if (arr[j] != arr[i + j])
3504 return 0;
3505
3506 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3507 val = trunc_int_for_mode (val, SImode);
3508
3509 return val >= 0 && val <= 0xffff;
3510}
3511
3512int
3513arith_immediate_p (rtx op, enum machine_mode mode,
3514 HOST_WIDE_INT low, HOST_WIDE_INT high)
3515{
3516 HOST_WIDE_INT val;
3517 unsigned char arr[16];
3518 int bytes, i, j;
3519
3520 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3521 || GET_CODE (op) == CONST_VECTOR);
3522
5df189be 3523 if (GET_CODE (op) == CONST_VECTOR
3524 && !const_vector_immediate_p (op))
3525 return 0;
3526
644459d0 3527 if (GET_MODE (op) != VOIDmode)
3528 mode = GET_MODE (op);
3529
3530 constant_to_array (mode, op, arr);
3531
3532 if (VECTOR_MODE_P (mode))
3533 mode = GET_MODE_INNER (mode);
3534
3535 bytes = GET_MODE_SIZE (mode);
3536 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3537
3538 /* Check that bytes are repeated. */
3539 for (i = bytes; i < 16; i += bytes)
3540 for (j = 0; j < bytes; j++)
3541 if (arr[j] != arr[i + j])
3542 return 0;
3543
3544 val = arr[0];
3545 for (j = 1; j < bytes; j++)
3546 val = (val << 8) | arr[j];
3547
3548 val = trunc_int_for_mode (val, mode);
3549
3550 return val >= low && val <= high;
3551}
3552
56c7bfc2 3553/* TRUE when op is an immediate and an exact power of 2, and given that
3554 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3555 all entries must be the same. */
3556bool
3557exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3558{
3559 enum machine_mode int_mode;
3560 HOST_WIDE_INT val;
3561 unsigned char arr[16];
3562 int bytes, i, j;
3563
3564 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3565 || GET_CODE (op) == CONST_VECTOR);
3566
3567 if (GET_CODE (op) == CONST_VECTOR
3568 && !const_vector_immediate_p (op))
3569 return 0;
3570
3571 if (GET_MODE (op) != VOIDmode)
3572 mode = GET_MODE (op);
3573
3574 constant_to_array (mode, op, arr);
3575
3576 if (VECTOR_MODE_P (mode))
3577 mode = GET_MODE_INNER (mode);
3578
3579 bytes = GET_MODE_SIZE (mode);
3580 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3581
3582 /* Check that bytes are repeated. */
3583 for (i = bytes; i < 16; i += bytes)
3584 for (j = 0; j < bytes; j++)
3585 if (arr[j] != arr[i + j])
3586 return 0;
3587
3588 val = arr[0];
3589 for (j = 1; j < bytes; j++)
3590 val = (val << 8) | arr[j];
3591
3592 val = trunc_int_for_mode (val, int_mode);
3593
3594 /* Currently, we only handle SFmode */
3595 gcc_assert (mode == SFmode);
3596 if (mode == SFmode)
3597 {
3598 int exp = (val >> 23) - 127;
3599 return val > 0 && (val & 0x007fffff) == 0
3600 && exp >= low && exp <= high;
3601 }
3602 return FALSE;
3603}
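/* Worked example: a V4SF splat of 2.0f has the repeated bit image
   0x40000000; its mantissa bits are zero and (0x40000000 >> 23) - 127
   is 1, so the constant is accepted whenever LOW <= 1 && 1 <= HIGH.  */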
3604
644459d0 3605/* We accept:
5b865faf 3606 - any 32-bit constant (SImode, SFmode)
644459d0 3607 - any constant that can be generated with fsmbi (any mode)
5b865faf 3608 - a 64-bit constant where the high and low bits are identical
644459d0 3609 (DImode, DFmode)
5b865faf 3610 - a 128-bit constant where the four 32-bit words match. */
644459d0 3611int
3612spu_legitimate_constant_p (rtx x)
3613{
5df189be 3614 if (GET_CODE (x) == HIGH)
3615 x = XEXP (x, 0);
644459d0 3616 /* V4SI with all identical symbols is valid. */
5df189be 3617 if (!flag_pic
3618 && GET_MODE (x) == V4SImode
644459d0 3619 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3620 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3621 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3622 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3623 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3624 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3625
5df189be 3626 if (GET_CODE (x) == CONST_VECTOR
3627 && !const_vector_immediate_p (x))
3628 return 0;
644459d0 3629 return 1;
3630}
3631
3632/* Valid addresses are:
3633 - symbol_ref, label_ref, const
3634 - reg
9d98604b 3635 - reg + const_int, where const_int is 16 byte aligned
644459d0 3636 - reg + reg, alignment doesn't matter
3637 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3638 ignore the 4 least significant bits of the const. We only care about
3639 16 byte modes because the expand phase will change all smaller MEM
3640 references to TImode. */
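/* For a 16-byte access, reg + 32 is accepted (the offset is a
   multiple of 16) while reg + 20 is rejected, since lqd/stqd would
   silently drop the low 4 bits; reg + reg is accepted regardless of
   alignment.  */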
3641static bool
3642spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3643 rtx x, bool reg_ok_strict)
644459d0 3644{
9d98604b 3645 int aligned = GET_MODE_SIZE (mode) >= 16;
3646 if (aligned
3647 && GET_CODE (x) == AND
644459d0 3648 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3649 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3650 x = XEXP (x, 0);
3651 switch (GET_CODE (x))
3652 {
644459d0 3653 case LABEL_REF:
9d98604b 3654 case SYMBOL_REF:
644459d0 3655 case CONST:
9d98604b 3656 return !TARGET_LARGE_MEM;
644459d0 3657
3658 case CONST_INT:
3659 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3660
3661 case SUBREG:
3662 x = XEXP (x, 0);
9d98604b 3663 if (REG_P (x))
3664 return 0;
644459d0 3665
3666 case REG:
3667 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3668
3669 case PLUS:
3670 case LO_SUM:
3671 {
3672 rtx op0 = XEXP (x, 0);
3673 rtx op1 = XEXP (x, 1);
3674 if (GET_CODE (op0) == SUBREG)
3675 op0 = XEXP (op0, 0);
3676 if (GET_CODE (op1) == SUBREG)
3677 op1 = XEXP (op1, 0);
644459d0 3678 if (GET_CODE (op0) == REG
3679 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3680 && GET_CODE (op1) == CONST_INT
3681 && INTVAL (op1) >= -0x2000
3682 && INTVAL (op1) <= 0x1fff
9d98604b 3683 && (!aligned || (INTVAL (op1) & 15) == 0))
3684 return TRUE;
644459d0 3685 if (GET_CODE (op0) == REG
3686 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3687 && GET_CODE (op1) == REG
3688 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3689 return TRUE;
644459d0 3690 }
3691 break;
3692
3693 default:
3694 break;
3695 }
9d98604b 3696 return FALSE;
644459d0 3697}
3698
3699/* When the address is reg + const_int, force the const_int into a
fa7637bd 3700 register. */
644459d0 3701rtx
3702spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3703 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3704{
3705 rtx op0, op1;
3706 /* Make sure both operands are registers. */
3707 if (GET_CODE (x) == PLUS)
3708 {
3709 op0 = XEXP (x, 0);
3710 op1 = XEXP (x, 1);
3711 if (ALIGNED_SYMBOL_REF_P (op0))
3712 {
3713 op0 = force_reg (Pmode, op0);
3714 mark_reg_pointer (op0, 128);
3715 }
3716 else if (GET_CODE (op0) != REG)
3717 op0 = force_reg (Pmode, op0);
3718 if (ALIGNED_SYMBOL_REF_P (op1))
3719 {
3720 op1 = force_reg (Pmode, op1);
3721 mark_reg_pointer (op1, 128);
3722 }
3723 else if (GET_CODE (op1) != REG)
3724 op1 = force_reg (Pmode, op1);
3725 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3726 }
41e3a0c7 3727 return x;
644459d0 3728}
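
/* Hedged sketch of the effect: legitimizing
   (plus (reg r3) (const_int 100000)) -- an offset outside the signed
   range accepted above -- yields (plus (reg r3) (reg rN)) with rN
   loaded with the constant, so the address can use the reg + reg
   form.  */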
3729
3730/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3731 struct attribute_spec.handler. */
3732static tree
3733spu_handle_fndecl_attribute (tree * node,
3734 tree name,
3735 tree args ATTRIBUTE_UNUSED,
3736 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3737{
3738 if (TREE_CODE (*node) != FUNCTION_DECL)
3739 {
67a779df 3740 warning (0, "%qE attribute only applies to functions",
3741 name);
644459d0 3742 *no_add_attrs = true;
3743 }
3744
3745 return NULL_TREE;
3746}
3747
3748/* Handle the "vector" attribute. */
3749static tree
3750spu_handle_vector_attribute (tree * node, tree name,
3751 tree args ATTRIBUTE_UNUSED,
3752 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3753{
3754 tree type = *node, result = NULL_TREE;
3755 enum machine_mode mode;
3756 int unsigned_p;
3757
3758 while (POINTER_TYPE_P (type)
3759 || TREE_CODE (type) == FUNCTION_TYPE
3760 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3761 type = TREE_TYPE (type);
3762
3763 mode = TYPE_MODE (type);
3764
3765 unsigned_p = TYPE_UNSIGNED (type);
3766 switch (mode)
3767 {
3768 case DImode:
3769 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3770 break;
3771 case SImode:
3772 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3773 break;
3774 case HImode:
3775 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3776 break;
3777 case QImode:
3778 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3779 break;
3780 case SFmode:
3781 result = V4SF_type_node;
3782 break;
3783 case DFmode:
3784 result = V2DF_type_node;
3785 break;
3786 default:
3787 break;
3788 }
3789
3790 /* Propagate qualifiers attached to the element type
3791 onto the vector type. */
3792 if (result && result != type && TYPE_QUALS (type))
3793 result = build_qualified_type (result, TYPE_QUALS (type));
3794
3795 *no_add_attrs = true; /* No need to hang on to the attribute. */
3796
3797 if (!result)
67a779df 3798 warning (0, "%qE attribute ignored", name);
644459d0 3799 else
d991e6e8 3800 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3801
3802 return NULL_TREE;
3803}
3804
f2b32076 3805/* Return nonzero if FUNC is a naked function. */
644459d0 3806static int
3807spu_naked_function_p (tree func)
3808{
3809 tree a;
3810
3811 if (TREE_CODE (func) != FUNCTION_DECL)
3812 abort ();
3813
3814 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3815 return a != NULL_TREE;
3816}
3817
3818int
3819spu_initial_elimination_offset (int from, int to)
3820{
3821 int saved_regs_size = spu_saved_regs_size ();
3822 int sp_offset = 0;
abe32cce 3823 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3824 || get_frame_size () || saved_regs_size)
3825 sp_offset = STACK_POINTER_OFFSET;
3826 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3827 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3828 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3829 return get_frame_size ();
644459d0 3830 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3831 return sp_offset + crtl->outgoing_args_size
644459d0 3832 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3833 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3834 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3835 else
3836 gcc_unreachable ();
644459d0 3837}
3838
3839rtx
fb80456a 3840spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3841{
3842 enum machine_mode mode = TYPE_MODE (type);
3843 int byte_size = ((mode == BLKmode)
3844 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3845
3846 /* Make sure small structs are left justified in a register. */
3847 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3848 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3849 {
3850 enum machine_mode smode;
3851 rtvec v;
3852 int i;
3853 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3854 int n = byte_size / UNITS_PER_WORD;
3855 v = rtvec_alloc (nregs);
3856 for (i = 0; i < n; i++)
3857 {
3858 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3859 gen_rtx_REG (TImode,
3860 FIRST_RETURN_REGNUM
3861 + i),
3862 GEN_INT (UNITS_PER_WORD * i));
3863 byte_size -= UNITS_PER_WORD;
3864 }
3865
3866 if (n < nregs)
3867 {
3868 if (byte_size < 4)
3869 byte_size = 4;
3870 smode =
3871 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3872 RTVEC_ELT (v, n) =
3873 gen_rtx_EXPR_LIST (VOIDmode,
3874 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3875 GEN_INT (UNITS_PER_WORD * n));
3876 }
3877 return gen_rtx_PARALLEL (mode, v);
3878 }
3879 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3880}
3881
3882rtx
3883spu_function_arg (CUMULATIVE_ARGS cum,
3884 enum machine_mode mode,
3885 tree type, int named ATTRIBUTE_UNUSED)
3886{
3887 int byte_size;
3888
3889 if (cum >= MAX_REGISTER_ARGS)
3890 return 0;
3891
3892 byte_size = ((mode == BLKmode)
3893 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3894
3895 /* The ABI does not allow parameters to be passed partially in
3896 a register and partially on the stack. */
3897 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3898 return 0;
3899
3900 /* Make sure small structs are left justified in a register. */
3901 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3902 && byte_size < UNITS_PER_WORD && byte_size > 0)
3903 {
3904 enum machine_mode smode;
3905 rtx gr_reg;
3906 if (byte_size < 4)
3907 byte_size = 4;
3908 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3909 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3910 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3911 const0_rtx);
3912 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3913 }
3914 else
3915 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3916}
3917
3918/* Variable sized types are passed by reference. */
3919static bool
3920spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3921 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3922 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3923{
3924 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3925}
3926\f
3927
3928/* Var args. */
3929
3930/* Create and return the va_list datatype.
3931
3932 On SPU, va_list is an array type equivalent to
3933
3934 typedef struct __va_list_tag
3935 {
3936 void *__args __attribute__((__aligned(16)));
3937 void *__skip __attribute__((__aligned(16)));
3938
3939 } va_list[1];
3940
fa7637bd 3941 where __args points to the arg that will be returned by the next
644459d0 3942 va_arg(), and __skip points to the previous stack frame such that
3943 when __args == __skip we should advance __args by 32 bytes. */
3944static tree
3945spu_build_builtin_va_list (void)
3946{
3947 tree f_args, f_skip, record, type_decl;
3948 bool owp;
3949
3950 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3951
3952 type_decl =
54e46243 3953 build_decl (BUILTINS_LOCATION,
3954 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3955
54e46243 3956 f_args = build_decl (BUILTINS_LOCATION,
3957 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3958 f_skip = build_decl (BUILTINS_LOCATION,
3959 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3960
3961 DECL_FIELD_CONTEXT (f_args) = record;
3962 DECL_ALIGN (f_args) = 128;
3963 DECL_USER_ALIGN (f_args) = 1;
3964
3965 DECL_FIELD_CONTEXT (f_skip) = record;
3966 DECL_ALIGN (f_skip) = 128;
3967 DECL_USER_ALIGN (f_skip) = 1;
3968
3969 TREE_CHAIN (record) = type_decl;
3970 TYPE_NAME (record) = type_decl;
3971 TYPE_FIELDS (record) = f_args;
3972 TREE_CHAIN (f_args) = f_skip;
3973
3974 /* We know this is being padded and we want it to be. It is an internal
3975 type so hide the warnings from the user. */
3976 owp = warn_padded;
3977 warn_padded = false;
3978
3979 layout_type (record);
3980
3981 warn_padded = owp;
3982
3983 /* The correct type is an array type of one element. */
3984 return build_array_type (record, build_index_type (size_zero_node));
3985}
3986
3987/* Implement va_start by filling the va_list structure VALIST.
3988 NEXTARG points to the first anonymous stack argument.
3989
3990 The following global variables are used to initialize
3991 the va_list structure:
3992
abe32cce 3993 crtl->args.info;
644459d0 3994 the CUMULATIVE_ARGS for this function
3995
abe32cce 3996 crtl->args.arg_offset_rtx:
644459d0 3997 holds the offset of the first anonymous stack argument
3998 (relative to the virtual arg pointer). */
3999
8a58ed0a 4000static void
644459d0 4001spu_va_start (tree valist, rtx nextarg)
4002{
4003 tree f_args, f_skip;
4004 tree args, skip, t;
4005
4006 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4007 f_skip = TREE_CHAIN (f_args);
4008
4009 valist = build_va_arg_indirect_ref (valist);
4010 args =
4011 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4012 skip =
4013 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4014
4015 /* Find the __args area. */
4016 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4017 if (crtl->args.pretend_args_size > 0)
0de36bdb 4018 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4019 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4020 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4021 TREE_SIDE_EFFECTS (t) = 1;
4022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4023
4024 /* Find the __skip area. */
4025 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4026 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4027 size_int (crtl->args.pretend_args_size
0de36bdb 4028 - STACK_POINTER_OFFSET));
75a70cf9 4029 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4030 TREE_SIDE_EFFECTS (t) = 1;
4031 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4032}
4033
4034/* Gimplify va_arg by updating the va_list structure
4035 VALIST as required to retrieve an argument of type
4036 TYPE, and returning that argument.
4037
4038 ret = va_arg(VALIST, TYPE);
4039
4040 generates code equivalent to:
4041
4042 paddedsize = (sizeof(TYPE) + 15) & -16;
4043 if (VALIST.__args + paddedsize > VALIST.__skip
4044 && VALIST.__args <= VALIST.__skip)
4045 addr = VALIST.__skip + 32;
4046 else
4047 addr = VALIST.__args;
4048 VALIST.__args = addr + paddedsize;
4049 ret = *(TYPE *)addr;
4050 */
4051static tree
75a70cf9 4052spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4053 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4054{
4055 tree f_args, f_skip;
4056 tree args, skip;
4057 HOST_WIDE_INT size, rsize;
4058 tree paddedsize, addr, tmp;
4059 bool pass_by_reference_p;
4060
4061 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4062 f_skip = TREE_CHAIN (f_args);
4063
4064 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4065 args =
4066 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4067 skip =
4068 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4069
4070 addr = create_tmp_var (ptr_type_node, "va_arg");
4071 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4072
4073 /* If an object is dynamically sized, a pointer to it is passed
4074 instead of the object itself. */
4075 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4076 false);
4077 if (pass_by_reference_p)
4078 type = build_pointer_type (type);
4079 size = int_size_in_bytes (type);
4080 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4081
4082 /* build conditional expression to calculate addr. The expression
4083 will be gimplified later. */
0de36bdb 4084 paddedsize = size_int (rsize);
75a70cf9 4085 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4086 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4087 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4088 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4089 unshare_expr (skip)));
644459d0 4090
4091 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4092 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4093 size_int (32)), unshare_expr (args));
644459d0 4094
75a70cf9 4095 gimplify_assign (addr, tmp, pre_p);
644459d0 4096
4097 /* update VALIST.__args */
0de36bdb 4098 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4099 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4100
4101 addr = fold_convert (build_pointer_type (type), addr);
4102
4103 if (pass_by_reference_p)
4104 addr = build_va_arg_indirect_ref (addr);
4105
4106 return build_va_arg_indirect_ref (addr);
4107}
4108
4109/* Save parameter registers starting with the register that corresponds
4110 to the first unnamed parameter. If the first unnamed parameter is
4111 in the stack then save no registers. Set pretend_args_size to the
4112 amount of space needed to save the registers. */
4113void
4114spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4115 tree type, int *pretend_size, int no_rtl)
4116{
4117 if (!no_rtl)
4118 {
4119 rtx tmp;
4120 int regno;
4121 int offset;
4122 int ncum = *cum;
4123
4124 /* cum currently points to the last named argument; we want to
4125 start at the next argument. */
4126 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4127
4128 offset = -STACK_POINTER_OFFSET;
4129 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4130 {
4131 tmp = gen_frame_mem (V4SImode,
4132 plus_constant (virtual_incoming_args_rtx,
4133 offset));
4134 emit_move_insn (tmp,
4135 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4136 offset += 16;
4137 }
4138 *pretend_size = offset + STACK_POINTER_OFFSET;
4139 }
4140}
4141\f
4142void
4143spu_conditional_register_usage (void)
4144{
4145 if (flag_pic)
4146 {
4147 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4148 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4149 }
644459d0 4150}
4151
9d98604b 4152/* This is called any time we inspect the alignment of a register for
4153 addresses. */
644459d0 4154static int
9d98604b 4155reg_aligned_for_addr (rtx x)
644459d0 4156{
9d98604b 4157 int regno =
4158 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4159 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4160}
4161
69ced2d6 4162/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4163 into its SYMBOL_REF_FLAGS. */
4164static void
4165spu_encode_section_info (tree decl, rtx rtl, int first)
4166{
4167 default_encode_section_info (decl, rtl, first);
4168
4169 /* If a variable has a forced alignment to < 16 bytes, mark it with
4170 SYMBOL_FLAG_ALIGN1. */
4171 if (TREE_CODE (decl) == VAR_DECL
4172 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4173 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4174}
4175
644459d0 4176/* Return TRUE if we are certain the mem refers to a complete object
4177 which is both 16-byte aligned and padded to a 16-byte boundary. This
4178 would make it safe to store with a single instruction.
4179 We guarantee the alignment and padding for static objects by aligning
4180 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4181 FIXME: We currently cannot guarantee this for objects on the stack
4182 because assign_parm_setup_stack calls assign_stack_local with the
4183 alignment of the parameter mode and in that case the alignment never
4184 gets adjusted by LOCAL_ALIGNMENT. */
4185static int
4186store_with_one_insn_p (rtx mem)
4187{
9d98604b 4188 enum machine_mode mode = GET_MODE (mem);
644459d0 4189 rtx addr = XEXP (mem, 0);
9d98604b 4190 if (mode == BLKmode)
644459d0 4191 return 0;
9d98604b 4192 if (GET_MODE_SIZE (mode) >= 16)
4193 return 1;
644459d0 4194 /* Only static objects. */
4195 if (GET_CODE (addr) == SYMBOL_REF)
4196 {
4197 /* We use the associated declaration to make sure the access is
fa7637bd 4198 referring to the whole object.
644459d0 4199 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4200 if it is necessary. Will there be cases where one exists, and
4201 the other does not? Will there be cases where both exist, but
4202 have different types? */
4203 tree decl = MEM_EXPR (mem);
4204 if (decl
4205 && TREE_CODE (decl) == VAR_DECL
4206 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4207 return 1;
4208 decl = SYMBOL_REF_DECL (addr);
4209 if (decl
4210 && TREE_CODE (decl) == VAR_DECL
4211 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4212 return 1;
4213 }
4214 return 0;
4215}
4216
9d98604b 4217/* Return 1 when the address is not valid for a simple load and store as
4218 required by the '_mov*' patterns. We could make this less strict
4219 for loads, but we prefer MEMs to look the same so they are more
4220 likely to be merged. */
4221static int
4222address_needs_split (rtx mem)
4223{
4224 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4225 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4226 || !(store_with_one_insn_p (mem)
4227 || mem_is_padded_component_ref (mem))))
4228 return 1;
4229
4230 return 0;
4231}
4232
644459d0 4233int
4234spu_expand_mov (rtx * ops, enum machine_mode mode)
4235{
4236 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4237 abort ();
4238
4239 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4240 {
4241 rtx from = SUBREG_REG (ops[1]);
8d72495d 4242 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4243
4244 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4245 && GET_MODE_CLASS (imode) == MODE_INT
4246 && subreg_lowpart_p (ops[1]));
4247
4248 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4249 imode = SImode;
4250 if (imode != GET_MODE (from))
4251 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4252
4253 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4254 {
99bdde56 4255 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4256 emit_insn (GEN_FCN (icode) (ops[0], from));
4257 }
4258 else
4259 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4260 return 1;
4261 }
4262
4263 /* At least one of the operands needs to be a register. */
4264 if ((reload_in_progress | reload_completed) == 0
4265 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4266 {
4267 rtx temp = force_reg (mode, ops[1]);
4268 emit_move_insn (ops[0], temp);
4269 return 1;
4270 }
4271 if (reload_in_progress || reload_completed)
4272 {
dea01258 4273 if (CONSTANT_P (ops[1]))
4274 return spu_split_immediate (ops);
644459d0 4275 return 0;
4276 }
9d98604b 4277
4278 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4279 extend them. */
4280 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4281 {
9d98604b 4282 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4283 if (val != INTVAL (ops[1]))
644459d0 4284 {
9d98604b 4285 emit_move_insn (ops[0], GEN_INT (val));
4286 return 1;
644459d0 4287 }
4288 }
9d98604b 4289 if (MEM_P (ops[0]))
4290 return spu_split_store (ops);
4291 if (MEM_P (ops[1]))
4292 return spu_split_load (ops);
4293
644459d0 4294 return 0;
4295}
4296
9d98604b 4297static void
4298spu_convert_move (rtx dst, rtx src)
644459d0 4299{
9d98604b 4300 enum machine_mode mode = GET_MODE (dst);
4301 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4302 rtx reg;
4303 gcc_assert (GET_MODE (src) == TImode);
4304 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4305 emit_insn (gen_rtx_SET (VOIDmode, reg,
4306 gen_rtx_TRUNCATE (int_mode,
4307 gen_rtx_LSHIFTRT (TImode, src,
4308 GEN_INT (int_mode == DImode ? 64 : 96)))));
4309 if (int_mode != mode)
4310 {
4311 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4312 emit_move_insn (dst, reg);
4313 }
4314}
644459d0 4315
9d98604b 4316/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4317 the address from SRC and SRC+16. Return a REG or CONST_INT that
4318 specifies how many bytes to rotate the loaded registers, plus any
4319 extra from EXTRA_ROTQBY. The address and rotate amounts are
4320 normalized to improve merging of loads and rotate computations. */
4321static rtx
4322spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4323{
4324 rtx addr = XEXP (src, 0);
4325 rtx p0, p1, rot, addr0, addr1;
4326 int rot_amt;
644459d0 4327
4328 rot = 0;
4329 rot_amt = 0;
9d98604b 4330
4331 if (MEM_ALIGN (src) >= 128)
4332 /* Address is already aligned; simply perform a TImode load. */ ;
4333 else if (GET_CODE (addr) == PLUS)
644459d0 4334 {
4335 /* 8 cases:
4336 aligned reg + aligned reg => lqx
4337 aligned reg + unaligned reg => lqx, rotqby
4338 aligned reg + aligned const => lqd
4339 aligned reg + unaligned const => lqd, rotqbyi
4340 unaligned reg + aligned reg => lqx, rotqby
4341 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4342 unaligned reg + aligned const => lqd, rotqby
4343 unaligned reg + unaligned const => not allowed by legitimate address
4344 */
4345 p0 = XEXP (addr, 0);
4346 p1 = XEXP (addr, 1);
9d98604b 4347 if (!reg_aligned_for_addr (p0))
644459d0 4348 {
9d98604b 4349 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4350 {
9d98604b 4351 rot = gen_reg_rtx (SImode);
4352 emit_insn (gen_addsi3 (rot, p0, p1));
4353 }
4354 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4355 {
4356 if (INTVAL (p1) > 0
4357 && REG_POINTER (p0)
4358 && INTVAL (p1) * BITS_PER_UNIT
4359 < REGNO_POINTER_ALIGN (REGNO (p0)))
4360 {
4361 rot = gen_reg_rtx (SImode);
4362 emit_insn (gen_addsi3 (rot, p0, p1));
4363 addr = p0;
4364 }
4365 else
4366 {
4367 rtx x = gen_reg_rtx (SImode);
4368 emit_move_insn (x, p1);
4369 if (!spu_arith_operand (p1, SImode))
4370 p1 = x;
4371 rot = gen_reg_rtx (SImode);
4372 emit_insn (gen_addsi3 (rot, p0, p1));
4373 addr = gen_rtx_PLUS (Pmode, p0, x);
4374 }
644459d0 4375 }
4376 else
4377 rot = p0;
4378 }
4379 else
4380 {
4381 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4382 {
4383 rot_amt = INTVAL (p1) & 15;
9d98604b 4384 if (INTVAL (p1) & -16)
4385 {
4386 p1 = GEN_INT (INTVAL (p1) & -16);
4387 addr = gen_rtx_PLUS (SImode, p0, p1);
4388 }
4389 else
4390 addr = p0;
644459d0 4391 }
9d98604b 4392 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4393 rot = p1;
4394 }
4395 }
9d98604b 4396 else if (REG_P (addr))
644459d0 4397 {
9d98604b 4398 if (!reg_aligned_for_addr (addr))
644459d0 4399 rot = addr;
4400 }
4401 else if (GET_CODE (addr) == CONST)
4402 {
4403 if (GET_CODE (XEXP (addr, 0)) == PLUS
4404 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4405 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4406 {
4407 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4408 if (rot_amt & -16)
4409 addr = gen_rtx_CONST (Pmode,
4410 gen_rtx_PLUS (Pmode,
4411 XEXP (XEXP (addr, 0), 0),
4412 GEN_INT (rot_amt & -16)));
4413 else
4414 addr = XEXP (XEXP (addr, 0), 0);
4415 }
4416 else
9d98604b 4417 {
4418 rot = gen_reg_rtx (Pmode);
4419 emit_move_insn (rot, addr);
4420 }
644459d0 4421 }
4422 else if (GET_CODE (addr) == CONST_INT)
4423 {
4424 rot_amt = INTVAL (addr);
4425 addr = GEN_INT (rot_amt & -16);
4426 }
4427 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4428 {
4429 rot = gen_reg_rtx (Pmode);
4430 emit_move_insn (rot, addr);
4431 }
644459d0 4432
9d98604b 4433 rot_amt += extra_rotby;
644459d0 4434
4435 rot_amt &= 15;
4436
4437 if (rot && rot_amt)
4438 {
9d98604b 4439 rtx x = gen_reg_rtx (SImode);
4440 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4441 rot = x;
644459d0 4442 rot_amt = 0;
4443 }
9d98604b 4444 if (!rot && rot_amt)
4445 rot = GEN_INT (rot_amt);
4446
4447 addr0 = copy_rtx (addr);
4448 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4449 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4450
4451 if (dst1)
4452 {
4453 addr1 = plus_constant (copy_rtx (addr), 16);
4454 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4455 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4456 }
644459d0 4457
9d98604b 4458 return rot;
4459}
4460
4461int
4462spu_split_load (rtx * ops)
4463{
4464 enum machine_mode mode = GET_MODE (ops[0]);
4465 rtx addr, load, rot;
4466 int rot_amt;
644459d0 4467
9d98604b 4468 if (GET_MODE_SIZE (mode) >= 16)
4469 return 0;
644459d0 4470
9d98604b 4471 addr = XEXP (ops[1], 0);
4472 gcc_assert (GET_CODE (addr) != AND);
4473
4474 if (!address_needs_split (ops[1]))
4475 {
4476 ops[1] = change_address (ops[1], TImode, addr);
4477 load = gen_reg_rtx (TImode);
4478 emit_insn (gen__movti (load, ops[1]));
4479 spu_convert_move (ops[0], load);
4480 return 1;
4481 }
4482
4483 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4484
4485 load = gen_reg_rtx (TImode);
4486 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4487
4488 if (rot)
4489 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4490
9d98604b 4491 spu_convert_move (ops[0], load);
4492 return 1;
644459d0 4493}
4494
9d98604b 4495int
644459d0 4496spu_split_store (rtx * ops)
4497{
4498 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4499 rtx reg;
644459d0 4500 rtx addr, p0, p1, p1_lo, smem;
4501 int aform;
4502 int scalar;
4503
9d98604b 4504 if (GET_MODE_SIZE (mode) >= 16)
4505 return 0;
4506
644459d0 4507 addr = XEXP (ops[0], 0);
9d98604b 4508 gcc_assert (GET_CODE (addr) != AND);
4509
4510 if (!address_needs_split (ops[0]))
4511 {
4512 reg = gen_reg_rtx (TImode);
4513 emit_insn (gen_spu_convert (reg, ops[1]));
4514 ops[0] = change_address (ops[0], TImode, addr);
4515 emit_move_insn (ops[0], reg);
4516 return 1;
4517 }
644459d0 4518
4519 if (GET_CODE (addr) == PLUS)
4520 {
4521 /* 8 cases:
4522 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4523 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4524 aligned reg + aligned const => lqd, c?d, shuf, stqx
4525 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4526 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4527 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4528 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4529 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4530 */
4531 aform = 0;
4532 p0 = XEXP (addr, 0);
4533 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4534 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4535 {
4536 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4537 if (reg_aligned_for_addr (p0))
4538 {
4539 p1 = GEN_INT (INTVAL (p1) & -16);
4540 if (p1 == const0_rtx)
4541 addr = p0;
4542 else
4543 addr = gen_rtx_PLUS (SImode, p0, p1);
4544 }
4545 else
4546 {
4547 rtx x = gen_reg_rtx (SImode);
4548 emit_move_insn (x, p1);
4549 addr = gen_rtx_PLUS (SImode, p0, x);
4550 }
644459d0 4551 }
4552 }
9d98604b 4553 else if (REG_P (addr))
644459d0 4554 {
4555 aform = 0;
4556 p0 = addr;
4557 p1 = p1_lo = const0_rtx;
4558 }
4559 else
4560 {
4561 aform = 1;
4562 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4563 p1 = 0; /* aform doesn't use p1 */
4564 p1_lo = addr;
4565 if (ALIGNED_SYMBOL_REF_P (addr))
4566 p1_lo = const0_rtx;
9d98604b 4567 else if (GET_CODE (addr) == CONST
4568 && GET_CODE (XEXP (addr, 0)) == PLUS
4569 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4570 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4571 {
9d98604b 4572 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4573 if ((v & -16) != 0)
4574 addr = gen_rtx_CONST (Pmode,
4575 gen_rtx_PLUS (Pmode,
4576 XEXP (XEXP (addr, 0), 0),
4577 GEN_INT (v & -16)));
4578 else
4579 addr = XEXP (XEXP (addr, 0), 0);
4580 p1_lo = GEN_INT (v & 15);
644459d0 4581 }
4582 else if (GET_CODE (addr) == CONST_INT)
4583 {
4584 p1_lo = GEN_INT (INTVAL (addr) & 15);
4585 addr = GEN_INT (INTVAL (addr) & -16);
4586 }
9d98604b 4587 else
4588 {
4589 p1_lo = gen_reg_rtx (SImode);
4590 emit_move_insn (p1_lo, addr);
4591 }
644459d0 4592 }
4593
9d98604b 4594 reg = gen_reg_rtx (TImode);
e04cf423 4595
644459d0 4596 scalar = store_with_one_insn_p (ops[0]);
4597 if (!scalar)
4598 {
4599 /* We could copy the flags from the ops[0] MEM to mem here, but
4600 we don't because we want this load to be optimized away if
4601 possible, and copying the flags will prevent that in certain
4602 cases, e.g. consider the volatile flag. */
4603
9d98604b 4604 rtx pat = gen_reg_rtx (TImode);
e04cf423 4605 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4606 set_mem_alias_set (lmem, 0);
4607 emit_insn (gen_movti (reg, lmem));
644459d0 4608
9d98604b 4609 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4610 p0 = stack_pointer_rtx;
4611 if (!p1_lo)
4612 p1_lo = const0_rtx;
4613
4614 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4615 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4616 }
644459d0 4617 else
4618 {
4619 if (GET_CODE (ops[1]) == REG)
4620 emit_insn (gen_spu_convert (reg, ops[1]));
4621 else if (GET_CODE (ops[1]) == SUBREG)
4622 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4623 else
4624 abort ();
4625 }
4626
4627 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4628 emit_insn (gen_ashlti3
4629 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4630
9d98604b 4631 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4632 /* We can't use the previous alias set because the memory has changed
4633 size and can potentially overlap objects of other types. */
4634 set_mem_alias_set (smem, 0);
4635
e04cf423 4636 emit_insn (gen_movti (smem, reg));
9d98604b 4637 return 1;
644459d0 4638}
4639
4640/* Return TRUE if X is MEM which is a struct member reference
4641 and the member can safely be loaded and stored with a single
4642 instruction because it is padded. */
4643static int
4644mem_is_padded_component_ref (rtx x)
4645{
4646 tree t = MEM_EXPR (x);
4647 tree r;
4648 if (!t || TREE_CODE (t) != COMPONENT_REF)
4649 return 0;
4650 t = TREE_OPERAND (t, 1);
4651 if (!t || TREE_CODE (t) != FIELD_DECL
4652 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4653 return 0;
4654 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4655 r = DECL_FIELD_CONTEXT (t);
4656 if (!r || TREE_CODE (r) != RECORD_TYPE)
4657 return 0;
4658 /* Make sure they are the same mode */
4659 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4660 return 0;
4661 /* If there are no following fields, the field alignment ensures
fa7637bd 4662 the structure is padded to that alignment, which means this field is
4663 padded too. */
644459d0 4664 if (TREE_CHAIN (t) == 0)
4665 return 1;
4666 /* If the following field is also aligned then this field will be
4667 padded. */
4668 t = TREE_CHAIN (t);
4669 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4670 return 1;
4671 return 0;
4672}
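
/* A hedged example of a qualifying reference: given

     struct s {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };

   an SImode MEM for s.a passes the checks above -- the field is 16-byte
   aligned and the following field is too, so the padding after 'a'
   belongs to 'a' and a full quadword store cannot clobber anything
   else.  */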
4673
c7b91b14 4674/* Parse the -mfixed-range= option string. */
4675static void
4676fix_range (const char *const_str)
4677{
4678 int i, first, last;
4679 char *str, *dash, *comma;
4680
4681 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4682 REG2 are either register names or register numbers. The effect
4683 of this option is to mark the registers in the range from REG1 to
4684 REG2 as ``fixed'' so they won't be used by the compiler. */
4685
4686 i = strlen (const_str);
4687 str = (char *) alloca (i + 1);
4688 memcpy (str, const_str, i + 1);
4689
4690 while (1)
4691 {
4692 dash = strchr (str, '-');
4693 if (!dash)
4694 {
4695 warning (0, "value of -mfixed-range must have form REG1-REG2");
4696 return;
4697 }
4698 *dash = '\0';
4699 comma = strchr (dash + 1, ',');
4700 if (comma)
4701 *comma = '\0';
4702
4703 first = decode_reg_name (str);
4704 if (first < 0)
4705 {
4706 warning (0, "unknown register name: %s", str);
4707 return;
4708 }
4709
4710 last = decode_reg_name (dash + 1);
4711 if (last < 0)
4712 {
4713 warning (0, "unknown register name: %s", dash + 1);
4714 return;
4715 }
4716
4717 *dash = '-';
4718
4719 if (first > last)
4720 {
4721 warning (0, "%s-%s is an empty range", str, dash + 1);
4722 return;
4723 }
4724
4725 for (i = first; i <= last; ++i)
4726 fixed_regs[i] = call_used_regs[i] = 1;
4727
4728 if (!comma)
4729 break;
4730
4731 *comma = ',';
4732 str = comma + 1;
4733 }
4734}
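
/* Illustrative usage (hedged; the accepted spellings follow
   decode_reg_name, so plain register numbers work as well as names):

     -mfixed-range=80-85          reserve registers 80 through 85
     -mfixed-range=80-85,90-95    several ranges, comma separated  */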
4735
644459d0 4736/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4737 can be generated using the fsmbi instruction. */
4738int
4739fsmbi_const_p (rtx x)
4740{
dea01258 4741 if (CONSTANT_P (x))
4742 {
5df189be 4743 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4744 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4745 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4746 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4747 }
4748 return 0;
4749}
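
/* Hedged note: fsmbi expands a 16-bit immediate into a 16-byte mask,
   one immediate bit per result byte (0x00 or 0xff), so constants whose
   bytes are each either 0x00 or 0xff -- for example a V4SI splat of
   0x00ff00ff -- are the ones classify_immediate reports as IC_FSMBI.  */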
4750
4751/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4752 can be generated using the cbd, chd, cwd or cdd instruction. */
4753int
4754cpat_const_p (rtx x, enum machine_mode mode)
4755{
4756 if (CONSTANT_P (x))
4757 {
4758 enum immediate_class c = classify_immediate (x, mode);
4759 return c == IC_CPAT;
4760 }
4761 return 0;
4762}
644459d0 4763
dea01258 4764rtx
4765gen_cpat_const (rtx * ops)
4766{
4767 unsigned char dst[16];
4768 int i, offset, shift, isize;
4769 if (GET_CODE (ops[3]) != CONST_INT
4770 || GET_CODE (ops[2]) != CONST_INT
4771 || (GET_CODE (ops[1]) != CONST_INT
4772 && GET_CODE (ops[1]) != REG))
4773 return 0;
4774 if (GET_CODE (ops[1]) == REG
4775 && (!REG_POINTER (ops[1])
4776 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4777 return 0;
644459d0 4778
4779 for (i = 0; i < 16; i++)
dea01258 4780 dst[i] = i + 16;
4781 isize = INTVAL (ops[3]);
4782 if (isize == 1)
4783 shift = 3;
4784 else if (isize == 2)
4785 shift = 2;
4786 else
4787 shift = 0;
4788 offset = (INTVAL (ops[2]) +
4789 (GET_CODE (ops[1]) ==
4790 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4791 for (i = 0; i < isize; i++)
4792 dst[offset + i] = i + shift;
4793 return array_to_constant (TImode, dst);
644459d0 4794}
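
/* A worked example (hedged): for a word insert, ops[3] == 4 and shift
   == 0; with an aligned base and ops[2] == 4 the loop above produces

     { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }

   i.e. every byte comes from the second shuffle operand except the
   target word, which is taken from the preferred slot of the first
   operand.  */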
4795
4796/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4797 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4798 than 16 bytes, the value is repeated across the rest of the array. */
4799void
4800constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4801{
4802 HOST_WIDE_INT val;
4803 int i, j, first;
4804
4805 memset (arr, 0, 16);
4806 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4807 if (GET_CODE (x) == CONST_INT
4808 || (GET_CODE (x) == CONST_DOUBLE
4809 && (mode == SFmode || mode == DFmode)))
4810 {
4811 gcc_assert (mode != VOIDmode && mode != BLKmode);
4812
4813 if (GET_CODE (x) == CONST_DOUBLE)
4814 val = const_double_to_hwint (x);
4815 else
4816 val = INTVAL (x);
4817 first = GET_MODE_SIZE (mode) - 1;
4818 for (i = first; i >= 0; i--)
4819 {
4820 arr[i] = val & 0xff;
4821 val >>= 8;
4822 }
4823 /* Splat the constant across the whole array. */
4824 for (j = 0, i = first + 1; i < 16; i++)
4825 {
4826 arr[i] = arr[j];
4827 j = (j == first) ? 0 : j + 1;
4828 }
4829 }
4830 else if (GET_CODE (x) == CONST_DOUBLE)
4831 {
4832 val = CONST_DOUBLE_LOW (x);
4833 for (i = 15; i >= 8; i--)
4834 {
4835 arr[i] = val & 0xff;
4836 val >>= 8;
4837 }
4838 val = CONST_DOUBLE_HIGH (x);
4839 for (i = 7; i >= 0; i--)
4840 {
4841 arr[i] = val & 0xff;
4842 val >>= 8;
4843 }
4844 }
4845 else if (GET_CODE (x) == CONST_VECTOR)
4846 {
4847 int units;
4848 rtx elt;
4849 mode = GET_MODE_INNER (mode);
4850 units = CONST_VECTOR_NUNITS (x);
4851 for (i = 0; i < units; i++)
4852 {
4853 elt = CONST_VECTOR_ELT (x, i);
4854 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4855 {
4856 if (GET_CODE (elt) == CONST_DOUBLE)
4857 val = const_double_to_hwint (elt);
4858 else
4859 val = INTVAL (elt);
4860 first = GET_MODE_SIZE (mode) - 1;
4861 if (first + i * GET_MODE_SIZE (mode) > 16)
4862 abort ();
4863 for (j = first; j >= 0; j--)
4864 {
4865 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4866 val >>= 8;
4867 }
4868 }
4869 }
4870 }
4871 else
4872 gcc_unreachable();
4873}
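
/* Example of the splatting above: (const_int 0x12345678) in SImode
   fills arr[0..3] with { 0x12, 0x34, 0x56, 0x78 } (most significant
   byte first) and then repeats those four bytes through arr[15].  */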
4874
4875/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4876 smaller than 16 bytes, use the bytes that would represent that value
4877 in a register, e.g., for QImode return the value of arr[3]. */
4878rtx
4879array_to_constant (enum machine_mode mode, unsigned char arr[16])
4880{
4881 enum machine_mode inner_mode;
4882 rtvec v;
4883 int units, size, i, j, k;
4884 HOST_WIDE_INT val;
4885
4886 if (GET_MODE_CLASS (mode) == MODE_INT
4887 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4888 {
4889 j = GET_MODE_SIZE (mode);
4890 i = j < 4 ? 4 - j : 0;
4891 for (val = 0; i < j; i++)
4892 val = (val << 8) | arr[i];
4893 val = trunc_int_for_mode (val, mode);
4894 return GEN_INT (val);
4895 }
4896
4897 if (mode == TImode)
4898 {
4899 HOST_WIDE_INT high;
4900 for (i = high = 0; i < 8; i++)
4901 high = (high << 8) | arr[i];
4902 for (i = 8, val = 0; i < 16; i++)
4903 val = (val << 8) | arr[i];
4904 return immed_double_const (val, high, TImode);
4905 }
4906 if (mode == SFmode)
4907 {
4908 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4909 val = trunc_int_for_mode (val, SImode);
171b6d22 4910 return hwint_to_const_double (SFmode, val);
644459d0 4911 }
4912 if (mode == DFmode)
4913 {
1f915911 4914 for (i = 0, val = 0; i < 8; i++)
4915 val = (val << 8) | arr[i];
171b6d22 4916 return hwint_to_const_double (DFmode, val);
644459d0 4917 }
4918
4919 if (!VECTOR_MODE_P (mode))
4920 abort ();
4921
4922 units = GET_MODE_NUNITS (mode);
4923 size = GET_MODE_UNIT_SIZE (mode);
4924 inner_mode = GET_MODE_INNER (mode);
4925 v = rtvec_alloc (units);
4926
4927 for (k = i = 0; i < units; ++i)
4928 {
4929 val = 0;
4930 for (j = 0; j < size; j++, k++)
4931 val = (val << 8) | arr[k];
4932
4933 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4934 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4935 else
4936 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4937 }
4938 if (k > 16)
4939 abort ();
4940
4941 return gen_rtx_CONST_VECTOR (mode, v);
4942}
4943
4944static void
4945reloc_diagnostic (rtx x)
4946{
4947 tree loc_decl, decl = 0;
4948 const char *msg;
4949 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4950 return;
4951
4952 if (GET_CODE (x) == SYMBOL_REF)
4953 decl = SYMBOL_REF_DECL (x);
4954 else if (GET_CODE (x) == CONST
4955 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4956 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4957
4958 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4959 if (decl && !DECL_P (decl))
4960 decl = 0;
4961
4962 /* We use last_assemble_variable_decl to get line information. It's
4963 not always going to be right and might not even be close, but will
4964 be right for the more common cases. */
5df189be 4965 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4966 loc_decl = decl;
4967 else
4968 loc_decl = last_assemble_variable_decl;
4969
4970 /* The decl could be a string constant. */
4971 if (decl && DECL_P (decl))
4972 msg = "%Jcreating run-time relocation for %qD";
4973 else
4974 msg = "creating run-time relocation";
4975
99369027 4976 if (TARGET_WARN_RELOC)
644459d0 4977 warning (0, msg, loc_decl, decl);
99369027 4978 else
4979 error (msg, loc_decl, decl);
644459d0 4980}
4981
4982/* Hook into assemble_integer so we can generate an error for run-time
4983 relocations. The SPU ABI disallows them. */
4984static bool
4985spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4986{
4987 /* By default run-time relocations aren't supported, but we allow them
4988 in case users support them in their own run-time loader, and we provide
4989 a warning for those users who don't. */
4990 if ((GET_CODE (x) == SYMBOL_REF)
4991 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4992 reloc_diagnostic (x);
4993
4994 return default_assemble_integer (x, size, aligned_p);
4995}
4996
4997static void
4998spu_asm_globalize_label (FILE * file, const char *name)
4999{
5000 fputs ("\t.global\t", file);
5001 assemble_name (file, name);
5002 fputs ("\n", file);
5003}
5004
5005static bool
f529eb25 5006spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5007 bool speed ATTRIBUTE_UNUSED)
644459d0 5008{
5009 enum machine_mode mode = GET_MODE (x);
5010 int cost = COSTS_N_INSNS (2);
5011
5012 /* Folding to a CONST_VECTOR will use extra space but there might
5013 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5014 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5015 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5016 because this cost will only be compared against a single insn.
5017 if (code == CONST_VECTOR)
5018 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5019 */
5020
5021 /* Use defaults for float operations. Not accurate but good enough. */
5022 if (mode == DFmode)
5023 {
5024 *total = COSTS_N_INSNS (13);
5025 return true;
5026 }
5027 if (mode == SFmode)
5028 {
5029 *total = COSTS_N_INSNS (6);
5030 return true;
5031 }
5032 switch (code)
5033 {
5034 case CONST_INT:
5035 if (satisfies_constraint_K (x))
5036 *total = 0;
5037 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5038 *total = COSTS_N_INSNS (1);
5039 else
5040 *total = COSTS_N_INSNS (3);
5041 return true;
5042
5043 case CONST:
5044 *total = COSTS_N_INSNS (3);
5045 return true;
5046
5047 case LABEL_REF:
5048 case SYMBOL_REF:
5049 *total = COSTS_N_INSNS (0);
5050 return true;
5051
5052 case CONST_DOUBLE:
5053 *total = COSTS_N_INSNS (5);
5054 return true;
5055
5056 case FLOAT_EXTEND:
5057 case FLOAT_TRUNCATE:
5058 case FLOAT:
5059 case UNSIGNED_FLOAT:
5060 case FIX:
5061 case UNSIGNED_FIX:
5062 *total = COSTS_N_INSNS (7);
5063 return true;
5064
5065 case PLUS:
5066 if (mode == TImode)
5067 {
5068 *total = COSTS_N_INSNS (9);
5069 return true;
5070 }
5071 break;
5072
5073 case MULT:
5074 cost =
5075 GET_CODE (XEXP (x, 0)) ==
5076 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5077 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5078 {
5079 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5080 {
5081 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5082 cost = COSTS_N_INSNS (14);
5083 if ((val & 0xffff) == 0)
5084 cost = COSTS_N_INSNS (9);
5085 else if (val > 0 && val < 0x10000)
5086 cost = COSTS_N_INSNS (11);
5087 }
5088 }
5089 *total = cost;
5090 return true;
5091 case DIV:
5092 case UDIV:
5093 case MOD:
5094 case UMOD:
5095 *total = COSTS_N_INSNS (20);
5096 return true;
5097 case ROTATE:
5098 case ROTATERT:
5099 case ASHIFT:
5100 case ASHIFTRT:
5101 case LSHIFTRT:
5102 *total = COSTS_N_INSNS (4);
5103 return true;
5104 case UNSPEC:
5105 if (XINT (x, 1) == UNSPEC_CONVERT)
5106 *total = COSTS_N_INSNS (0);
5107 else
5108 *total = COSTS_N_INSNS (4);
5109 return true;
5110 }
5111 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5112 if (GET_MODE_CLASS (mode) == MODE_INT
5113 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5114 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5115 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5116 *total = cost;
5117 return true;
5118}
5119
1bd43494 5120static enum machine_mode
5121spu_unwind_word_mode (void)
644459d0 5122{
1bd43494 5123 return SImode;
644459d0 5124}
5125
5126/* Decide whether we can make a sibling call to a function. DECL is the
5127 declaration of the function being targeted by the call and EXP is the
5128 CALL_EXPR representing the call. */
5129static bool
5130spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5131{
5132 return decl && !TARGET_LARGE_MEM;
5133}
5134
5135/* We need to correctly update the back chain pointer and the Available
5136 Stack Size (which is in the second slot of the sp register). */
5137void
5138spu_allocate_stack (rtx op0, rtx op1)
5139{
5140 HOST_WIDE_INT v;
5141 rtx chain = gen_reg_rtx (V4SImode);
5142 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5143 rtx sp = gen_reg_rtx (V4SImode);
5144 rtx splatted = gen_reg_rtx (V4SImode);
5145 rtx pat = gen_reg_rtx (TImode);
5146
5147 /* copy the back chain so we can save it back again. */
5148 emit_move_insn (chain, stack_bot);
5149
5150 op1 = force_reg (SImode, op1);
5151
5152 v = 0x1020300010203ll;
5153 emit_move_insn (pat, immed_double_const (v, v, TImode));
5154 emit_insn (gen_shufb (splatted, op1, op1, pat));
5155
5156 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5157 emit_insn (gen_subv4si3 (sp, sp, splatted));
5158
5159 if (flag_stack_check)
5160 {
5161 rtx avail = gen_reg_rtx(SImode);
5162 rtx result = gen_reg_rtx(SImode);
5163 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5164 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5165 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5166 }
5167
5168 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5169
5170 emit_move_insn (stack_bot, chain);
5171
5172 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5173}
5174
5175void
5176spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5177{
5178 static unsigned char arr[16] =
5179 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5180 rtx temp = gen_reg_rtx (SImode);
5181 rtx temp2 = gen_reg_rtx (SImode);
5182 rtx temp3 = gen_reg_rtx (V4SImode);
5183 rtx temp4 = gen_reg_rtx (V4SImode);
5184 rtx pat = gen_reg_rtx (TImode);
5185 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5186
5187 /* Restore the backchain from the first word, sp from the second. */
5188 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5189 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5190
5191 emit_move_insn (pat, array_to_constant (TImode, arr));
5192
5193 /* Compute Available Stack Size for sp */
5194 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5195 emit_insn (gen_shufb (temp3, temp, temp, pat));
5196
5197 /* Compute Available Stack Size for back chain */
5198 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5199 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5200 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5201
5202 emit_insn (gen_addv4si3 (sp, sp, temp3));
5203 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5204}
5205
5206static void
5207spu_init_libfuncs (void)
5208{
5209 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5210 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5211 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5212 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5213 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5214 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5215 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5216 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5217 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5218 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5219 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5220
5221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5222 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5223
5224 set_optab_libfunc (smul_optab, TImode, "__multi3");
5225 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5226 set_optab_libfunc (smod_optab, TImode, "__modti3");
5227 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5228 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5229 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5230}
5231
5232/* Make a subreg, stripping any existing subreg. We could possibly just
5233 call simplify_subreg, but in this case we know what we want. */
5234rtx
5235spu_gen_subreg (enum machine_mode mode, rtx x)
5236{
5237 if (GET_CODE (x) == SUBREG)
5238 x = SUBREG_REG (x);
5239 if (GET_MODE (x) == mode)
5240 return x;
5241 return gen_rtx_SUBREG (mode, x, 0);
5242}
5243
5244static bool
fb80456a 5245spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5246{
5247 return (TYPE_MODE (type) == BLKmode
5248 && ((type) == 0
5249 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5250 || int_size_in_bytes (type) >
5251 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5252}
5253\f
5254/* Create the built-in types and functions */
5255
c2233b46 5256enum spu_function_code
5257{
5258#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5259#include "spu-builtins.def"
5260#undef DEF_BUILTIN
5261 NUM_SPU_BUILTINS
5262};
5263
5264extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5265
644459d0 5266struct spu_builtin_description spu_builtins[] = {
5267#define DEF_BUILTIN(fcode, icode, name, type, params) \
5268 {fcode, icode, name, type, params, NULL_TREE},
5269#include "spu-builtins.def"
5270#undef DEF_BUILTIN
5271};
5272
5273static void
5274spu_init_builtins (void)
5275{
5276 struct spu_builtin_description *d;
5277 unsigned int i;
5278
5279 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5280 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5281 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5282 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5283 V4SF_type_node = build_vector_type (float_type_node, 4);
5284 V2DF_type_node = build_vector_type (double_type_node, 2);
5285
5286 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5287 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5288 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5289 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5290
c4ecce0c 5291 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5292
5293 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5294 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5295 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5296 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5297 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5298 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5299 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5300 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5301 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5302 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5303 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5304 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5305
5306 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5307 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5308 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5309 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5310 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5311 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5312 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5313 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5314
5315 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5316 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5317
5318 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5319
5320 spu_builtin_types[SPU_BTI_PTR] =
5321 build_pointer_type (build_qualified_type
5322 (void_type_node,
5323 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5324
5325 /* For each builtin we build a new prototype. The tree code will make
5326 sure nodes are shared. */
5327 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5328 {
5329 tree p;
5330 char name[64]; /* build_function will make a copy. */
5331 int parm;
5332
5333 if (d->name == 0)
5334 continue;
5335
5dfbd18f 5336 /* Find last parm. */
644459d0 5337 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5338 ;
644459d0 5339
5340 p = void_list_node;
5341 while (parm > 1)
5342 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5343
5344 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5345
5346 sprintf (name, "__builtin_%s", d->name);
5347 d->fndecl =
5348 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5349 NULL, NULL_TREE);
a76866d3 5350 if (d->fcode == SPU_MASK_FOR_LOAD)
5351 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5352
5353 /* These builtins don't throw. */
5354 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5355 }
5356}
5357
cf31d486 5358void
5359spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5360{
5361 static unsigned char arr[16] =
5362 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5363
5364 rtx temp = gen_reg_rtx (Pmode);
5365 rtx temp2 = gen_reg_rtx (V4SImode);
5366 rtx temp3 = gen_reg_rtx (V4SImode);
5367 rtx pat = gen_reg_rtx (TImode);
5368 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5369
5370 emit_move_insn (pat, array_to_constant (TImode, arr));
5371
5372 /* Restore the sp. */
5373 emit_move_insn (temp, op1);
5374 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5375
5376 /* Compute available stack size for sp. */
5377 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5378 emit_insn (gen_shufb (temp3, temp, temp, pat));
5379
5380 emit_insn (gen_addv4si3 (sp, sp, temp3));
5381 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5382}
5383
644459d0 5384int
5385spu_safe_dma (HOST_WIDE_INT channel)
5386{
006e4b96 5387 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5388}
5389
5390void
5391spu_builtin_splats (rtx ops[])
5392{
5393 enum machine_mode mode = GET_MODE (ops[0]);
5394 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5395 {
5396 unsigned char arr[16];
5397 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5398 emit_move_insn (ops[0], array_to_constant (mode, arr));
5399 }
644459d0 5400 else
5401 {
5402 rtx reg = gen_reg_rtx (TImode);
5403 rtx shuf;
5404 if (GET_CODE (ops[1]) != REG
5405 && GET_CODE (ops[1]) != SUBREG)
5406 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5407 switch (mode)
5408 {
5409 case V2DImode:
5410 case V2DFmode:
5411 shuf =
5412 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5413 TImode);
5414 break;
5415 case V4SImode:
5416 case V4SFmode:
5417 shuf =
5418 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5419 TImode);
5420 break;
5421 case V8HImode:
5422 shuf =
5423 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5424 TImode);
5425 break;
5426 case V16QImode:
5427 shuf =
5428 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5429 TImode);
5430 break;
5431 default:
5432 abort ();
5433 }
5434 emit_move_insn (reg, shuf);
5435 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5436 }
5437}
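
/* Hedged note on the shuffle constants above: shufb selects result
   bytes from the 32 bytes of its two source registers, so the V4SI
   pattern 0x00010203 repeated in every word copies bytes 0-3 of ops[1]
   (the scalar sitting in the preferred slot) into each word of the
   result, which is exactly a 32-bit splat; the other patterns do the
   same for 64-, 16- and 8-bit elements.  */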
5438
5439void
5440spu_builtin_extract (rtx ops[])
5441{
5442 enum machine_mode mode;
5443 rtx rot, from, tmp;
5444
5445 mode = GET_MODE (ops[1]);
5446
5447 if (GET_CODE (ops[2]) == CONST_INT)
5448 {
5449 switch (mode)
5450 {
5451 case V16QImode:
5452 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5453 break;
5454 case V8HImode:
5455 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5456 break;
5457 case V4SFmode:
5458 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5459 break;
5460 case V4SImode:
5461 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5462 break;
5463 case V2DImode:
5464 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5465 break;
5466 case V2DFmode:
5467 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5468 break;
5469 default:
5470 abort ();
5471 }
5472 return;
5473 }
5474
5475 from = spu_gen_subreg (TImode, ops[1]);
5476 rot = gen_reg_rtx (TImode);
5477 tmp = gen_reg_rtx (SImode);
5478
5479 switch (mode)
5480 {
5481 case V16QImode:
5482 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5483 break;
5484 case V8HImode:
5485 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5486 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5487 break;
5488 case V4SFmode:
5489 case V4SImode:
5490 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5491 break;
5492 case V2DImode:
5493 case V2DFmode:
5494 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5495 break;
5496 default:
5497 abort ();
5498 }
5499 emit_insn (gen_rotqby_ti (rot, from, tmp));
5500
5501 emit_insn (gen_spu_convert (ops[0], rot));
5502}
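
/* Hedged note on the variable-index path above: the vector is rotated
   left by the element's byte offset (for V4SI, 4 * index), which brings
   the selected element into the preferred slot, and gen_spu_convert
   then narrows the TImode rotate result to the scalar mode.  */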
5503
5504void
5505spu_builtin_insert (rtx ops[])
5506{
5507 enum machine_mode mode = GET_MODE (ops[0]);
5508 enum machine_mode imode = GET_MODE_INNER (mode);
5509 rtx mask = gen_reg_rtx (TImode);
5510 rtx offset;
5511
5512 if (GET_CODE (ops[3]) == CONST_INT)
5513 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5514 else
5515 {
5516 offset = gen_reg_rtx (SImode);
5517 emit_insn (gen_mulsi3
5518 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5519 }
5520 emit_insn (gen_cpat
5521 (mask, stack_pointer_rtx, offset,
5522 GEN_INT (GET_MODE_SIZE (imode))));
5523 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5524}
5525
5526void
5527spu_builtin_promote (rtx ops[])
5528{
5529 enum machine_mode mode, imode;
5530 rtx rot, from, offset;
5531 HOST_WIDE_INT pos;
5532
5533 mode = GET_MODE (ops[0]);
5534 imode = GET_MODE_INNER (mode);
5535
5536 from = gen_reg_rtx (TImode);
5537 rot = spu_gen_subreg (TImode, ops[0]);
5538
5539 emit_insn (gen_spu_convert (from, ops[1]));
5540
5541 if (GET_CODE (ops[2]) == CONST_INT)
5542 {
5543 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5544 if (GET_MODE_SIZE (imode) < 4)
5545 pos += 4 - GET_MODE_SIZE (imode);
5546 offset = GEN_INT (pos & 15);
5547 }
5548 else
5549 {
5550 offset = gen_reg_rtx (SImode);
5551 switch (mode)
5552 {
5553 case V16QImode:
5554 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5555 break;
5556 case V8HImode:
5557 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5558 emit_insn (gen_addsi3 (offset, offset, offset));
5559 break;
5560 case V4SFmode:
5561 case V4SImode:
5562 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5563 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5564 break;
5565 case V2DImode:
5566 case V2DFmode:
5567 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5568 break;
5569 default:
5570 abort ();
5571 }
5572 }
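  /* OFFSET is now the byte rotate count that moves the scalar from its
     preferred slot to element ops[2] of the vector; rotqby only looks at
     the low 4 bits, so a negative count acts as a right rotate.  The
     extra adjustment above is needed for sub-word elements because their
     preferred slot is not at byte 0 of the first word.  */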
5573 emit_insn (gen_rotqby_ti (rot, from, offset));
5574}
5575
5576void
5577spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5578{
5579 rtx shuf = gen_reg_rtx (V4SImode);
5580 rtx insn = gen_reg_rtx (V4SImode);
5581 rtx shufc;
5582 rtx insnc;
5583 rtx mem;
5584
5585 fnaddr = force_reg (SImode, fnaddr);
5586 cxt = force_reg (SImode, cxt);
5587
5588 if (TARGET_LARGE_MEM)
5589 {
5590 rtx rotl = gen_reg_rtx (V4SImode);
5591 rtx mask = gen_reg_rtx (V4SImode);
5592 rtx bi = gen_reg_rtx (SImode);
5593 unsigned char shufa[16] = {
5594 2, 3, 0, 1, 18, 19, 16, 17,
5595 0, 1, 2, 3, 16, 17, 18, 19
5596 };
5597 unsigned char insna[16] = {
5598 0x41, 0, 0, 79,
5599 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5600 0x60, 0x80, 0, 79,
5601 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5602 };
5603
5604 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5605 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5606
5607 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5608 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5609 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5610 emit_insn (gen_selb (insn, insnc, rotl, mask));
5611
5612 mem = memory_address (Pmode, tramp);
5613 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5614
5615 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5616 mem = memory_address (Pmode, plus_constant (tramp, 16));
5617 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5618 }
5619 else
5620 {
5621 rtx scxt = gen_reg_rtx (SImode);
5622 rtx sfnaddr = gen_reg_rtx (SImode);
5623 unsigned char insna[16] = {
5624 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5625 0x30, 0, 0, 0,
5626 0, 0, 0, 0,
5627 0, 0, 0, 0
5628 };
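      /* INSNA appears to hold the encodings of "ila $STATIC_CHAIN_REGNUM,0"
         and "bra 0": the ila I18 immediate field starts at bit 7 (hence
         cxt << 7 below), and the bra I16 field also starts at bit 7 but
         holds a word address (hence fnaddr << 5).  The cpat/shufb/ior
         sequence merges the shifted values into those immediate fields.  */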
5629
5630 shufc = gen_reg_rtx (TImode);
5631 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5632
 5633 /* By or'ing all of cxt with the ila opcode we are assuming that cxt
 5634 fits in 18 bits and that its last 4 bits are zero. This will be
 5635 true if the stack pointer is initialized to 0x3fff0 at program
 5636 start; otherwise the ila instruction will be garbage. */
5637
5638 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5639 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5640 emit_insn (gen_cpat
5641 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5642 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5643 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5644
5645 mem = memory_address (Pmode, tramp);
5646 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5647
5648 }
5649 emit_insn (gen_sync ());
5650}
5651
5652void
5653spu_expand_sign_extend (rtx ops[])
5654{
5655 unsigned char arr[16];
5656 rtx pat = gen_reg_rtx (TImode);
5657 rtx sign, c;
5658 int i, last;
5659 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
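  /* The shufb pattern built in ARR keeps the source value in the
     low-order bytes of the destination and fills every byte above it
     from the SIGN register computed below; the selected sign bytes are
     all copies of the source's sign bit.  In a shufb control word,
     values 0x00-0x0f select bytes of ops[1] and 0x10-0x1f select bytes
     of SIGN.  */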
5660 if (GET_MODE (ops[1]) == QImode)
5661 {
5662 sign = gen_reg_rtx (HImode);
5663 emit_insn (gen_extendqihi2 (sign, ops[1]));
5664 for (i = 0; i < 16; i++)
5665 arr[i] = 0x12;
5666 arr[last] = 0x13;
5667 }
5668 else
5669 {
5670 for (i = 0; i < 16; i++)
5671 arr[i] = 0x10;
5672 switch (GET_MODE (ops[1]))
5673 {
5674 case HImode:
5675 sign = gen_reg_rtx (SImode);
5676 emit_insn (gen_extendhisi2 (sign, ops[1]));
5677 arr[last] = 0x03;
5678 arr[last - 1] = 0x02;
5679 break;
5680 case SImode:
5681 sign = gen_reg_rtx (SImode);
5682 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5683 for (i = 0; i < 4; i++)
5684 arr[last - i] = 3 - i;
5685 break;
5686 case DImode:
5687 sign = gen_reg_rtx (SImode);
5688 c = gen_reg_rtx (SImode);
5689 emit_insn (gen_spu_convert (c, ops[1]));
5690 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5691 for (i = 0; i < 8; i++)
5692 arr[last - i] = 7 - i;
5693 break;
5694 default:
5695 abort ();
5696 }
5697 }
5698 emit_move_insn (pat, array_to_constant (TImode, arr));
5699 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5700}
5701
5702/* Expand vector initialization. If there are any constant parts,
 5703 load the constant parts first, then load any non-constant parts. */
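/* Illustrative example (assuming V4SImode): for an initializer
   { 1, x, 1, y } with non-constant x and y, the constant pass below
   first fills the variable slots with the first constant, giving
   { 1, 1, 1, 1 }, which the recursive call loads with a single splat;
   the variable pass then inserts x at element 1 and y at element 3
   via spu_builtin_insert.  */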
5704void
5705spu_expand_vector_init (rtx target, rtx vals)
5706{
5707 enum machine_mode mode = GET_MODE (target);
5708 int n_elts = GET_MODE_NUNITS (mode);
5709 int n_var = 0;
5710 bool all_same = true;
790c536c 5711 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5712 int i;
5713
5714 first = XVECEXP (vals, 0, 0);
5715 for (i = 0; i < n_elts; ++i)
5716 {
5717 x = XVECEXP (vals, 0, i);
e442af0b 5718 if (!(CONST_INT_P (x)
5719 || GET_CODE (x) == CONST_DOUBLE
5720 || GET_CODE (x) == CONST_FIXED))
644459d0 5721 ++n_var;
5722 else
5723 {
5724 if (first_constant == NULL_RTX)
5725 first_constant = x;
5726 }
5727 if (i > 0 && !rtx_equal_p (x, first))
5728 all_same = false;
5729 }
5730
 5731 /* If all elements are the same, use splats to repeat the element.  */
5732 if (all_same)
5733 {
5734 if (!CONSTANT_P (first)
5735 && !register_operand (first, GET_MODE (x)))
5736 first = force_reg (GET_MODE (first), first);
5737 emit_insn (gen_spu_splats (target, first));
5738 return;
5739 }
5740
 5741 /* Load the constant parts.  */
5742 if (n_var != n_elts)
5743 {
5744 if (n_var == 0)
5745 {
5746 emit_move_insn (target,
5747 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5748 }
5749 else
5750 {
5751 rtx constant_parts_rtx = copy_rtx (vals);
5752
5753 gcc_assert (first_constant != NULL_RTX);
 5754 /* Fill empty slots with the first constant; this increases
 5755 our chance of using splats in the recursive call below. */
5756 for (i = 0; i < n_elts; ++i)
e442af0b 5757 {
5758 x = XVECEXP (constant_parts_rtx, 0, i);
5759 if (!(CONST_INT_P (x)
5760 || GET_CODE (x) == CONST_DOUBLE
5761 || GET_CODE (x) == CONST_FIXED))
5762 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5763 }
644459d0 5764
5765 spu_expand_vector_init (target, constant_parts_rtx);
5766 }
5767 }
5768
 5769 /* Load the variable parts.  */
5770 if (n_var != 0)
5771 {
5772 rtx insert_operands[4];
5773
5774 insert_operands[0] = target;
5775 insert_operands[2] = target;
5776 for (i = 0; i < n_elts; ++i)
5777 {
5778 x = XVECEXP (vals, 0, i);
e442af0b 5779 if (!(CONST_INT_P (x)
5780 || GET_CODE (x) == CONST_DOUBLE
5781 || GET_CODE (x) == CONST_FIXED))
644459d0 5782 {
5783 if (!register_operand (x, GET_MODE (x)))
5784 x = force_reg (GET_MODE (x), x);
5785 insert_operands[1] = x;
5786 insert_operands[3] = GEN_INT (i);
5787 spu_builtin_insert (insert_operands);
5788 }
5789 }
5790 }
5791}
6352eedf 5792
5474166e 5793/* Return the insn index for the vector compare instruction for the given
 5794 CODE, DEST_MODE and OP_MODE. Return -1 if a valid insn is not available. */
5795
5796static int
5797get_vec_cmp_insn (enum rtx_code code,
5798 enum machine_mode dest_mode,
5799 enum machine_mode op_mode)
5800
5801{
5802 switch (code)
5803 {
5804 case EQ:
5805 if (dest_mode == V16QImode && op_mode == V16QImode)
5806 return CODE_FOR_ceq_v16qi;
5807 if (dest_mode == V8HImode && op_mode == V8HImode)
5808 return CODE_FOR_ceq_v8hi;
5809 if (dest_mode == V4SImode && op_mode == V4SImode)
5810 return CODE_FOR_ceq_v4si;
5811 if (dest_mode == V4SImode && op_mode == V4SFmode)
5812 return CODE_FOR_ceq_v4sf;
5813 if (dest_mode == V2DImode && op_mode == V2DFmode)
5814 return CODE_FOR_ceq_v2df;
5815 break;
5816 case GT:
5817 if (dest_mode == V16QImode && op_mode == V16QImode)
5818 return CODE_FOR_cgt_v16qi;
5819 if (dest_mode == V8HImode && op_mode == V8HImode)
5820 return CODE_FOR_cgt_v8hi;
5821 if (dest_mode == V4SImode && op_mode == V4SImode)
5822 return CODE_FOR_cgt_v4si;
5823 if (dest_mode == V4SImode && op_mode == V4SFmode)
5824 return CODE_FOR_cgt_v4sf;
5825 if (dest_mode == V2DImode && op_mode == V2DFmode)
5826 return CODE_FOR_cgt_v2df;
5827 break;
5828 case GTU:
5829 if (dest_mode == V16QImode && op_mode == V16QImode)
5830 return CODE_FOR_clgt_v16qi;
5831 if (dest_mode == V8HImode && op_mode == V8HImode)
5832 return CODE_FOR_clgt_v8hi;
5833 if (dest_mode == V4SImode && op_mode == V4SImode)
5834 return CODE_FOR_clgt_v4si;
5835 break;
5836 default:
5837 break;
5838 }
5839 return -1;
5840}
5841
5842/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5843 DMODE is expected destination mode. This is a recursive function. */
5844
5845static rtx
5846spu_emit_vector_compare (enum rtx_code rcode,
5847 rtx op0, rtx op1,
5848 enum machine_mode dmode)
5849{
5850 int vec_cmp_insn;
5851 rtx mask;
5852 enum machine_mode dest_mode;
5853 enum machine_mode op_mode = GET_MODE (op1);
5854
5855 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5856
 5857 /* Single-precision floating-point vector compare instructions use a
 5858 V4SImode destination; double-precision ones use a V2DImode destination.
 5859 Move the result to the appropriate mode later. */
5860 if (dmode == V4SFmode)
5861 dest_mode = V4SImode;
5862 else if (dmode == V2DFmode)
5863 dest_mode = V2DImode;
5864 else
5865 dest_mode = dmode;
5866
5867 mask = gen_reg_rtx (dest_mode);
5868 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5869
5870 if (vec_cmp_insn == -1)
5871 {
5872 bool swap_operands = false;
5873 bool try_again = false;
5874 switch (rcode)
5875 {
5876 case LT:
5877 rcode = GT;
5878 swap_operands = true;
5879 try_again = true;
5880 break;
5881 case LTU:
5882 rcode = GTU;
5883 swap_operands = true;
5884 try_again = true;
5885 break;
5886 case NE:
5887 /* Treat A != B as ~(A==B). */
5888 {
5889 enum insn_code nor_code;
5890 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5891 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5892 gcc_assert (nor_code != CODE_FOR_nothing);
5893 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5894 if (dmode != dest_mode)
5895 {
5896 rtx temp = gen_reg_rtx (dest_mode);
5897 convert_move (temp, mask, 0);
5898 return temp;
5899 }
5900 return mask;
5901 }
5902 break;
5903 case GE:
5904 case GEU:
5905 case LE:
5906 case LEU:
5907 /* Try GT/GTU/LT/LTU OR EQ */
5908 {
5909 rtx c_rtx, eq_rtx;
5910 enum insn_code ior_code;
5911 enum rtx_code new_code;
5912
5913 switch (rcode)
5914 {
5915 case GE: new_code = GT; break;
5916 case GEU: new_code = GTU; break;
5917 case LE: new_code = LT; break;
5918 case LEU: new_code = LTU; break;
5919 default:
5920 gcc_unreachable ();
5921 }
5922
5923 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5924 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5925
99bdde56 5926 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5927 gcc_assert (ior_code != CODE_FOR_nothing);
5928 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5929 if (dmode != dest_mode)
5930 {
5931 rtx temp = gen_reg_rtx (dest_mode);
5932 convert_move (temp, mask, 0);
5933 return temp;
5934 }
5935 return mask;
5936 }
5937 break;
5938 default:
5939 gcc_unreachable ();
5940 }
5941
5942 /* You only get two chances. */
5943 if (try_again)
5944 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5945
5946 gcc_assert (vec_cmp_insn != -1);
5947
5948 if (swap_operands)
5949 {
5950 rtx tmp;
5951 tmp = op0;
5952 op0 = op1;
5953 op1 = tmp;
5954 }
5955 }
5956
5957 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5958 if (dmode != dest_mode)
5959 {
5960 rtx temp = gen_reg_rtx (dest_mode);
5961 convert_move (temp, mask, 0);
5962 return temp;
5963 }
5964 return mask;
5965}
5966
5967
5968/* Emit vector conditional expression.
5969 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5970 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5971
5972int
5973spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5974 rtx cond, rtx cc_op0, rtx cc_op1)
5975{
5976 enum machine_mode dest_mode = GET_MODE (dest);
5977 enum rtx_code rcode = GET_CODE (cond);
5978 rtx mask;
5979
5980 /* Get the vector mask for the given relational operations. */
5981 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5982
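  /* selb takes bits from its second value operand where the mask bits
     are 1, so with the all-ones-per-true-lane mask computed above this
     implements COND ? OP1 : OP2 element-wise.  */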
5983 emit_insn(gen_selb (dest, op2, op1, mask));
5984
5985 return 1;
5986}
5987
6352eedf 5988static rtx
5989spu_force_reg (enum machine_mode mode, rtx op)
5990{
5991 rtx x, r;
5992 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5993 {
5994 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5995 || GET_MODE (op) == BLKmode)
5996 return force_reg (mode, convert_to_mode (mode, op, 0));
5997 abort ();
5998 }
5999
6000 r = force_reg (GET_MODE (op), op);
6001 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6002 {
6003 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6004 if (x)
6005 return x;
6006 }
6007
6008 x = gen_reg_rtx (mode);
6009 emit_insn (gen_spu_convert (x, r));
6010 return x;
6011}
6012
6013static void
6014spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6015{
6016 HOST_WIDE_INT v = 0;
6017 int lsbits;
6018 /* Check the range of immediate operands. */
6019 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6020 {
6021 int range = p - SPU_BTI_7;
5df189be 6022
6023 if (!CONSTANT_P (op))
6352eedf 6024 error ("%s expects an integer literal in the range [%d, %d].",
6025 d->name,
6026 spu_builtin_range[range].low, spu_builtin_range[range].high);
6027
6028 if (GET_CODE (op) == CONST
6029 && (GET_CODE (XEXP (op, 0)) == PLUS
6030 || GET_CODE (XEXP (op, 0)) == MINUS))
6031 {
6032 v = INTVAL (XEXP (XEXP (op, 0), 1));
6033 op = XEXP (XEXP (op, 0), 0);
6034 }
6035 else if (GET_CODE (op) == CONST_INT)
6036 v = INTVAL (op);
5df189be 6037 else if (GET_CODE (op) == CONST_VECTOR
6038 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6039 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6040
6041 /* The default for v is 0 which is valid in every range. */
6042 if (v < spu_builtin_range[range].low
6043 || v > spu_builtin_range[range].high)
6044 error ("%s expects an integer literal in the range [%d, %d]. ("
6045 HOST_WIDE_INT_PRINT_DEC ")",
6046 d->name,
6047 spu_builtin_range[range].low, spu_builtin_range[range].high,
6048 v);
6352eedf 6049
6050 switch (p)
6051 {
6052 case SPU_BTI_S10_4:
6053 lsbits = 4;
6054 break;
6055 case SPU_BTI_U16_2:
 6056 /* This is only used in lqa and stqa. Even though the insns
6057 encode 16 bits of the address (all but the 2 least
6058 significant), only 14 bits are used because it is masked to
6059 be 16 byte aligned. */
6060 lsbits = 4;
6061 break;
6062 case SPU_BTI_S16_2:
6063 /* This is used for lqr and stqr. */
6064 lsbits = 2;
6065 break;
6066 default:
6067 lsbits = 0;
6068 }
6069
6070 if (GET_CODE (op) == LABEL_REF
6071 || (GET_CODE (op) == SYMBOL_REF
6072 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6073 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6074 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6075 d->name);
6076 }
6077}
6078
6079
70ca06f8 6080static int
5df189be 6081expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6082 rtx target, rtx ops[])
6083{
bc620c5c 6084 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6085 int i = 0, a;
6352eedf 6086
6087 /* Expand the arguments into rtl. */
6088
6089 if (d->parm[0] != SPU_BTI_VOID)
6090 ops[i++] = target;
6091
70ca06f8 6092 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6093 {
5df189be 6094 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6095 if (arg == 0)
6096 abort ();
b9c74b4d 6097 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6098 }
70ca06f8 6099
6100 /* The insn pattern may have additional operands (SCRATCH).
6101 Return the number of actual non-SCRATCH operands. */
6102 gcc_assert (i <= insn_data[icode].n_operands);
6103 return i;
6352eedf 6104}
6105
6106static rtx
6107spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6108 tree exp, rtx target)
6352eedf 6109{
6110 rtx pat;
6111 rtx ops[8];
bc620c5c 6112 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6113 enum machine_mode mode, tmode;
6114 int i, p;
70ca06f8 6115 int n_operands;
6352eedf 6116 tree return_type;
6117
6118 /* Set up ops[] with values from arglist. */
70ca06f8 6119 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6120
6121 /* Handle the target operand which must be operand 0. */
6122 i = 0;
6123 if (d->parm[0] != SPU_BTI_VOID)
6124 {
6125
 6126 /* We prefer the mode specified for the match_operand; otherwise
 6127 use the mode from the builtin function prototype. */
6128 tmode = insn_data[d->icode].operand[0].mode;
6129 if (tmode == VOIDmode)
6130 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6131
 6132 /* Try to use target because not using it can lead to extra copies,
 6133 and when all of the registers are in use those extra copies lead
 6134 to extra spills. */
6135 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6136 ops[0] = target;
6137 else
6138 target = ops[0] = gen_reg_rtx (tmode);
6139
6140 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6141 abort ();
6142
6143 i++;
6144 }
6145
a76866d3 6146 if (d->fcode == SPU_MASK_FOR_LOAD)
6147 {
6148 enum machine_mode mode = insn_data[icode].operand[1].mode;
6149 tree arg;
6150 rtx addr, op, pat;
6151
6152 /* get addr */
5df189be 6153 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 6154 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6155 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6156 addr = memory_address (mode, op);
6157
6158 /* negate addr */
6159 op = gen_reg_rtx (GET_MODE (addr));
6160 emit_insn (gen_rtx_SET (VOIDmode, op,
6161 gen_rtx_NEG (GET_MODE (addr), addr)));
6162 op = gen_rtx_MEM (mode, op);
6163
6164 pat = GEN_FCN (icode) (target, op);
6165 if (!pat)
6166 return 0;
6167 emit_insn (pat);
6168 return target;
6169 }
6170
6352eedf 6171 /* Ignore align_hint, but still expand its args in case they have
6172 side effects. */
6173 if (icode == CODE_FOR_spu_align_hint)
6174 return 0;
6175
6176 /* Handle the rest of the operands. */
70ca06f8 6177 for (p = 1; i < n_operands; i++, p++)
6352eedf 6178 {
6179 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6180 mode = insn_data[d->icode].operand[i].mode;
6181 else
6182 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6183
6184 /* mode can be VOIDmode here for labels */
6185
6186 /* For specific intrinsics with an immediate operand, e.g.,
6187 si_ai(), we sometimes need to convert the scalar argument to a
6188 vector argument by splatting the scalar. */
6189 if (VECTOR_MODE_P (mode)
6190 && (GET_CODE (ops[i]) == CONST_INT
6191 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6192 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6193 {
6194 if (GET_CODE (ops[i]) == CONST_INT)
6195 ops[i] = spu_const (mode, INTVAL (ops[i]));
6196 else
6197 {
6198 rtx reg = gen_reg_rtx (mode);
6199 enum machine_mode imode = GET_MODE_INNER (mode);
6200 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6201 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6202 if (imode != GET_MODE (ops[i]))
6203 ops[i] = convert_to_mode (imode, ops[i],
6204 TYPE_UNSIGNED (spu_builtin_types
6205 [d->parm[i]]));
6206 emit_insn (gen_spu_splats (reg, ops[i]));
6207 ops[i] = reg;
6208 }
6209 }
6210
5df189be 6211 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6212
6352eedf 6213 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6214 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6215 }
6216
70ca06f8 6217 switch (n_operands)
6352eedf 6218 {
6219 case 0:
6220 pat = GEN_FCN (icode) (0);
6221 break;
6222 case 1:
6223 pat = GEN_FCN (icode) (ops[0]);
6224 break;
6225 case 2:
6226 pat = GEN_FCN (icode) (ops[0], ops[1]);
6227 break;
6228 case 3:
6229 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6230 break;
6231 case 4:
6232 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6233 break;
6234 case 5:
6235 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6236 break;
6237 case 6:
6238 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6239 break;
6240 default:
6241 abort ();
6242 }
6243
6244 if (!pat)
6245 abort ();
6246
6247 if (d->type == B_CALL || d->type == B_BISLED)
6248 emit_call_insn (pat);
6249 else if (d->type == B_JUMP)
6250 {
6251 emit_jump_insn (pat);
6252 emit_barrier ();
6253 }
6254 else
6255 emit_insn (pat);
6256
6257 return_type = spu_builtin_types[d->parm[0]];
6258 if (d->parm[0] != SPU_BTI_VOID
6259 && GET_MODE (target) != TYPE_MODE (return_type))
6260 {
6261 /* target is the return value. It should always be the mode of
6262 the builtin function prototype. */
6263 target = spu_force_reg (TYPE_MODE (return_type), target);
6264 }
6265
6266 return target;
6267}
6268
6269rtx
6270spu_expand_builtin (tree exp,
6271 rtx target,
6272 rtx subtarget ATTRIBUTE_UNUSED,
6273 enum machine_mode mode ATTRIBUTE_UNUSED,
6274 int ignore ATTRIBUTE_UNUSED)
6275{
5df189be 6276 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6277 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6278 struct spu_builtin_description *d;
6279
6280 if (fcode < NUM_SPU_BUILTINS)
6281 {
6282 d = &spu_builtins[fcode];
6283
5df189be 6284 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6285 }
6286 abort ();
6287}
6288
e99f512d 6289/* Implement targetm.vectorize.builtin_mul_widen_even. */
6290static tree
6291spu_builtin_mul_widen_even (tree type)
6292{
e99f512d 6293 switch (TYPE_MODE (type))
6294 {
6295 case V8HImode:
6296 if (TYPE_UNSIGNED (type))
6297 return spu_builtins[SPU_MULE_0].fndecl;
6298 else
6299 return spu_builtins[SPU_MULE_1].fndecl;
6300 break;
6301 default:
6302 return NULL_TREE;
6303 }
6304}
6305
6306/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6307static tree
6308spu_builtin_mul_widen_odd (tree type)
6309{
6310 switch (TYPE_MODE (type))
6311 {
6312 case V8HImode:
6313 if (TYPE_UNSIGNED (type))
6314 return spu_builtins[SPU_MULO_1].fndecl;
6315 else
6316 return spu_builtins[SPU_MULO_0].fndecl;
6317 break;
6318 default:
6319 return NULL_TREE;
6320 }
6321}
6322
a76866d3 6323/* Implement targetm.vectorize.builtin_mask_for_load. */
6324static tree
6325spu_builtin_mask_for_load (void)
6326{
6327 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6328 gcc_assert (d);
6329 return d->fndecl;
6330}
5df189be 6331
a28df51d 6332/* Implement targetm.vectorize.builtin_vectorization_cost. */
6333static int
6334spu_builtin_vectorization_cost (bool runtime_test)
6335{
6336 /* If the branch of the runtime test is taken - i.e. - the vectorized
6337 version is skipped - this incurs a misprediction cost (because the
6338 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6339 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6340 when the vectorized version is executed. */
6341 if (runtime_test)
6342 return -19;
6343 else
6344 return 0;
6345}
6346
0e87db76 6347/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6348 after applying N iterations. This routine does not determine
 6349 how many iterations are required to reach the desired alignment. */
6350
6351static bool
a9f1838b 6352spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6353{
6354 if (is_packed)
6355 return false;
6356
6357 /* All other types are naturally aligned. */
6358 return true;
6359}
6360
a0515226 6361/* Implement targetm.vectorize.builtin_vec_perm. */
6362tree
6363spu_builtin_vec_perm (tree type, tree *mask_element_type)
6364{
6365 struct spu_builtin_description *d;
6366
6367 *mask_element_type = unsigned_char_type_node;
6368
6369 switch (TYPE_MODE (type))
6370 {
6371 case V16QImode:
6372 if (TYPE_UNSIGNED (type))
6373 d = &spu_builtins[SPU_SHUFFLE_0];
6374 else
6375 d = &spu_builtins[SPU_SHUFFLE_1];
6376 break;
6377
6378 case V8HImode:
6379 if (TYPE_UNSIGNED (type))
6380 d = &spu_builtins[SPU_SHUFFLE_2];
6381 else
6382 d = &spu_builtins[SPU_SHUFFLE_3];
6383 break;
6384
6385 case V4SImode:
6386 if (TYPE_UNSIGNED (type))
6387 d = &spu_builtins[SPU_SHUFFLE_4];
6388 else
6389 d = &spu_builtins[SPU_SHUFFLE_5];
6390 break;
6391
6392 case V2DImode:
6393 if (TYPE_UNSIGNED (type))
6394 d = &spu_builtins[SPU_SHUFFLE_6];
6395 else
6396 d = &spu_builtins[SPU_SHUFFLE_7];
6397 break;
6398
6399 case V4SFmode:
6400 d = &spu_builtins[SPU_SHUFFLE_8];
6401 break;
6402
6403 case V2DFmode:
6404 d = &spu_builtins[SPU_SHUFFLE_9];
6405 break;
6406
6407 default:
6408 return NULL_TREE;
6409 }
6410
6411 gcc_assert (d);
6412 return d->fndecl;
6413}
6414
d52fd16a 6415/* Count the total number of instructions in each pipe and return the
6416 maximum, which is used as the Minimum Iteration Interval (MII)
6417 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6418 A value of -2 means the instruction can go in either pipe0 or pipe1. */
6419static int
6420spu_sms_res_mii (struct ddg *g)
6421{
6422 int i;
6423 unsigned t[4] = {0, 0, 0, 0};
6424
6425 for (i = 0; i < g->num_nodes; i++)
6426 {
6427 rtx insn = g->nodes[i].insn;
6428 int p = get_pipe (insn) + 2;
6429
6430 assert (p >= 0);
6431 assert (p < 4);
6432
6433 t[p]++;
6434 if (dump_file && INSN_P (insn))
6435 fprintf (dump_file, "i%d %s %d %d\n",
6436 INSN_UID (insn),
6437 insn_data[INSN_CODE(insn)].name,
6438 p, t[p]);
6439 }
6440 if (dump_file)
6441 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6442
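  /* The MII is the larger of the issue slots needed when the either-pipe
     instructions (t[0]) split evenly between the two pipes and the load
     on the most heavily used single pipe.  For example (hypothetical
     counts), t = {5, 0, 4, 3} gives
     MAX ((5 + 4 + 3 + 1) / 2, MAX (4, 3)) = 6.  */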
6443 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6444}
6445
6446
5df189be 6447void
6448spu_init_expanders (void)
9d98604b 6449{
5df189be 6450 if (cfun)
9d98604b 6451 {
6452 rtx r0, r1;
 6453 /* HARD_FRAME_POINTER_REGNUM is only 128 bit aligned when
 6454 frame_pointer_needed is true. We don't know that until we're
 6455 expanding the prologue, so be conservative here. */
6456 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6457
6458 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6459 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6460 to be treated as aligned, so generate them here. */
6461 r0 = gen_reg_rtx (SImode);
6462 r1 = gen_reg_rtx (SImode);
6463 mark_reg_pointer (r0, 128);
6464 mark_reg_pointer (r1, 128);
6465 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6466 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6467 }
ea32e033 6468}
6469
6470static enum machine_mode
6471spu_libgcc_cmp_return_mode (void)
6472{
6473
6474/* On SPU, word_mode is TImode, so it is better to use SImode
 6475 for compare returns. */
6476 return SImode;
6477}
6478
6479static enum machine_mode
6480spu_libgcc_shift_count_mode (void)
6481{
6482/* On SPU, word_mode is TImode, so it is better to use SImode
 6483 for shift counts. */
6484 return SImode;
6485}
5a976006 6486
6487/* An early place to adjust some flags after GCC has finished processing
 6488 them. */
6489static void
6490asm_file_start (void)
6491{
6492 /* Variable tracking should be run after all optimizations which
6493 change order of insns. It also needs a valid CFG. */
6494 spu_flag_var_tracking = flag_var_tracking;
6495 flag_var_tracking = 0;
6496
6497 default_file_start ();
6498}
6499
a08dfd55 6500/* Implement targetm.section_type_flags. */
6501static unsigned int
6502spu_section_type_flags (tree decl, const char *name, int reloc)
6503{
6504 /* .toe needs to have type @nobits. */
6505 if (strcmp (name, ".toe") == 0)
6506 return SECTION_BSS;
6507 return default_section_type_flags (decl, name, reloc);
6508}
c2233b46 6509
56c7bfc2 6510/* Generate a constant or register which contains 2^SCALE. We assume
6511 the result is valid for MODE. Currently, MODE must be V4SFmode and
6512 SCALE must be SImode. */
6513rtx
6514spu_gen_exp2 (enum machine_mode mode, rtx scale)
6515{
6516 gcc_assert (mode == V4SFmode);
6517 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6518 if (GET_CODE (scale) != CONST_INT)
6519 {
6520 /* unsigned int exp = (127 + scale) << 23;
6521 __vector float m = (__vector float) spu_splats (exp); */
6522 rtx reg = force_reg (SImode, scale);
6523 rtx exp = gen_reg_rtx (SImode);
6524 rtx mul = gen_reg_rtx (mode);
6525 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6526 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6527 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6528 return mul;
6529 }
6530 else
6531 {
6532 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6533 unsigned char arr[16];
6534 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6535 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6536 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6537 arr[3] = arr[7] = arr[11] = arr[15] = 0;
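      /* Each 4-byte slot of ARR now holds the IEEE-754 single-precision
         encoding of 2^SCALE: the biased exponent EXP occupies bits 30-23
         and the mantissa is zero.  For example, SCALE = 1 gives EXP = 128
         and bytes 0x40 0x00 0x00 0x00, i.e. 2.0f.  */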
6538 return array_to_constant (mode, arr);
6539 }
6540}
6541
9d98604b 6542/* After reload, just change the convert into a move instruction
6543 or a dead instruction. */
6544void
6545spu_split_convert (rtx ops[])
6546{
6547 if (REGNO (ops[0]) == REGNO (ops[1]))
6548 emit_note (NOTE_INSN_DELETED);
6549 else
6550 {
6551 /* Use TImode always as this might help hard reg copyprop. */
6552 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6553 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6554 emit_insn (gen_move_insn (op0, op1));
6555 }
6556}
6557
c2233b46 6558#include "gt-spu.h"