1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
3
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3 of the License, or (at your option)
7 any later version.
8
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
17
18 #include "config.h"
19 #include "system.h"
20 #include "coretypes.h"
21 #include "tm.h"
22 #include "rtl.h"
23 #include "regs.h"
24 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "diagnostic-core.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "machmode.h"
51 #include "gimple.h"
52 #include "tm-constrs.h"
53 #include "ddg.h"
54 #include "sbitmap.h"
55 #include "timevar.h"
56 #include "df.h"
57
58 /* Builtin types, data and prototypes. */
59
60 enum spu_builtin_type_index
61 {
62 SPU_BTI_END_OF_PARAMS,
63
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
75
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
78
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
92
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
98
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
103
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
106
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
109
110 SPU_BTI_MAX
111 };
112
113 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
123
124 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
125
126 struct spu_builtin_range
127 {
128 int low, high;
129 };
130
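/* Allowed ranges for the immediate operand classes declared above; each
   entry's trailing comment names the corresponding SPU_BTI_* value.  For
   example, SPU_BTI_U7 accepts 0 .. 0x7f and SPU_BTI_S10 accepts
   -0x200 .. 0x1ff.  */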
131 static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
144 };
145
146 \f
147 /* Target specific attribute specifications. */
148 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
149
150 /* Prototypes and external defs. */
151 static void spu_option_override (void);
152 static void spu_option_default_params (void);
153 static void spu_init_builtins (void);
154 static tree spu_builtin_decl (unsigned, bool);
155 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156 static bool spu_vector_mode_supported_p (enum machine_mode mode);
157 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
158 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
160 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161 static rtx get_pic_reg (void);
162 static int need_to_save_reg (int regno, int saving);
163 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167 static void emit_nop_for_insn (rtx insn);
168 static bool insn_clobbers_hbr (rtx insn);
169 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
170 int distance, sbitmap blocks);
171 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
173 static rtx get_branch_target (rtx branch);
174 static void spu_machine_dependent_reorg (void);
175 static int spu_sched_issue_rate (void);
176 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178 static int get_pipe (rtx insn);
179 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
180 static void spu_sched_init_global (FILE *, int, int);
181 static void spu_sched_init (FILE *, int, int);
182 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
183 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
185 bool *no_add_attrs);
186 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
188 bool *no_add_attrs);
189 static int spu_naked_function_p (tree func);
190 static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
192 static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194 static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
196 static tree spu_build_builtin_va_list (void);
197 static void spu_va_start (tree, rtx);
198 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
200 static int store_with_one_insn_p (rtx mem);
201 static int mem_is_padded_component_ref (rtx x);
202 static int reg_aligned_for_addr (rtx x);
203 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204 static void spu_asm_globalize_label (FILE * file, const char *name);
205 static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
208 static void spu_init_libfuncs (void);
209 static bool spu_return_in_memory (const_tree type, const_tree fntype);
210 static void fix_range (const char *);
211 static void spu_encode_section_info (tree, rtx, int);
212 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
213 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
215 static tree spu_builtin_mul_widen_even (tree);
216 static tree spu_builtin_mul_widen_odd (tree);
217 static tree spu_builtin_mask_for_load (void);
218 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
219 static bool spu_vector_alignment_reachable (const_tree, bool);
220 static tree spu_builtin_vec_perm (tree, tree *);
221 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224 static rtx spu_addr_space_convert (rtx, tree, tree);
225 static int spu_sms_res_mii (struct ddg *g);
226 static void asm_file_start (void);
227 static unsigned int spu_section_type_flags (tree, const char *, int);
228 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229 static void spu_unique_section (tree, int);
230 static rtx spu_expand_load (rtx, rtx, rtx, int);
231 static void spu_trampoline_init (rtx, tree, rtx);
232 static void spu_conditional_register_usage (void);
233 static bool spu_ref_may_alias_errno (ao_ref *);
234 static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
235 HOST_WIDE_INT, tree);
236
237 /* Which instruction set architecture to use. */
238 int spu_arch;
239 /* Which cpu are we tuning for. */
240 int spu_tune;
241
242 /* The hardware requires 8 insns between a hint and the branch it
243 affects.  This variable describes how many rtl instructions the
244 compiler needs to see before inserting a hint, and then the compiler
245 will insert enough nops to make it at least 8 insns. The default is
246 for the compiler to allow up to 2 nops to be emitted.  The nops are
247 inserted in pairs, so we round down. */
248 int spu_hint_dist = (8*4) - (2*4);
249
250 enum spu_immediate {
251 SPU_NONE,
252 SPU_IL,
253 SPU_ILA,
254 SPU_ILH,
255 SPU_ILHU,
256 SPU_ORI,
257 SPU_ORHI,
258 SPU_ORBI,
259 SPU_IOHL
260 };
261 enum immediate_class
262 {
263 IC_POOL, /* constant pool */
264 IC_IL1, /* one il* instruction */
265 IC_IL2, /* both ilhu and iohl instructions */
266 IC_IL1s, /* one il* instruction */
267 IC_IL2s, /* both ilhu and iohl instructions */
268 IC_FSMBI, /* the fsmbi instruction */
269 IC_CPAT, /* one of the c*d instructions */
270 IC_FSMBI2 /* fsmbi plus 1 other instruction */
271 };
272
273 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
274 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
275 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
276 static enum immediate_class classify_immediate (rtx op,
277 enum machine_mode mode);
278
279 static enum machine_mode spu_unwind_word_mode (void);
280
281 static enum machine_mode
282 spu_libgcc_cmp_return_mode (void);
283
284 static enum machine_mode
285 spu_libgcc_shift_count_mode (void);
286
287 /* Pointer mode for __ea references. */
288 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
289
290 \f
291 /* Table of machine attributes. */
292 static const struct attribute_spec spu_attribute_table[] =
293 {
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
297 false },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
299 false },
300 { NULL, 0, 0, false, false, false, NULL, false }
301 };
302 \f
303 /* TARGET overrides. */
304
305 #undef TARGET_ADDR_SPACE_POINTER_MODE
306 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
307
308 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
309 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
310
311 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
312 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
313 spu_addr_space_legitimate_address_p
314
315 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
316 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
317
318 #undef TARGET_ADDR_SPACE_SUBSET_P
319 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
320
321 #undef TARGET_ADDR_SPACE_CONVERT
322 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
323
324 #undef TARGET_INIT_BUILTINS
325 #define TARGET_INIT_BUILTINS spu_init_builtins
326 #undef TARGET_BUILTIN_DECL
327 #define TARGET_BUILTIN_DECL spu_builtin_decl
328
329 #undef TARGET_EXPAND_BUILTIN
330 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
331
332 #undef TARGET_UNWIND_WORD_MODE
333 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
334
335 #undef TARGET_LEGITIMIZE_ADDRESS
336 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
337
338 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
339 and .quad for the debugger. When it is known that the assembler is fixed,
340 these can be removed. */
341 #undef TARGET_ASM_UNALIGNED_SI_OP
342 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
343
344 #undef TARGET_ASM_ALIGNED_DI_OP
345 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
346
347 /* The .8byte directive doesn't seem to work well for a 32 bit
348 architecture. */
349 #undef TARGET_ASM_UNALIGNED_DI_OP
350 #define TARGET_ASM_UNALIGNED_DI_OP NULL
351
352 #undef TARGET_RTX_COSTS
353 #define TARGET_RTX_COSTS spu_rtx_costs
354
355 #undef TARGET_ADDRESS_COST
356 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
357
358 #undef TARGET_SCHED_ISSUE_RATE
359 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
360
361 #undef TARGET_SCHED_INIT_GLOBAL
362 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
363
364 #undef TARGET_SCHED_INIT
365 #define TARGET_SCHED_INIT spu_sched_init
366
367 #undef TARGET_SCHED_VARIABLE_ISSUE
368 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
369
370 #undef TARGET_SCHED_REORDER
371 #define TARGET_SCHED_REORDER spu_sched_reorder
372
373 #undef TARGET_SCHED_REORDER2
374 #define TARGET_SCHED_REORDER2 spu_sched_reorder
375
376 #undef TARGET_SCHED_ADJUST_COST
377 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
378
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
381
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER spu_assemble_integer
384
385 #undef TARGET_SCALAR_MODE_SUPPORTED_P
386 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
387
388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
389 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
390
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
393
394 #undef TARGET_ASM_GLOBALIZE_LABEL
395 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
396
397 #undef TARGET_PASS_BY_REFERENCE
398 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
399
400 #undef TARGET_FUNCTION_ARG
401 #define TARGET_FUNCTION_ARG spu_function_arg
402
403 #undef TARGET_FUNCTION_ARG_ADVANCE
404 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
405
406 #undef TARGET_MUST_PASS_IN_STACK
407 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
408
409 #undef TARGET_BUILD_BUILTIN_VA_LIST
410 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
411
412 #undef TARGET_EXPAND_BUILTIN_VA_START
413 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
414
415 #undef TARGET_SETUP_INCOMING_VARARGS
416 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
417
418 #undef TARGET_MACHINE_DEPENDENT_REORG
419 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
420
421 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
422 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
423
424 #undef TARGET_INIT_LIBFUNCS
425 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
426
427 #undef TARGET_RETURN_IN_MEMORY
428 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
429
430 #undef TARGET_ENCODE_SECTION_INFO
431 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
432
433 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
434 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
435
436 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
437 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
438
439 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
440 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
441
442 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
443 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
444
445 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
446 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
447
448 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
449 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
450
451 #undef TARGET_LIBGCC_CMP_RETURN_MODE
452 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
453
454 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
455 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
456
457 #undef TARGET_SCHED_SMS_RES_MII
458 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
459
460 #undef TARGET_ASM_FILE_START
461 #define TARGET_ASM_FILE_START asm_file_start
462
463 #undef TARGET_SECTION_TYPE_FLAGS
464 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
465
466 #undef TARGET_ASM_SELECT_SECTION
467 #define TARGET_ASM_SELECT_SECTION spu_select_section
468
469 #undef TARGET_ASM_UNIQUE_SECTION
470 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
471
472 #undef TARGET_LEGITIMATE_ADDRESS_P
473 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
474
475 #undef TARGET_LEGITIMATE_CONSTANT_P
476 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
477
478 #undef TARGET_TRAMPOLINE_INIT
479 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
480
481 #undef TARGET_OPTION_OVERRIDE
482 #define TARGET_OPTION_OVERRIDE spu_option_override
483
484 #undef TARGET_OPTION_DEFAULT_PARAMS
485 #define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
486
487 #undef TARGET_CONDITIONAL_REGISTER_USAGE
488 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
489
490 #undef TARGET_REF_MAY_ALIAS_ERRNO
491 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
492
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
497
498 /* Variable tracking should be run after all optimizations which
499 change order of insns. It also needs a valid CFG. */
500 #undef TARGET_DELAY_VARTRACK
501 #define TARGET_DELAY_VARTRACK true
502
503 struct gcc_target targetm = TARGET_INITIALIZER;
504
505 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
506 static void
507 spu_option_default_params (void)
508 {
509 /* Override some of the default param values. With so many registers
510 larger values are better for these params. */
511 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
512 }
513
514 /* Implement TARGET_OPTION_OVERRIDE. */
515 static void
516 spu_option_override (void)
517 {
518 /* Small loops will be completely unrolled (peeled) at -O3.  For SPU it is more important
519 to keep code small by default. */
520 if (!flag_unroll_loops && !flag_peel_loops)
521 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
522 global_options.x_param_values,
523 global_options_set.x_param_values);
524
525 flag_omit_frame_pointer = 1;
526
527 /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
528 if (align_functions < 8)
529 align_functions = 8;
530
531 spu_hint_dist = 8*4 - spu_max_nops*4;
532 if (spu_hint_dist < 0)
533 spu_hint_dist = 0;
534
535 if (spu_fixed_range_string)
536 fix_range (spu_fixed_range_string);
537
538 /* Determine processor architectural level. */
539 if (spu_arch_string)
540 {
541 if (strcmp (&spu_arch_string[0], "cell") == 0)
542 spu_arch = PROCESSOR_CELL;
543 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
544 spu_arch = PROCESSOR_CELLEDP;
545 else
546 error ("bad value (%s) for -march= switch", spu_arch_string);
547 }
548
549 /* Determine processor to tune for. */
550 if (spu_tune_string)
551 {
552 if (strcmp (&spu_tune_string[0], "cell") == 0)
553 spu_tune = PROCESSOR_CELL;
554 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
555 spu_tune = PROCESSOR_CELLEDP;
556 else
557 error ("bad value (%s) for -mtune= switch", spu_tune_string);
558 }
559
560 /* Change defaults according to the processor architecture. */
561 if (spu_arch == PROCESSOR_CELLEDP)
562 {
563 /* If no command line option has been otherwise specified, change
564 the default to -mno-safe-hints on celledp -- only the original
565 Cell/B.E. processors require this workaround. */
566 if (!(target_flags_explicit & MASK_SAFE_HINTS))
567 target_flags &= ~MASK_SAFE_HINTS;
568 }
569
570 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
571 }
572 \f
573 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
574 struct attribute_spec.handler. */
575
576 /* True if MODE is valid for the target. By "valid", we mean able to
577 be manipulated in non-trivial ways. In particular, this means all
578 the arithmetic is supported. */
579 static bool
580 spu_scalar_mode_supported_p (enum machine_mode mode)
581 {
582 switch (mode)
583 {
584 case QImode:
585 case HImode:
586 case SImode:
587 case SFmode:
588 case DImode:
589 case TImode:
590 case DFmode:
591 return true;
592
593 default:
594 return false;
595 }
596 }
597
598 /* Similarly for vector modes. "Supported" here is less strict. At
599 least some operations are supported; callers need to check optabs or builtins
600 for further details. */
601 static bool
602 spu_vector_mode_supported_p (enum machine_mode mode)
603 {
604 switch (mode)
605 {
606 case V16QImode:
607 case V8HImode:
608 case V4SImode:
609 case V2DImode:
610 case V4SFmode:
611 case V2DFmode:
612 return true;
613
614 default:
615 return false;
616 }
617 }
618
619 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
620 least significant bytes of the outer mode. This function returns
621 TRUE for the SUBREGs where this is correct.  */
622 int
623 valid_subreg (rtx op)
624 {
625 enum machine_mode om = GET_MODE (op);
626 enum machine_mode im = GET_MODE (SUBREG_REG (op));
627 return om != VOIDmode && im != VOIDmode
628 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
629 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
630 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
631 }
632
633 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
634 and adjust the start offset. */
635 static rtx
636 adjust_operand (rtx op, HOST_WIDE_INT * start)
637 {
638 enum machine_mode mode;
639 int op_size;
640 /* Strip any paradoxical SUBREG. */
641 if (GET_CODE (op) == SUBREG
642 && (GET_MODE_BITSIZE (GET_MODE (op))
643 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644 {
645 if (start)
646 *start -=
647 GET_MODE_BITSIZE (GET_MODE (op)) -
648 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
649 op = SUBREG_REG (op);
650 }
651 /* If it is smaller than SI, ensure it is widened to SI with a SUBREG.  */
652 op_size = GET_MODE_BITSIZE (GET_MODE (op));
653 if (op_size < 32)
654 {
655 if (start)
656 *start += 32 - op_size;
657 op_size = 32;
658 }
659 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
660 mode = mode_for_size (op_size, MODE_INT, 0);
661 if (mode != GET_MODE (op))
662 op = gen_rtx_SUBREG (mode, op, 0);
663 return op;
664 }
665
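/* Expand an extv/extzv pattern: extract the field of ops[2] bits starting
   at bit ops[3] from ops[1] into the TImode register ops[0].  A MEM source
   needs one or two TImode loads depending on whether the field crosses the
   MEM's alignment boundary.  The field is rotated up to the top of the
   quadword and then shifted right by 128 - width bits, zero- or
   sign-extending according to UNSIGNEDP.  */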
666 void
667 spu_expand_extv (rtx ops[], int unsignedp)
668 {
669 rtx dst = ops[0], src = ops[1];
670 HOST_WIDE_INT width = INTVAL (ops[2]);
671 HOST_WIDE_INT start = INTVAL (ops[3]);
672 HOST_WIDE_INT align_mask;
673 rtx s0, s1, mask, r0;
674
675 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
676
677 if (MEM_P (src))
678 {
679 /* First, determine if we need 1 TImode load or 2. We need only 1
680 if the bits being extracted do not cross the alignment boundary
681 as determined by the MEM and its address. */
682
683 align_mask = -MEM_ALIGN (src);
684 if ((start & align_mask) == ((start + width - 1) & align_mask))
685 {
686 /* Alignment is sufficient for 1 load. */
687 s0 = gen_reg_rtx (TImode);
688 r0 = spu_expand_load (s0, 0, src, start / 8);
689 start &= 7;
690 if (r0)
691 emit_insn (gen_rotqby_ti (s0, s0, r0));
692 }
693 else
694 {
695 /* Need 2 loads. */
696 s0 = gen_reg_rtx (TImode);
697 s1 = gen_reg_rtx (TImode);
698 r0 = spu_expand_load (s0, s1, src, start / 8);
699 start &= 7;
700
701 gcc_assert (start + width <= 128);
702 if (r0)
703 {
704 rtx r1 = gen_reg_rtx (SImode);
705 mask = gen_reg_rtx (TImode);
706 emit_move_insn (mask, GEN_INT (-1));
707 emit_insn (gen_rotqby_ti (s0, s0, r0));
708 emit_insn (gen_rotqby_ti (s1, s1, r0));
709 if (GET_CODE (r0) == CONST_INT)
710 r1 = GEN_INT (INTVAL (r0) & 15);
711 else
712 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
713 emit_insn (gen_shlqby_ti (mask, mask, r1));
714 emit_insn (gen_selb (s0, s1, s0, mask));
715 }
716 }
717
718 }
719 else if (GET_CODE (src) == SUBREG)
720 {
721 rtx r = SUBREG_REG (src);
722 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
723 s0 = gen_reg_rtx (TImode);
724 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
725 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
726 else
727 emit_move_insn (s0, src);
728 }
729 else
730 {
731 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
732 s0 = gen_reg_rtx (TImode);
733 emit_move_insn (s0, src);
734 }
735
736 /* Now s0 is TImode and contains the bits to extract at start. */
737
738 if (start)
739 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
740
741 if (128 - width)
742 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
743
744 emit_move_insn (dst, s0);
745 }
746
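/* Expand an insv pattern: insert the value in ops[3] as a field of ops[1]
   bits starting at bit ops[2] of ops[0].  A mask for the field is built and
   the insertion is done with selb.  When ops[0] is a MEM, the surrounding
   quadword(s) are loaded, merged and stored back, including the case where
   the field spans two quadwords.  */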
747 void
748 spu_expand_insv (rtx ops[])
749 {
750 HOST_WIDE_INT width = INTVAL (ops[1]);
751 HOST_WIDE_INT start = INTVAL (ops[2]);
752 HOST_WIDE_INT maskbits;
753 enum machine_mode dst_mode;
754 rtx dst = ops[0], src = ops[3];
755 int dst_size;
756 rtx mask;
757 rtx shift_reg;
758 int shift;
759
760
761 if (GET_CODE (ops[0]) == MEM)
762 dst = gen_reg_rtx (TImode);
763 else
764 dst = adjust_operand (dst, &start);
765 dst_mode = GET_MODE (dst);
766 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
767
768 if (CONSTANT_P (src))
769 {
770 enum machine_mode m =
771 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
772 src = force_reg (m, convert_to_mode (m, src, 0));
773 }
774 src = adjust_operand (src, 0);
775
776 mask = gen_reg_rtx (dst_mode);
777 shift_reg = gen_reg_rtx (dst_mode);
778 shift = dst_size - start - width;
779
780 /* It's not safe to use subreg here because the compiler assumes
781 that the SUBREG_REG is right justified in the SUBREG. */
782 convert_move (shift_reg, src, 1);
783
784 if (shift > 0)
785 {
786 switch (dst_mode)
787 {
788 case SImode:
789 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
790 break;
791 case DImode:
792 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
793 break;
794 case TImode:
795 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
796 break;
797 default:
798 abort ();
799 }
800 }
801 else if (shift < 0)
802 abort ();
803
804 switch (dst_size)
805 {
806 case 32:
807 maskbits = (-1ll << (32 - width - start));
808 if (start)
809 maskbits += (1ll << (32 - start));
810 emit_move_insn (mask, GEN_INT (maskbits));
811 break;
812 case 64:
813 maskbits = (-1ll << (64 - width - start));
814 if (start)
815 maskbits += (1ll << (64 - start));
816 emit_move_insn (mask, GEN_INT (maskbits));
817 break;
818 case 128:
819 {
820 unsigned char arr[16];
821 int i = start / 8;
822 memset (arr, 0, sizeof (arr));
823 arr[i] = 0xff >> (start & 7);
824 for (i++; i <= (start + width - 1) / 8; i++)
825 arr[i] = 0xff;
826 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
827 emit_move_insn (mask, array_to_constant (TImode, arr));
828 }
829 break;
830 default:
831 abort ();
832 }
833 if (GET_CODE (ops[0]) == MEM)
834 {
835 rtx low = gen_reg_rtx (SImode);
836 rtx rotl = gen_reg_rtx (SImode);
837 rtx mask0 = gen_reg_rtx (TImode);
838 rtx addr;
839 rtx addr0;
840 rtx addr1;
841 rtx mem;
842
843 addr = force_reg (Pmode, XEXP (ops[0], 0));
844 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
845 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
846 emit_insn (gen_negsi2 (rotl, low));
847 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
848 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
849 mem = change_address (ops[0], TImode, addr0);
850 set_mem_alias_set (mem, 0);
851 emit_move_insn (dst, mem);
852 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
853 if (start + width > MEM_ALIGN (ops[0]))
854 {
855 rtx shl = gen_reg_rtx (SImode);
856 rtx mask1 = gen_reg_rtx (TImode);
857 rtx dst1 = gen_reg_rtx (TImode);
858 rtx mem1;
859 addr1 = plus_constant (addr, 16);
860 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
861 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
862 emit_insn (gen_shlqby_ti (mask1, mask, shl));
863 mem1 = change_address (ops[0], TImode, addr1);
864 set_mem_alias_set (mem1, 0);
865 emit_move_insn (dst1, mem1);
866 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
867 emit_move_insn (mem1, dst1);
868 }
869 emit_move_insn (mem, dst);
870 }
871 else
872 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
873 }
874
875
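/* Expand a block move: ops[0] is the destination MEM, ops[1] the source
   MEM, ops[2] the byte count and ops[3] the alignment.  Only small,
   16-byte-aligned copies are expanded inline: whole quadwords are moved
   with V16QImode moves and any remaining tail is merged into the
   destination with a selb under a byte mask.  Returns 1 when the move was
   expanded inline, 0 otherwise.  */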
876 int
877 spu_expand_block_move (rtx ops[])
878 {
879 HOST_WIDE_INT bytes, align, offset;
880 rtx src, dst, sreg, dreg, target;
881 int i;
882 if (GET_CODE (ops[2]) != CONST_INT
883 || GET_CODE (ops[3]) != CONST_INT
884 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
885 return 0;
886
887 bytes = INTVAL (ops[2]);
888 align = INTVAL (ops[3]);
889
890 if (bytes <= 0)
891 return 1;
892
893 dst = ops[0];
894 src = ops[1];
895
896 if (align == 16)
897 {
898 for (offset = 0; offset + 16 <= bytes; offset += 16)
899 {
900 dst = adjust_address (ops[0], V16QImode, offset);
901 src = adjust_address (ops[1], V16QImode, offset);
902 emit_move_insn (dst, src);
903 }
904 if (offset < bytes)
905 {
906 rtx mask;
907 unsigned char arr[16] = { 0 };
908 for (i = 0; i < bytes - offset; i++)
909 arr[i] = 0xff;
910 dst = adjust_address (ops[0], V16QImode, offset);
911 src = adjust_address (ops[1], V16QImode, offset);
912 mask = gen_reg_rtx (V16QImode);
913 sreg = gen_reg_rtx (V16QImode);
914 dreg = gen_reg_rtx (V16QImode);
915 target = gen_reg_rtx (V16QImode);
916 emit_move_insn (mask, array_to_constant (V16QImode, arr));
917 emit_move_insn (dreg, dst);
918 emit_move_insn (sreg, src);
919 emit_insn (gen_selb (target, dreg, sreg, mask));
920 emit_move_insn (dst, target);
921 }
922 return 1;
923 }
924 return 0;
925 }
926
927 enum spu_comp_code
928 { SPU_EQ, SPU_GT, SPU_GTU };
929
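/* For each comparison mode handled by spu_emit_branch_or_set (indexed in
   the same order as the mode switch there), the insn codes implementing
   the ==, signed > and unsigned > compares.  A 0 entry means the
   combination does not exist, e.g. there is no unsigned compare for
   floating-point modes.  */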
930 int spu_comp_icode[12][3] = {
931 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
932 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
933 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
934 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
935 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
936 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
937 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
938 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
939 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
940 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
941 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
942 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
943 };
944
945 /* Generate a compare for CODE, and emit either a branch or a set based
946 on the result.  GCC could figure this out too if we didn't provide
947 all variations of compares, but since GCC always wants to use
948 WORD_MODE, we can generate better code in most cases if we do it
949 ourselves.  */
950 void
951 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
952 {
953 int reverse_compare = 0;
954 int reverse_test = 0;
955 rtx compare_result, eq_result;
956 rtx comp_rtx, eq_rtx;
957 enum machine_mode comp_mode;
958 enum machine_mode op_mode;
959 enum spu_comp_code scode, eq_code;
960 enum insn_code ior_code;
961 enum rtx_code code = GET_CODE (cmp);
962 rtx op0 = XEXP (cmp, 0);
963 rtx op1 = XEXP (cmp, 1);
964 int index;
965 int eq_test = 0;
966
967 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
968 and so on, to keep the constant in operand 1. */
969 if (GET_CODE (op1) == CONST_INT)
970 {
971 HOST_WIDE_INT val = INTVAL (op1) - 1;
972 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
973 switch (code)
974 {
975 case GE:
976 op1 = GEN_INT (val);
977 code = GT;
978 break;
979 case LT:
980 op1 = GEN_INT (val);
981 code = LE;
982 break;
983 case GEU:
984 op1 = GEN_INT (val);
985 code = GTU;
986 break;
987 case LTU:
988 op1 = GEN_INT (val);
989 code = LEU;
990 break;
991 default:
992 break;
993 }
994 }
995
996 comp_mode = SImode;
997 op_mode = GET_MODE (op0);
998
999 switch (code)
1000 {
1001 case GE:
1002 scode = SPU_GT;
1003 if (HONOR_NANS (op_mode))
1004 {
1005 reverse_compare = 0;
1006 reverse_test = 0;
1007 eq_test = 1;
1008 eq_code = SPU_EQ;
1009 }
1010 else
1011 {
1012 reverse_compare = 1;
1013 reverse_test = 1;
1014 }
1015 break;
1016 case LE:
1017 scode = SPU_GT;
1018 if (HONOR_NANS (op_mode))
1019 {
1020 reverse_compare = 1;
1021 reverse_test = 0;
1022 eq_test = 1;
1023 eq_code = SPU_EQ;
1024 }
1025 else
1026 {
1027 reverse_compare = 0;
1028 reverse_test = 1;
1029 }
1030 break;
1031 case LT:
1032 reverse_compare = 1;
1033 reverse_test = 0;
1034 scode = SPU_GT;
1035 break;
1036 case GEU:
1037 reverse_compare = 1;
1038 reverse_test = 1;
1039 scode = SPU_GTU;
1040 break;
1041 case LEU:
1042 reverse_compare = 0;
1043 reverse_test = 1;
1044 scode = SPU_GTU;
1045 break;
1046 case LTU:
1047 reverse_compare = 1;
1048 reverse_test = 0;
1049 scode = SPU_GTU;
1050 break;
1051 case NE:
1052 reverse_compare = 0;
1053 reverse_test = 1;
1054 scode = SPU_EQ;
1055 break;
1056
1057 case EQ:
1058 scode = SPU_EQ;
1059 break;
1060 case GT:
1061 scode = SPU_GT;
1062 break;
1063 case GTU:
1064 scode = SPU_GTU;
1065 break;
1066 default:
1067 scode = SPU_EQ;
1068 break;
1069 }
1070
1071 switch (op_mode)
1072 {
1073 case QImode:
1074 index = 0;
1075 comp_mode = QImode;
1076 break;
1077 case HImode:
1078 index = 1;
1079 comp_mode = HImode;
1080 break;
1081 case SImode:
1082 index = 2;
1083 break;
1084 case DImode:
1085 index = 3;
1086 break;
1087 case TImode:
1088 index = 4;
1089 break;
1090 case SFmode:
1091 index = 5;
1092 break;
1093 case DFmode:
1094 index = 6;
1095 break;
1096 case V16QImode:
1097 index = 7;
1098 comp_mode = op_mode;
1099 break;
1100 case V8HImode:
1101 index = 8;
1102 comp_mode = op_mode;
1103 break;
1104 case V4SImode:
1105 index = 9;
1106 comp_mode = op_mode;
1107 break;
1108 case V4SFmode:
1109 index = 10;
1110 comp_mode = V4SImode;
1111 break;
1112 case V2DFmode:
1113 index = 11;
1114 comp_mode = V2DImode;
1115 break;
1116 case V2DImode:
1117 default:
1118 abort ();
1119 }
1120
1121 if (GET_MODE (op1) == DFmode
1122 && (scode != SPU_GT && scode != SPU_EQ))
1123 abort ();
1124
1125 if (is_set == 0 && op1 == const0_rtx
1126 && (GET_MODE (op0) == SImode
1127 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1128 {
1129 /* Don't need to set a register with the result when we are
1130 comparing against zero and branching. */
1131 reverse_test = !reverse_test;
1132 compare_result = op0;
1133 }
1134 else
1135 {
1136 compare_result = gen_reg_rtx (comp_mode);
1137
1138 if (reverse_compare)
1139 {
1140 rtx t = op1;
1141 op1 = op0;
1142 op0 = t;
1143 }
1144
1145 if (spu_comp_icode[index][scode] == 0)
1146 abort ();
1147
1148 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1149 (op0, op_mode))
1150 op0 = force_reg (op_mode, op0);
1151 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1152 (op1, op_mode))
1153 op1 = force_reg (op_mode, op1);
1154 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1155 op0, op1);
1156 if (comp_rtx == 0)
1157 abort ();
1158 emit_insn (comp_rtx);
1159
1160 if (eq_test)
1161 {
1162 eq_result = gen_reg_rtx (comp_mode);
1163 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1164 op0, op1);
1165 if (eq_rtx == 0)
1166 abort ();
1167 emit_insn (eq_rtx);
1168 ior_code = optab_handler (ior_optab, comp_mode);
1169 gcc_assert (ior_code != CODE_FOR_nothing);
1170 emit_insn (GEN_FCN (ior_code)
1171 (compare_result, compare_result, eq_result));
1172 }
1173 }
1174
1175 if (is_set == 0)
1176 {
1177 rtx bcomp;
1178 rtx loc_ref;
1179
1180 /* We don't have branch on QI compare insns, so we convert the
1181 QI compare result to a HI result. */
1182 if (comp_mode == QImode)
1183 {
1184 rtx old_res = compare_result;
1185 compare_result = gen_reg_rtx (HImode);
1186 comp_mode = HImode;
1187 emit_insn (gen_extendqihi2 (compare_result, old_res));
1188 }
1189
1190 if (reverse_test)
1191 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1192 else
1193 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1194
1195 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1196 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1197 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1198 loc_ref, pc_rtx)));
1199 }
1200 else if (is_set == 2)
1201 {
1202 rtx target = operands[0];
1203 int compare_size = GET_MODE_BITSIZE (comp_mode);
1204 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1205 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1206 rtx select_mask;
1207 rtx op_t = operands[2];
1208 rtx op_f = operands[3];
1209
1210 /* The result of the comparison can be SI, HI or QI mode. Create a
1211 mask based on that result. */
1212 if (target_size > compare_size)
1213 {
1214 select_mask = gen_reg_rtx (mode);
1215 emit_insn (gen_extend_compare (select_mask, compare_result));
1216 }
1217 else if (target_size < compare_size)
1218 select_mask =
1219 gen_rtx_SUBREG (mode, compare_result,
1220 (compare_size - target_size) / BITS_PER_UNIT);
1221 else if (comp_mode != mode)
1222 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1223 else
1224 select_mask = compare_result;
1225
1226 if (GET_MODE (target) != GET_MODE (op_t)
1227 || GET_MODE (target) != GET_MODE (op_f))
1228 abort ();
1229
1230 if (reverse_test)
1231 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1232 else
1233 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1234 }
1235 else
1236 {
1237 rtx target = operands[0];
1238 if (reverse_test)
1239 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1240 gen_rtx_NOT (comp_mode, compare_result)));
1241 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1242 emit_insn (gen_extendhisi2 (target, compare_result));
1243 else if (GET_MODE (target) == SImode
1244 && GET_MODE (compare_result) == QImode)
1245 emit_insn (gen_extend_compare (target, compare_result));
1246 else
1247 emit_move_insn (target, compare_result);
1248 }
1249 }
1250
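/* Return the target-format bit pattern of the SFmode or DFmode
   CONST_DOUBLE X as a HOST_WIDE_INT (32 bits for SFmode, all 64 bits for
   DFmode).  */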
1251 HOST_WIDE_INT
1252 const_double_to_hwint (rtx x)
1253 {
1254 HOST_WIDE_INT val;
1255 REAL_VALUE_TYPE rv;
1256 if (GET_MODE (x) == SFmode)
1257 {
1258 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1259 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1260 }
1261 else if (GET_MODE (x) == DFmode)
1262 {
1263 long l[2];
1264 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1265 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1266 val = l[0];
1267 val = (val << 32) | (l[1] & 0xffffffff);
1268 }
1269 else
1270 abort ();
1271 return val;
1272 }
1273
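/* The inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE whose target-format bit pattern is V.  */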
1274 rtx
1275 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1276 {
1277 long tv[2];
1278 REAL_VALUE_TYPE rv;
1279 gcc_assert (mode == SFmode || mode == DFmode);
1280
1281 if (mode == SFmode)
1282 tv[0] = (v << 32) >> 32;
1283 else if (mode == DFmode)
1284 {
1285 tv[1] = (v << 32) >> 32;
1286 tv[0] = v >> 32;
1287 }
1288 real_from_target (&rv, tv, mode);
1289 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1290 }
1291
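/* Output the address ADDR in assembler syntax, handling plain register,
   register+register, register+constant and symbolic addresses.  An AND
   with -16 (quadword alignment masking) is stripped off first.  */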
1292 void
1293 print_operand_address (FILE * file, register rtx addr)
1294 {
1295 rtx reg;
1296 rtx offset;
1297
1298 if (GET_CODE (addr) == AND
1299 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1300 && INTVAL (XEXP (addr, 1)) == -16)
1301 addr = XEXP (addr, 0);
1302
1303 switch (GET_CODE (addr))
1304 {
1305 case REG:
1306 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1307 break;
1308
1309 case PLUS:
1310 reg = XEXP (addr, 0);
1311 offset = XEXP (addr, 1);
1312 if (GET_CODE (offset) == REG)
1313 {
1314 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1315 reg_names[REGNO (offset)]);
1316 }
1317 else if (GET_CODE (offset) == CONST_INT)
1318 {
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1320 INTVAL (offset), reg_names[REGNO (reg)]);
1321 }
1322 else
1323 abort ();
1324 break;
1325
1326 case CONST:
1327 case LABEL_REF:
1328 case SYMBOL_REF:
1329 case CONST_INT:
1330 output_addr_const (file, addr);
1331 break;
1332
1333 default:
1334 debug_rtx (addr);
1335 abort ();
1336 }
1337 }
1338
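/* Output operand X to FILE.  CODE selects one of the SPU-specific operand
   modifiers handled below, e.g. the immediate-form suffixes for il/or
   instructions, c*d pattern sizes, branch modifiers and masked rotate or
   shift counts; a CODE of 0 prints the operand in its usual form.  */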
1339 void
1340 print_operand (FILE * file, rtx x, int code)
1341 {
1342 enum machine_mode mode = GET_MODE (x);
1343 HOST_WIDE_INT val;
1344 unsigned char arr[16];
1345 int xcode = GET_CODE (x);
1346 int i, info;
1347 if (GET_MODE (x) == VOIDmode)
1348 switch (code)
1349 {
1350 case 'L': /* 128 bits, signed */
1351 case 'm': /* 128 bits, signed */
1352 case 'T': /* 128 bits, signed */
1353 case 't': /* 128 bits, signed */
1354 mode = TImode;
1355 break;
1356 case 'K': /* 64 bits, signed */
1357 case 'k': /* 64 bits, signed */
1358 case 'D': /* 64 bits, signed */
1359 case 'd': /* 64 bits, signed */
1360 mode = DImode;
1361 break;
1362 case 'J': /* 32 bits, signed */
1363 case 'j': /* 32 bits, signed */
1364 case 's': /* 32 bits, signed */
1365 case 'S': /* 32 bits, signed */
1366 mode = SImode;
1367 break;
1368 }
1369 switch (code)
1370 {
1371
1372 case 'j': /* 32 bits, signed */
1373 case 'k': /* 64 bits, signed */
1374 case 'm': /* 128 bits, signed */
1375 if (xcode == CONST_INT
1376 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1377 {
1378 gcc_assert (logical_immediate_p (x, mode));
1379 constant_to_array (mode, x, arr);
1380 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1381 val = trunc_int_for_mode (val, SImode);
1382 switch (which_logical_immediate (val))
1383 {
1384 case SPU_ORI:
1385 break;
1386 case SPU_ORHI:
1387 fprintf (file, "h");
1388 break;
1389 case SPU_ORBI:
1390 fprintf (file, "b");
1391 break;
1392 default:
1393 gcc_unreachable();
1394 }
1395 }
1396 else
1397 gcc_unreachable();
1398 return;
1399
1400 case 'J': /* 32 bits, signed */
1401 case 'K': /* 64 bits, signed */
1402 case 'L': /* 128 bits, signed */
1403 if (xcode == CONST_INT
1404 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1405 {
1406 gcc_assert (logical_immediate_p (x, mode)
1407 || iohl_immediate_p (x, mode));
1408 constant_to_array (mode, x, arr);
1409 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1410 val = trunc_int_for_mode (val, SImode);
1411 switch (which_logical_immediate (val))
1412 {
1413 case SPU_ORI:
1414 case SPU_IOHL:
1415 break;
1416 case SPU_ORHI:
1417 val = trunc_int_for_mode (val, HImode);
1418 break;
1419 case SPU_ORBI:
1420 val = trunc_int_for_mode (val, QImode);
1421 break;
1422 default:
1423 gcc_unreachable();
1424 }
1425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1426 }
1427 else
1428 gcc_unreachable();
1429 return;
1430
1431 case 't': /* 128 bits, signed */
1432 case 'd': /* 64 bits, signed */
1433 case 's': /* 32 bits, signed */
1434 if (CONSTANT_P (x))
1435 {
1436 enum immediate_class c = classify_immediate (x, mode);
1437 switch (c)
1438 {
1439 case IC_IL1:
1440 constant_to_array (mode, x, arr);
1441 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1442 val = trunc_int_for_mode (val, SImode);
1443 switch (which_immediate_load (val))
1444 {
1445 case SPU_IL:
1446 break;
1447 case SPU_ILA:
1448 fprintf (file, "a");
1449 break;
1450 case SPU_ILH:
1451 fprintf (file, "h");
1452 break;
1453 case SPU_ILHU:
1454 fprintf (file, "hu");
1455 break;
1456 default:
1457 gcc_unreachable ();
1458 }
1459 break;
1460 case IC_CPAT:
1461 constant_to_array (mode, x, arr);
1462 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1463 if (info == 1)
1464 fprintf (file, "b");
1465 else if (info == 2)
1466 fprintf (file, "h");
1467 else if (info == 4)
1468 fprintf (file, "w");
1469 else if (info == 8)
1470 fprintf (file, "d");
1471 break;
1472 case IC_IL1s:
1473 if (xcode == CONST_VECTOR)
1474 {
1475 x = CONST_VECTOR_ELT (x, 0);
1476 xcode = GET_CODE (x);
1477 }
1478 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1479 fprintf (file, "a");
1480 else if (xcode == HIGH)
1481 fprintf (file, "hu");
1482 break;
1483 case IC_FSMBI:
1484 case IC_FSMBI2:
1485 case IC_IL2:
1486 case IC_IL2s:
1487 case IC_POOL:
1488 abort ();
1489 }
1490 }
1491 else
1492 gcc_unreachable ();
1493 return;
1494
1495 case 'T': /* 128 bits, signed */
1496 case 'D': /* 64 bits, signed */
1497 case 'S': /* 32 bits, signed */
1498 if (CONSTANT_P (x))
1499 {
1500 enum immediate_class c = classify_immediate (x, mode);
1501 switch (c)
1502 {
1503 case IC_IL1:
1504 constant_to_array (mode, x, arr);
1505 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1506 val = trunc_int_for_mode (val, SImode);
1507 switch (which_immediate_load (val))
1508 {
1509 case SPU_IL:
1510 case SPU_ILA:
1511 break;
1512 case SPU_ILH:
1513 case SPU_ILHU:
1514 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1515 break;
1516 default:
1517 gcc_unreachable ();
1518 }
1519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1520 break;
1521 case IC_FSMBI:
1522 constant_to_array (mode, x, arr);
1523 val = 0;
1524 for (i = 0; i < 16; i++)
1525 {
1526 val <<= 1;
1527 val |= arr[i] & 1;
1528 }
1529 print_operand (file, GEN_INT (val), 0);
1530 break;
1531 case IC_CPAT:
1532 constant_to_array (mode, x, arr);
1533 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1535 break;
1536 case IC_IL1s:
1537 if (xcode == HIGH)
1538 x = XEXP (x, 0);
1539 if (GET_CODE (x) == CONST_VECTOR)
1540 x = CONST_VECTOR_ELT (x, 0);
1541 output_addr_const (file, x);
1542 if (xcode == HIGH)
1543 fprintf (file, "@h");
1544 break;
1545 case IC_IL2:
1546 case IC_IL2s:
1547 case IC_FSMBI2:
1548 case IC_POOL:
1549 abort ();
1550 }
1551 }
1552 else
1553 gcc_unreachable ();
1554 return;
1555
1556 case 'C':
1557 if (xcode == CONST_INT)
1558 {
1559 /* Only the 4 least significant bits are relevant for generating
1560 control word instructions. */
1561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1562 return;
1563 }
1564 break;
1565
1566 case 'M': /* print code for c*d */
1567 if (GET_CODE (x) == CONST_INT)
1568 switch (INTVAL (x))
1569 {
1570 case 1:
1571 fprintf (file, "b");
1572 break;
1573 case 2:
1574 fprintf (file, "h");
1575 break;
1576 case 4:
1577 fprintf (file, "w");
1578 break;
1579 case 8:
1580 fprintf (file, "d");
1581 break;
1582 default:
1583 gcc_unreachable();
1584 }
1585 else
1586 gcc_unreachable();
1587 return;
1588
1589 case 'N': /* Negate the operand */
1590 if (xcode == CONST_INT)
1591 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1592 else if (xcode == CONST_VECTOR)
1593 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1594 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1595 return;
1596
1597 case 'I': /* enable/disable interrupts */
1598 if (xcode == CONST_INT)
1599 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1600 return;
1601
1602 case 'b': /* branch modifiers */
1603 if (xcode == REG)
1604 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1605 else if (COMPARISON_P (x))
1606 fprintf (file, "%s", xcode == NE ? "n" : "");
1607 return;
1608
1609 case 'i': /* indirect call */
1610 if (xcode == MEM)
1611 {
1612 if (GET_CODE (XEXP (x, 0)) == REG)
1613 /* Used in indirect function calls. */
1614 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1615 else
1616 output_address (XEXP (x, 0));
1617 }
1618 return;
1619
1620 case 'p': /* load/store */
1621 if (xcode == MEM)
1622 {
1623 x = XEXP (x, 0);
1624 xcode = GET_CODE (x);
1625 }
1626 if (xcode == AND)
1627 {
1628 x = XEXP (x, 0);
1629 xcode = GET_CODE (x);
1630 }
1631 if (xcode == REG)
1632 fprintf (file, "d");
1633 else if (xcode == CONST_INT)
1634 fprintf (file, "a");
1635 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1636 fprintf (file, "r");
1637 else if (xcode == PLUS || xcode == LO_SUM)
1638 {
1639 if (GET_CODE (XEXP (x, 1)) == REG)
1640 fprintf (file, "x");
1641 else
1642 fprintf (file, "d");
1643 }
1644 return;
1645
1646 case 'e':
1647 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1648 val &= 0x7;
1649 output_addr_const (file, GEN_INT (val));
1650 return;
1651
1652 case 'f':
1653 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1654 val &= 0x1f;
1655 output_addr_const (file, GEN_INT (val));
1656 return;
1657
1658 case 'g':
1659 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1660 val &= 0x3f;
1661 output_addr_const (file, GEN_INT (val));
1662 return;
1663
1664 case 'h':
1665 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1666 val = (val >> 3) & 0x1f;
1667 output_addr_const (file, GEN_INT (val));
1668 return;
1669
1670 case 'E':
1671 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1672 val = -val;
1673 val &= 0x7;
1674 output_addr_const (file, GEN_INT (val));
1675 return;
1676
1677 case 'F':
1678 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1679 val = -val;
1680 val &= 0x1f;
1681 output_addr_const (file, GEN_INT (val));
1682 return;
1683
1684 case 'G':
1685 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1686 val = -val;
1687 val &= 0x3f;
1688 output_addr_const (file, GEN_INT (val));
1689 return;
1690
1691 case 'H':
1692 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1693 val = -(val & -8ll);
1694 val = (val >> 3) & 0x1f;
1695 output_addr_const (file, GEN_INT (val));
1696 return;
1697
1698 case 'v':
1699 case 'w':
1700 constant_to_array (mode, x, arr);
1701 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1702 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1703 return;
1704
1705 case 0:
1706 if (xcode == REG)
1707 fprintf (file, "%s", reg_names[REGNO (x)]);
1708 else if (xcode == MEM)
1709 output_address (XEXP (x, 0));
1710 else if (xcode == CONST_VECTOR)
1711 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1712 else
1713 output_addr_const (file, x);
1714 return;
1715
1716 /* unused letters
1717 o qr u yz
1718 AB OPQR UVWXYZ */
1719 default:
1720 output_operand_lossage ("invalid %%xn code");
1721 }
1722 gcc_unreachable ();
1723 }
1724
1725 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1726 caller saved register. For leaf functions it is more efficient to
1727 use a volatile register because we won't need to save and restore the
1728 pic register. This routine is only valid after register allocation
1729 is completed, so we can pick an unused register. */
1730 static rtx
1731 get_pic_reg (void)
1732 {
1733 rtx pic_reg = pic_offset_table_rtx;
1734 if (!reload_completed && !reload_in_progress)
1735 abort ();
1736 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1737 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1738 return pic_reg;
1739 }
1740
1741 /* Split constant addresses to handle cases that are too large.
1742 Add in the pic register when in PIC mode.
1743 Split immediates that require more than 1 instruction. */
1744 int
1745 spu_split_immediate (rtx * ops)
1746 {
1747 enum machine_mode mode = GET_MODE (ops[0]);
1748 enum immediate_class c = classify_immediate (ops[1], mode);
1749
1750 switch (c)
1751 {
1752 case IC_IL2:
1753 {
1754 unsigned char arrhi[16];
1755 unsigned char arrlo[16];
1756 rtx to, temp, hi, lo;
1757 int i;
1758 enum machine_mode imode = mode;
1759 /* We need to do reals as ints because the constant used in the
1760 IOR might not be a legitimate real constant. */
1761 imode = int_mode_for_mode (mode);
1762 constant_to_array (mode, ops[1], arrhi);
1763 if (imode != mode)
1764 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1765 else
1766 to = ops[0];
1767 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1768 for (i = 0; i < 16; i += 4)
1769 {
1770 arrlo[i + 2] = arrhi[i + 2];
1771 arrlo[i + 3] = arrhi[i + 3];
1772 arrlo[i + 0] = arrlo[i + 1] = 0;
1773 arrhi[i + 2] = arrhi[i + 3] = 0;
1774 }
1775 hi = array_to_constant (imode, arrhi);
1776 lo = array_to_constant (imode, arrlo);
1777 emit_move_insn (temp, hi);
1778 emit_insn (gen_rtx_SET
1779 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1780 return 1;
1781 }
1782 case IC_FSMBI2:
1783 {
1784 unsigned char arr_fsmbi[16];
1785 unsigned char arr_andbi[16];
1786 rtx to, reg_fsmbi, reg_and;
1787 int i;
1788 enum machine_mode imode = mode;
1789 /* We need to do reals as ints because the constant used in the
1790 * AND might not be a legitimate real constant. */
1791 imode = int_mode_for_mode (mode);
1792 constant_to_array (mode, ops[1], arr_fsmbi);
1793 if (imode != mode)
1794 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1795 else
1796 to = ops[0];
1797 for (i = 0; i < 16; i++)
1798 if (arr_fsmbi[i] != 0)
1799 {
1800 arr_andbi[0] = arr_fsmbi[i];
1801 arr_fsmbi[i] = 0xff;
1802 }
1803 for (i = 1; i < 16; i++)
1804 arr_andbi[i] = arr_andbi[0];
1805 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1806 reg_and = array_to_constant (imode, arr_andbi);
1807 emit_move_insn (to, reg_fsmbi);
1808 emit_insn (gen_rtx_SET
1809 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1810 return 1;
1811 }
1812 case IC_POOL:
1813 if (reload_in_progress || reload_completed)
1814 {
1815 rtx mem = force_const_mem (mode, ops[1]);
1816 if (TARGET_LARGE_MEM)
1817 {
1818 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1819 emit_move_insn (addr, XEXP (mem, 0));
1820 mem = replace_equiv_address (mem, addr);
1821 }
1822 emit_move_insn (ops[0], mem);
1823 return 1;
1824 }
1825 break;
1826 case IC_IL1s:
1827 case IC_IL2s:
1828 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1829 {
1830 if (c == IC_IL2s)
1831 {
1832 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1833 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1834 }
1835 else if (flag_pic)
1836 emit_insn (gen_pic (ops[0], ops[1]));
1837 if (flag_pic)
1838 {
1839 rtx pic_reg = get_pic_reg ();
1840 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1841 crtl->uses_pic_offset_table = 1;
1842 }
1843 return flag_pic || c == IC_IL2s;
1844 }
1845 break;
1846 case IC_IL1:
1847 case IC_FSMBI:
1848 case IC_CPAT:
1849 break;
1850 }
1851 return 0;
1852 }
1853
1854 /* SAVING is TRUE when we are generating the actual load and store
1855 instructions for REGNO. When determining the size of the stack
1856 needed for saving registers we must allocate enough space for the
1857 worst case, because we don't always have the information early enough
1858 to not allocate it. But we can at least eliminate the actual loads
1859 and stores during the prologue/epilogue. */
1860 static int
1861 need_to_save_reg (int regno, int saving)
1862 {
1863 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1864 return 1;
1865 if (flag_pic
1866 && regno == PIC_OFFSET_TABLE_REGNUM
1867 && (!saving || crtl->uses_pic_offset_table)
1868 && (!saving
1869 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1870 return 1;
1871 return 0;
1872 }
1873
1874 /* This function is only correct starting with local register
1875 allocation.  */
1876 int
1877 spu_saved_regs_size (void)
1878 {
1879 int reg_save_size = 0;
1880 int regno;
1881
1882 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1883 if (need_to_save_reg (regno, 0))
1884 reg_save_size += 0x10;
1885 return reg_save_size;
1886 }
1887
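/* Emit the insn that stores register REGNO (viewed as V4SImode, i.e. a
   full quadword) to the frame slot at ADDR + OFFSET.  */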
1888 static rtx
1889 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1890 {
1891 rtx reg = gen_rtx_REG (V4SImode, regno);
1892 rtx mem =
1893 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1894 return emit_insn (gen_movv4si (mem, reg));
1895 }
1896
1897 static rtx
1898 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1899 {
1900 rtx reg = gen_rtx_REG (V4SImode, regno);
1901 rtx mem =
1902 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1903 return emit_insn (gen_movv4si (reg, mem));
1904 }
1905
1906 /* This happens after reload, so we need to expand it. */
1907 static rtx
1908 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1909 {
1910 rtx insn;
1911 if (satisfies_constraint_K (GEN_INT (imm)))
1912 {
1913 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1914 }
1915 else
1916 {
1917 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1918 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1919 if (REGNO (src) == REGNO (scratch))
1920 abort ();
1921 }
1922 return insn;
1923 }
1924
1925 /* Return nonzero if this function is known to have a null epilogue. */
1926
1927 int
1928 direct_return (void)
1929 {
1930 if (reload_completed)
1931 {
1932 if (cfun->static_chain_decl == 0
1933 && (spu_saved_regs_size ()
1934 + get_frame_size ()
1935 + crtl->outgoing_args_size
1936 + crtl->args.pretend_args_size == 0)
1937 && current_function_is_leaf)
1938 return 1;
1939 }
1940 return 0;
1941 }
1942
1943 /*
1944 The stack frame looks like this:
1945 +-------------+
1946 | incoming |
1947 | args |
1948 AP -> +-------------+
1949 | $lr save |
1950 +-------------+
1951 prev SP | back chain |
1952 +-------------+
1953 | var args |
1954 | reg save | crtl->args.pretend_args_size bytes
1955 +-------------+
1956 | ... |
1957 | saved regs | spu_saved_regs_size() bytes
1958 FP -> +-------------+
1959 | ... |
1960 | vars | get_frame_size() bytes
1961 HFP -> +-------------+
1962 | ... |
1963 | outgoing |
1964 | args | crtl->outgoing_args_size bytes
1965 +-------------+
1966 | $lr of next |
1967 | frame |
1968 +-------------+
1969 | back chain |
1970 SP -> +-------------+
1971
1972 */
1973 void
1974 spu_expand_prologue (void)
1975 {
1976 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1977 HOST_WIDE_INT total_size;
1978 HOST_WIDE_INT saved_regs_size;
1979 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1980 rtx scratch_reg_0, scratch_reg_1;
1981 rtx insn, real;
1982
1983 if (flag_pic && optimize == 0)
1984 crtl->uses_pic_offset_table = 1;
1985
1986 if (spu_naked_function_p (current_function_decl))
1987 return;
1988
1989 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1990 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1991
1992 saved_regs_size = spu_saved_regs_size ();
1993 total_size = size + saved_regs_size
1994 + crtl->outgoing_args_size
1995 + crtl->args.pretend_args_size;
1996
1997 if (!current_function_is_leaf
1998 || cfun->calls_alloca || total_size > 0)
1999 total_size += STACK_POINTER_OFFSET;
2000
2001 /* Save this first because code after this might use the link
2002 register as a scratch register. */
2003 if (!current_function_is_leaf)
2004 {
2005 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2006 RTX_FRAME_RELATED_P (insn) = 1;
2007 }
2008
2009 if (total_size > 0)
2010 {
2011 offset = -crtl->args.pretend_args_size;
2012 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2013 if (need_to_save_reg (regno, 1))
2014 {
2015 offset -= 16;
2016 insn = frame_emit_store (regno, sp_reg, offset);
2017 RTX_FRAME_RELATED_P (insn) = 1;
2018 }
2019 }
2020
2021 if (flag_pic && crtl->uses_pic_offset_table)
2022 {
2023 rtx pic_reg = get_pic_reg ();
2024 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2025 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2026 }
2027
2028 if (total_size > 0)
2029 {
2030 if (flag_stack_check)
2031 {
2032 /* We compare against total_size-1 because
2033 ($sp >= total_size) <=> ($sp > total_size-1) */
2034 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2035 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2036 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2037 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2038 {
2039 emit_move_insn (scratch_v4si, size_v4si);
2040 size_v4si = scratch_v4si;
2041 }
2042 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2043 emit_insn (gen_vec_extractv4si
2044 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2045 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2046 }
2047
2048 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2049 the value of the previous $sp because we save it as the back
2050 chain. */
2051 if (total_size <= 2000)
2052 {
2053 /* In this case we save the back chain first. */
2054 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2055 insn =
2056 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2057 }
2058 else
2059 {
2060 insn = emit_move_insn (scratch_reg_0, sp_reg);
2061 insn =
2062 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2063 }
2064 RTX_FRAME_RELATED_P (insn) = 1;
2065 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2066 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2067
2068 if (total_size > 2000)
2069 {
2070 /* Save the back chain ptr */
2071 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2072 }
2073
2074 if (frame_pointer_needed)
2075 {
2076 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2077 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2078 + crtl->outgoing_args_size;
2079 /* Set the new frame_pointer */
2080 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2081 RTX_FRAME_RELATED_P (insn) = 1;
2082 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2083 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2084 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2085 }
2086 }
2087
2088 if (flag_stack_usage_info)
2089 current_function_static_stack_size = total_size;
2090 }
2091
2092 void
2093 spu_expand_epilogue (bool sibcall_p)
2094 {
2095 int size = get_frame_size (), offset, regno;
2096 HOST_WIDE_INT saved_regs_size, total_size;
2097 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2098 rtx scratch_reg_0;
2099
2100 if (spu_naked_function_p (current_function_decl))
2101 return;
2102
2103 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2104
2105 saved_regs_size = spu_saved_regs_size ();
2106 total_size = size + saved_regs_size
2107 + crtl->outgoing_args_size
2108 + crtl->args.pretend_args_size;
2109
2110 if (!current_function_is_leaf
2111 || cfun->calls_alloca || total_size > 0)
2112 total_size += STACK_POINTER_OFFSET;
2113
2114 if (total_size > 0)
2115 {
2116 if (cfun->calls_alloca)
2117 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2118 else
2119 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2120
2121
2122 if (saved_regs_size > 0)
2123 {
2124 offset = -crtl->args.pretend_args_size;
2125 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2126 if (need_to_save_reg (regno, 1))
2127 {
2128 offset -= 0x10;
2129 frame_emit_load (regno, sp_reg, offset);
2130 }
2131 }
2132 }
2133
2134 if (!current_function_is_leaf)
2135 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2136
2137 if (!sibcall_p)
2138 {
2139 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2140 emit_jump_insn (gen__return ());
2141 }
2142 }
2143
2144 rtx
2145 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2146 {
2147 if (count != 0)
2148 return 0;
2149 /* This is inefficient because it ends up copying to a save-register
2150 which then gets saved even though $lr has already been saved. But
2151 it does generate better code for leaf functions and we don't need
2152 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2153 used for __builtin_return_address anyway, so maybe we don't care if
2154 it's inefficient. */
2155 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2156 }
2157 \f
2158
2159 /* Given VAL, generate a constant appropriate for MODE.
2160 If MODE is a vector mode, every element will be VAL.
2161 For TImode, VAL will be zero extended to 128 bits. */
2162 rtx
2163 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2164 {
2165 rtx inner;
2166 rtvec v;
2167 int units, i;
2168
2169 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2170 || GET_MODE_CLASS (mode) == MODE_FLOAT
2171 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2172 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2173
2174 if (GET_MODE_CLASS (mode) == MODE_INT)
2175 return immed_double_const (val, 0, mode);
2176
2177 /* VAL is the bit representation of the float. */
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2179 return hwint_to_const_double (mode, val);
2180
2181 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2182 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2183 else
2184 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2185
2186 units = GET_MODE_NUNITS (mode);
2187
2188 v = rtvec_alloc (units);
2189
2190 for (i = 0; i < units; ++i)
2191 RTVEC_ELT (v, i) = inner;
2192
2193 return gen_rtx_CONST_VECTOR (mode, v);
2194 }
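/* Illustrative examples (a sketch of the expected results):

     spu_const (SImode, 1)    => (const_int 1)
     spu_const (V4SImode, 1)  => (const_vector:V4SI [1 1 1 1])

   i.e. scalar integer modes return the immediate itself, and vector
   modes return a CONST_VECTOR with VAL replicated into every element.  */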
2195
2196 /* Create a MODE vector constant from 4 ints. */
2197 rtx
2198 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2199 {
2200 unsigned char arr[16];
2201 arr[0] = (a >> 24) & 0xff;
2202 arr[1] = (a >> 16) & 0xff;
2203 arr[2] = (a >> 8) & 0xff;
2204 arr[3] = (a >> 0) & 0xff;
2205 arr[4] = (b >> 24) & 0xff;
2206 arr[5] = (b >> 16) & 0xff;
2207 arr[6] = (b >> 8) & 0xff;
2208 arr[7] = (b >> 0) & 0xff;
2209 arr[8] = (c >> 24) & 0xff;
2210 arr[9] = (c >> 16) & 0xff;
2211 arr[10] = (c >> 8) & 0xff;
2212 arr[11] = (c >> 0) & 0xff;
2213 arr[12] = (d >> 24) & 0xff;
2214 arr[13] = (d >> 16) & 0xff;
2215 arr[14] = (d >> 8) & 0xff;
2216 arr[15] = (d >> 0) & 0xff;
2217 return array_to_constant(mode, arr);
2218 }
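/* Illustrative example: the four ints are stored most significant byte
   first, so

     spu_const_from_ints (V16QImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f)

   fills the byte array with { 0x00, 0x01, ..., 0x0f } and returns the
   corresponding CONST_VECTOR.  */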
2219 \f
2220 /* Branch hint support. */
2221
2222 /* An array of these is used to propagate hints to predecessor blocks. */
2223 struct spu_bb_info
2224 {
2225 rtx prop_jump; /* propagated from another block */
2226 int bb_index; /* the original block. */
2227 };
2228 static struct spu_bb_info *spu_bb_info;
2229
2230 #define STOP_HINT_P(INSN) \
2231 (GET_CODE(INSN) == CALL_INSN \
2232 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2233 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2234
2235 /* 1 when RTX is a hinted branch or its target. We keep track of
2236 what has been hinted so the safe-hint code can test it easily. */
2237 #define HINTED_P(RTX) \
2238 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2239
2240 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2241 #define SCHED_ON_EVEN_P(RTX) \
2242 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2243
2244 /* Emit a nop for INSN such that the two will dual issue. This assumes
2245 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2246 We check for TImode to handle a MULTI1 insn which has dual issued its
2247 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2248 ADDR_VEC insns. */
2249 static void
2250 emit_nop_for_insn (rtx insn)
2251 {
2252 int p;
2253 rtx new_insn;
2254 p = get_pipe (insn);
2255 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2256 new_insn = emit_insn_after (gen_lnop (), insn);
2257 else if (p == 1 && GET_MODE (insn) == TImode)
2258 {
2259 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2260 PUT_MODE (new_insn, TImode);
2261 PUT_MODE (insn, VOIDmode);
2262 }
2263 else
2264 new_insn = emit_insn_after (gen_lnop (), insn);
2265 recog_memoized (new_insn);
2266 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
2267 }
2268
2269 /* Insert nops in basic blocks to meet dual issue alignment
2270 requirements. Also make sure hbrp and hint instructions are at least
2271 one cycle apart, possibly inserting a nop. */
2272 static void
2273 pad_bb(void)
2274 {
2275 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2276 int length;
2277 int addr;
2278
2279 /* This sets up INSN_ADDRESSES. */
2280 shorten_branches (get_insns ());
2281
2282 /* Keep track of length added by nops. */
2283 length = 0;
2284
2285 prev_insn = 0;
2286 insn = get_insns ();
2287 if (!active_insn_p (insn))
2288 insn = next_active_insn (insn);
2289 for (; insn; insn = next_insn)
2290 {
2291 next_insn = next_active_insn (insn);
2292 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2293 || INSN_CODE (insn) == CODE_FOR_hbr)
2294 {
2295 if (hbr_insn)
2296 {
2297 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2298 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2299 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2300 || (a1 - a0 == 4))
2301 {
2302 prev_insn = emit_insn_before (gen_lnop (), insn);
2303 PUT_MODE (prev_insn, GET_MODE (insn));
2304 PUT_MODE (insn, TImode);
2305 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
2306 length += 4;
2307 }
2308 }
2309 hbr_insn = insn;
2310 }
2311 if (INSN_CODE (insn) == CODE_FOR_blockage)
2312 {
2313 if (GET_MODE (insn) == TImode)
2314 PUT_MODE (next_insn, TImode);
2315 insn = next_insn;
2316 next_insn = next_active_insn (insn);
2317 }
2318 addr = INSN_ADDRESSES (INSN_UID (insn));
2319 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2320 {
2321 if (((addr + length) & 7) != 0)
2322 {
2323 emit_nop_for_insn (prev_insn);
2324 length += 4;
2325 }
2326 }
2327 else if (GET_MODE (insn) == TImode
2328 && ((next_insn && GET_MODE (next_insn) != TImode)
2329 || get_attr_type (insn) == TYPE_MULTI0)
2330 && ((addr + length) & 7) != 0)
2331 {
2332 /* prev_insn will always be set because the first insn is
2333 always 8-byte aligned. */
2334 emit_nop_for_insn (prev_insn);
2335 length += 4;
2336 }
2337 prev_insn = insn;
2338 }
2339 }
2340
2341 \f
2342 /* Routines for branch hints. */
2343
2344 static void
2345 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2346 int distance, sbitmap blocks)
2347 {
2348 rtx branch_label = 0;
2349 rtx hint;
2350 rtx insn;
2351 rtx table;
2352
2353 if (before == 0 || branch == 0 || target == 0)
2354 return;
2355
2356 /* While scheduling we require hints to be no further than 600 bytes
2357 from the branch, so we need to enforce that here too. */
2358 if (distance > 600)
2359 return;
2360
2361 /* If BEFORE is a basic block note, emit the hint after the note instead. */
2362 if (NOTE_INSN_BASIC_BLOCK_P (before))
2363 before = NEXT_INSN (before);
2364
2365 branch_label = gen_label_rtx ();
2366 LABEL_NUSES (branch_label)++;
2367 LABEL_PRESERVE_P (branch_label) = 1;
2368 insn = emit_label_before (branch_label, branch);
2369 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2370 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2371
2372 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2373 recog_memoized (hint);
2374 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
2375 HINTED_P (branch) = 1;
2376
2377 if (GET_CODE (target) == LABEL_REF)
2378 HINTED_P (XEXP (target, 0)) = 1;
2379 else if (tablejump_p (branch, 0, &table))
2380 {
2381 rtvec vec;
2382 int j;
2383 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2384 vec = XVEC (PATTERN (table), 0);
2385 else
2386 vec = XVEC (PATTERN (table), 1);
2387 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2388 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2389 }
2390
2391 if (distance >= 588)
2392 {
2393 /* Make sure the hint isn't scheduled any earlier than this point,
2394 which could make it too far for the branch offset to fit */
2395 insn = emit_insn_before (gen_blockage (), hint);
2396 recog_memoized (insn);
2397 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2398 }
2399 else if (distance <= 8 * 4)
2400 {
2401 /* To guarantee at least 8 insns between the hint and branch we
2402 insert nops. */
2403 int d;
2404 for (d = distance; d < 8 * 4; d += 4)
2405 {
2406 insn =
2407 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2408 recog_memoized (insn);
2409 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2410 }
2411
2412 /* Make sure any nops inserted aren't scheduled before the hint. */
2413 insn = emit_insn_after (gen_blockage (), hint);
2414 recog_memoized (insn);
2415 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
2416
2417 /* Make sure any nops inserted aren't scheduled after the call. */
2418 if (CALL_P (branch) && distance < 8 * 4)
2419 {
2420 insn = emit_insn_before (gen_blockage (), branch);
2421 recog_memoized (insn);
2422 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2423 }
2424 }
2425 }
2426
2427 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2428 the rtx for the branch target. */
2429 static rtx
2430 get_branch_target (rtx branch)
2431 {
2432 if (GET_CODE (branch) == JUMP_INSN)
2433 {
2434 rtx set, src;
2435
2436 /* Return statements */
2437 if (GET_CODE (PATTERN (branch)) == RETURN)
2438 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2439
2440 /* jump table */
2441 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2442 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2443 return 0;
2444
2445 /* ASM GOTOs. */
2446 if (extract_asm_operands (PATTERN (branch)) != NULL)
2447 return NULL;
2448
2449 set = single_set (branch);
2450 src = SET_SRC (set);
2451 if (GET_CODE (SET_DEST (set)) != PC)
2452 abort ();
2453
2454 if (GET_CODE (src) == IF_THEN_ELSE)
2455 {
2456 rtx lab = 0;
2457 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2458 if (note)
2459 {
2460 /* If the more probable case is not a fall through, then
2461 try a branch hint. */
2462 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2463 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2464 && GET_CODE (XEXP (src, 1)) != PC)
2465 lab = XEXP (src, 1);
2466 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2467 && GET_CODE (XEXP (src, 2)) != PC)
2468 lab = XEXP (src, 2);
2469 }
2470 if (lab)
2471 {
2472 if (GET_CODE (lab) == RETURN)
2473 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2474 return lab;
2475 }
2476 return 0;
2477 }
2478
2479 return src;
2480 }
2481 else if (GET_CODE (branch) == CALL_INSN)
2482 {
2483 rtx call;
2484 /* All of our call patterns are in a PARALLEL and the CALL is
2485 the first pattern in the PARALLEL. */
2486 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2487 abort ();
2488 call = XVECEXP (PATTERN (branch), 0, 0);
2489 if (GET_CODE (call) == SET)
2490 call = SET_SRC (call);
2491 if (GET_CODE (call) != CALL)
2492 abort ();
2493 return XEXP (XEXP (call, 0), 0);
2494 }
2495 return 0;
2496 }
2497
2498 /* The special $hbr register is used to prevent the insn scheduler from
2499 moving hbr insns across instructions which invalidate them. It
2500 should only be used in a clobber, and this function searches for
2501 insns which clobber it. */
2502 static bool
2503 insn_clobbers_hbr (rtx insn)
2504 {
2505 if (INSN_P (insn)
2506 && GET_CODE (PATTERN (insn)) == PARALLEL)
2507 {
2508 rtx parallel = PATTERN (insn);
2509 rtx clobber;
2510 int j;
2511 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2512 {
2513 clobber = XVECEXP (parallel, 0, j);
2514 if (GET_CODE (clobber) == CLOBBER
2515 && GET_CODE (XEXP (clobber, 0)) == REG
2516 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2517 return 1;
2518 }
2519 }
2520 return 0;
2521 }
2522
2523 /* Search up to 32 insns starting at FIRST:
2524 - at any kind of hinted branch, just return
2525 - at any unconditional branch in the first 15 insns, just return
2526 - at a call or indirect branch, after the first 15 insns, force it to
2527 an even address and return
2528 - at any unconditional branch, after the first 15 insns, force it to
2529 an even address.
2530 At the end of the search, insert an hbrp within 4 insns of FIRST,
2531 and an hbrp within 16 instructions of FIRST.
2532 */
2533 static void
2534 insert_hbrp_for_ilb_runout (rtx first)
2535 {
2536 rtx insn, before_4 = 0, before_16 = 0;
2537 int addr = 0, length, first_addr = -1;
2538 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2539 int insert_lnop_after = 0;
2540 for (insn = first; insn; insn = NEXT_INSN (insn))
2541 if (INSN_P (insn))
2542 {
2543 if (first_addr == -1)
2544 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2545 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2546 length = get_attr_length (insn);
2547
2548 if (before_4 == 0 && addr + length >= 4 * 4)
2549 before_4 = insn;
2550 /* We test for 14 instructions because the first hbrp will add
2551 up to 2 instructions. */
2552 if (before_16 == 0 && addr + length >= 14 * 4)
2553 before_16 = insn;
2554
2555 if (INSN_CODE (insn) == CODE_FOR_hbr)
2556 {
2557 /* Make sure an hbrp is at least 2 cycles away from a hint.
2558 Insert an lnop after the hbrp when necessary. */
2559 if (before_4 == 0 && addr > 0)
2560 {
2561 before_4 = insn;
2562 insert_lnop_after |= 1;
2563 }
2564 else if (before_4 && addr <= 4 * 4)
2565 insert_lnop_after |= 1;
2566 if (before_16 == 0 && addr > 10 * 4)
2567 {
2568 before_16 = insn;
2569 insert_lnop_after |= 2;
2570 }
2571 else if (before_16 && addr <= 14 * 4)
2572 insert_lnop_after |= 2;
2573 }
2574
2575 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2576 {
2577 if (addr < hbrp_addr0)
2578 hbrp_addr0 = addr;
2579 else if (addr < hbrp_addr1)
2580 hbrp_addr1 = addr;
2581 }
2582
2583 if (CALL_P (insn) || JUMP_P (insn))
2584 {
2585 if (HINTED_P (insn))
2586 return;
2587
2588 /* Any branch after the first 15 insns should be on an even
2589 address to avoid a special case branch. There might be
2590 some nops and/or hbrps inserted, so we test after 10
2591 insns. */
2592 if (addr > 10 * 4)
2593 SCHED_ON_EVEN_P (insn) = 1;
2594 }
2595
2596 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2597 return;
2598
2599
2600 if (addr + length >= 32 * 4)
2601 {
2602 gcc_assert (before_4 && before_16);
2603 if (hbrp_addr0 > 4 * 4)
2604 {
2605 insn =
2606 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2607 recog_memoized (insn);
2608 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2609 INSN_ADDRESSES_NEW (insn,
2610 INSN_ADDRESSES (INSN_UID (before_4)));
2611 PUT_MODE (insn, GET_MODE (before_4));
2612 PUT_MODE (before_4, TImode);
2613 if (insert_lnop_after & 1)
2614 {
2615 insn = emit_insn_before (gen_lnop (), before_4);
2616 recog_memoized (insn);
2617 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
2618 INSN_ADDRESSES_NEW (insn,
2619 INSN_ADDRESSES (INSN_UID (before_4)));
2620 PUT_MODE (insn, TImode);
2621 }
2622 }
2623 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2624 && hbrp_addr1 > 16 * 4)
2625 {
2626 insn =
2627 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2628 recog_memoized (insn);
2629 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2630 INSN_ADDRESSES_NEW (insn,
2631 INSN_ADDRESSES (INSN_UID (before_16)));
2632 PUT_MODE (insn, GET_MODE (before_16));
2633 PUT_MODE (before_16, TImode);
2634 if (insert_lnop_after & 2)
2635 {
2636 insn = emit_insn_before (gen_lnop (), before_16);
2637 recog_memoized (insn);
2638 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
2639 INSN_ADDRESSES_NEW (insn,
2640 INSN_ADDRESSES (INSN_UID
2641 (before_16)));
2642 PUT_MODE (insn, TImode);
2643 }
2644 }
2645 return;
2646 }
2647 }
2648 else if (BARRIER_P (insn))
2649 return;
2650
2651 }
2652
2653 /* The SPU might hang when it executes 48 inline instructions after a
2654 hinted branch jumps to its hinted target. The beginning of a
2655 function and the return from a call might have been hinted, and
2656 must be handled as well. To prevent a hang we insert 2 hbrps. The
2657 first should be within 6 insns of the branch target. The second
2658 should be within 22 insns of the branch target. When determining
2659 if hbrps are necessary, we look for only 32 inline instructions,
2660 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2661 when inserting new hbrps, we insert them within 4 and 16 insns of
2662 the target. */
2663 static void
2664 insert_hbrp (void)
2665 {
2666 rtx insn;
2667 if (TARGET_SAFE_HINTS)
2668 {
2669 shorten_branches (get_insns ());
2670 /* Insert hbrp at beginning of function */
2671 insn = next_active_insn (get_insns ());
2672 if (insn)
2673 insert_hbrp_for_ilb_runout (insn);
2674 /* Insert hbrp after hinted targets. */
2675 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2676 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2677 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2678 }
2679 }
2680
2681 static int in_spu_reorg;
2682
2683 static void
2684 spu_var_tracking (void)
2685 {
2686 if (flag_var_tracking)
2687 {
2688 df_analyze ();
2689 timevar_push (TV_VAR_TRACKING);
2690 variable_tracking_main ();
2691 timevar_pop (TV_VAR_TRACKING);
2692 df_finish_pass (false);
2693 }
2694 }
2695
2696 /* Insert branch hints. There are no branch optimizations after this
2697 pass, so it's safe to set our branch hints now. */
2698 static void
2699 spu_machine_dependent_reorg (void)
2700 {
2701 sbitmap blocks;
2702 basic_block bb;
2703 rtx branch, insn;
2704 rtx branch_target = 0;
2705 int branch_addr = 0, insn_addr, required_dist = 0;
2706 int i;
2707 unsigned int j;
2708
2709 if (!TARGET_BRANCH_HINTS || optimize == 0)
2710 {
2711 /* We still do it for unoptimized code because an external
2712 function might have hinted a call or return. */
2713 insert_hbrp ();
2714 pad_bb ();
2715 spu_var_tracking ();
2716 return;
2717 }
2718
2719 blocks = sbitmap_alloc (last_basic_block);
2720 sbitmap_zero (blocks);
2721
2722 in_spu_reorg = 1;
2723 compute_bb_for_insn ();
2724
2725 compact_blocks ();
2726
2727 spu_bb_info =
2728 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2729 sizeof (struct spu_bb_info));
2730
2731 /* We need exact insn addresses and lengths. */
2732 shorten_branches (get_insns ());
2733
2734 for (i = n_basic_blocks - 1; i >= 0; i--)
2735 {
2736 bb = BASIC_BLOCK (i);
2737 branch = 0;
2738 if (spu_bb_info[i].prop_jump)
2739 {
2740 branch = spu_bb_info[i].prop_jump;
2741 branch_target = get_branch_target (branch);
2742 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2743 required_dist = spu_hint_dist;
2744 }
2745 /* Search from end of a block to beginning. In this loop, find
2746 jumps which need a branch hint and emit the hint only when:
2747 - it's an indirect branch and we're at the insn which sets
2748 the register
2749 - we're at an insn that will invalidate the hint. e.g., a
2750 call, another hint insn, inline asm that clobbers $hbr, and
2751 some inlined operations (divmodsi4). Don't consider jumps
2752 because they are only at the end of a block and are
2753 considered when we are deciding whether to propagate
2754 - we're getting too far away from the branch. The hbr insns
2755 only have a signed 10 bit offset
2756 We go back as far as possible so the branch will be considered
2757 for propagation when we get to the beginning of the block. */
2758 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2759 {
2760 if (INSN_P (insn))
2761 {
2762 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2763 if (branch
2764 && ((GET_CODE (branch_target) == REG
2765 && set_of (branch_target, insn) != NULL_RTX)
2766 || insn_clobbers_hbr (insn)
2767 || branch_addr - insn_addr > 600))
2768 {
2769 rtx next = NEXT_INSN (insn);
2770 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2771 if (insn != BB_END (bb)
2772 && branch_addr - next_addr >= required_dist)
2773 {
2774 if (dump_file)
2775 fprintf (dump_file,
2776 "hint for %i in block %i before %i\n",
2777 INSN_UID (branch), bb->index,
2778 INSN_UID (next));
2779 spu_emit_branch_hint (next, branch, branch_target,
2780 branch_addr - next_addr, blocks);
2781 }
2782 branch = 0;
2783 }
2784
2785 /* JUMP_P will only be true at the end of a block. When
2786 branch is already set it means we've previously decided
2787 to propagate a hint for that branch into this block. */
2788 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2789 {
2790 branch = 0;
2791 if ((branch_target = get_branch_target (insn)))
2792 {
2793 branch = insn;
2794 branch_addr = insn_addr;
2795 required_dist = spu_hint_dist;
2796 }
2797 }
2798 }
2799 if (insn == BB_HEAD (bb))
2800 break;
2801 }
2802
2803 if (branch)
2804 {
2805 /* If we haven't emitted a hint for this branch yet, it might
2806 be profitable to emit it in one of the predecessor blocks,
2807 especially for loops. */
2808 rtx bbend;
2809 basic_block prev = 0, prop = 0, prev2 = 0;
2810 int loop_exit = 0, simple_loop = 0;
2811 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2812
2813 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2814 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2815 prev = EDGE_PRED (bb, j)->src;
2816 else
2817 prev2 = EDGE_PRED (bb, j)->src;
2818
2819 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2820 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2821 loop_exit = 1;
2822 else if (EDGE_SUCC (bb, j)->dest == bb)
2823 simple_loop = 1;
2824
2825 /* If this branch is a loop exit then propagate to previous
2826 fallthru block. This catches the cases when it is a simple
2827 loop or when there is an initial branch into the loop. */
2828 if (prev && (loop_exit || simple_loop)
2829 && prev->loop_depth <= bb->loop_depth)
2830 prop = prev;
2831
2832 /* If there is only one adjacent predecessor, don't propagate
2833 outside this loop. This loop_depth test isn't perfect, but
2834 I'm not sure the loop_father member is valid at this point. */
2835 else if (prev && single_pred_p (bb)
2836 && prev->loop_depth == bb->loop_depth)
2837 prop = prev;
2838
2839 /* If this is the JOIN block of a simple IF-THEN then
2840 propagate the hint to the HEADER block. */
2841 else if (prev && prev2
2842 && EDGE_COUNT (bb->preds) == 2
2843 && EDGE_COUNT (prev->preds) == 1
2844 && EDGE_PRED (prev, 0)->src == prev2
2845 && prev2->loop_depth == bb->loop_depth
2846 && GET_CODE (branch_target) != REG)
2847 prop = prev;
2848
2849 /* Don't propagate when:
2850 - this is a simple loop and the hint would be too far
2851 - this is not a simple loop and there are 16 insns in
2852 this block already
2853 - the predecessor block ends in a branch that will be
2854 hinted
2855 - the predecessor block ends in an insn that invalidates
2856 the hint */
2857 if (prop
2858 && prop->index >= 0
2859 && (bbend = BB_END (prop))
2860 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2861 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2862 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2863 {
2864 if (dump_file)
2865 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2866 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2867 bb->index, prop->index, bb->loop_depth,
2868 INSN_UID (branch), loop_exit, simple_loop,
2869 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2870
2871 spu_bb_info[prop->index].prop_jump = branch;
2872 spu_bb_info[prop->index].bb_index = i;
2873 }
2874 else if (branch_addr - next_addr >= required_dist)
2875 {
2876 if (dump_file)
2877 fprintf (dump_file, "hint for %i in block %i before %i\n",
2878 INSN_UID (branch), bb->index,
2879 INSN_UID (NEXT_INSN (insn)));
2880 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2881 branch_addr - next_addr, blocks);
2882 }
2883 branch = 0;
2884 }
2885 }
2886 free (spu_bb_info);
2887
2888 if (!sbitmap_empty_p (blocks))
2889 find_many_sub_basic_blocks (blocks);
2890
2891 /* We have to schedule to make sure alignment is ok. */
2892 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2893
2894 /* The hints need to be scheduled, so call it again. */
2895 schedule_insns ();
2896 df_finish_pass (true);
2897
2898 insert_hbrp ();
2899
2900 pad_bb ();
2901
2902 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2903 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2904 {
2905 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2906 between its branch label and the branch. We don't move the
2907 label because GCC expects it at the beginning of the block. */
2908 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2909 rtx label_ref = XVECEXP (unspec, 0, 0);
2910 rtx label = XEXP (label_ref, 0);
2911 rtx branch;
2912 int offset = 0;
2913 for (branch = NEXT_INSN (label);
2914 !JUMP_P (branch) && !CALL_P (branch);
2915 branch = NEXT_INSN (branch))
2916 if (NONJUMP_INSN_P (branch))
2917 offset += get_attr_length (branch);
2918 if (offset > 0)
2919 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2920 }
2921
2922 spu_var_tracking ();
2923
2924 free_bb_for_insn ();
2925
2926 in_spu_reorg = 0;
2927 }
2928 \f
2929
2930 /* Insn scheduling routines, primarily for dual issue. */
2931 static int
2932 spu_sched_issue_rate (void)
2933 {
2934 return 2;
2935 }
2936
2937 static int
2938 uses_ls_unit(rtx insn)
2939 {
2940 rtx set = single_set (insn);
2941 if (set != 0
2942 && (GET_CODE (SET_DEST (set)) == MEM
2943 || GET_CODE (SET_SRC (set)) == MEM))
2944 return 1;
2945 return 0;
2946 }
2947
2948 static int
2949 get_pipe (rtx insn)
2950 {
2951 enum attr_type t;
2952 /* Handle inline asm */
2953 if (INSN_CODE (insn) == -1)
2954 return -1;
2955 t = get_attr_type (insn);
2956 switch (t)
2957 {
2958 case TYPE_CONVERT:
2959 return -2;
2960 case TYPE_MULTI0:
2961 return -1;
2962
2963 case TYPE_FX2:
2964 case TYPE_FX3:
2965 case TYPE_SPR:
2966 case TYPE_NOP:
2967 case TYPE_FXB:
2968 case TYPE_FPD:
2969 case TYPE_FP6:
2970 case TYPE_FP7:
2971 return 0;
2972
2973 case TYPE_LNOP:
2974 case TYPE_SHUF:
2975 case TYPE_LOAD:
2976 case TYPE_STORE:
2977 case TYPE_BR:
2978 case TYPE_MULTI1:
2979 case TYPE_HBR:
2980 case TYPE_IPREFETCH:
2981 return 1;
2982 default:
2983 abort ();
2984 }
2985 }
2986
2987
2988 /* haifa-sched.c has a static variable that keeps track of the current
2989 cycle. It is passed to spu_sched_reorder, and we record it here for
2990 use by spu_sched_variable_issue. It won't be accurate if the
2991 scheduler updates its clock_var between the two calls. */
2992 static int clock_var;
2993
2994 /* This is used to keep track of insn alignment. Set to 0 at the
2995 beginning of each block and increased by the "length" attr of each
2996 insn scheduled. */
2997 static int spu_sched_length;
2998
2999 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3000 ready list appropriately in spu_sched_reorder(). */
3001 static int pipe0_clock;
3002 static int pipe1_clock;
3003
3004 static int prev_clock_var;
3005
3006 static int prev_priority;
3007
3008 /* The SPU needs to load the next ilb sometime during the execution of
3009 the previous ilb. There is a potential conflict if every cycle has a
3010 load or store. To avoid the conflict we make sure the load/store
3011 unit is free for at least one cycle during the execution of insns in
3012 the previous ilb. */
3013 static int spu_ls_first;
3014 static int prev_ls_clock;
3015
3016 static void
3017 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3018 int max_ready ATTRIBUTE_UNUSED)
3019 {
3020 spu_sched_length = 0;
3021 }
3022
3023 static void
3024 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3025 int max_ready ATTRIBUTE_UNUSED)
3026 {
3027 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3028 {
3029 /* When any block might be at least 8-byte aligned, assume they
3030 will all be at least 8-byte aligned to make sure dual issue
3031 works out correctly. */
3032 spu_sched_length = 0;
3033 }
3034 spu_ls_first = INT_MAX;
3035 clock_var = -1;
3036 prev_ls_clock = -1;
3037 pipe0_clock = -1;
3038 pipe1_clock = -1;
3039 prev_clock_var = -1;
3040 prev_priority = -1;
3041 }
3042
3043 static int
3044 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3045 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3046 {
3047 int len;
3048 int p;
3049 if (GET_CODE (PATTERN (insn)) == USE
3050 || GET_CODE (PATTERN (insn)) == CLOBBER
3051 || (len = get_attr_length (insn)) == 0)
3052 return more;
3053
3054 spu_sched_length += len;
3055
3056 /* Reset on inline asm */
3057 if (INSN_CODE (insn) == -1)
3058 {
3059 spu_ls_first = INT_MAX;
3060 pipe0_clock = -1;
3061 pipe1_clock = -1;
3062 return 0;
3063 }
3064 p = get_pipe (insn);
3065 if (p == 0)
3066 pipe0_clock = clock_var;
3067 else
3068 pipe1_clock = clock_var;
3069
3070 if (in_spu_reorg)
3071 {
3072 if (clock_var - prev_ls_clock > 1
3073 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3074 spu_ls_first = INT_MAX;
3075 if (uses_ls_unit (insn))
3076 {
3077 if (spu_ls_first == INT_MAX)
3078 spu_ls_first = spu_sched_length;
3079 prev_ls_clock = clock_var;
3080 }
3081
3082 /* The scheduler hasn't inserted the nop, but we will later on.
3083 Include those nops in spu_sched_length. */
3084 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3085 spu_sched_length += 4;
3086 prev_clock_var = clock_var;
3087
3088 /* more is -1 when called from spu_sched_reorder for new insns
3089 that don't have INSN_PRIORITY */
3090 if (more >= 0)
3091 prev_priority = INSN_PRIORITY (insn);
3092 }
3093
3094 /* Always try issuing more insns. spu_sched_reorder will decide
3095 when the cycle should be advanced. */
3096 return 1;
3097 }
3098
3099 /* This function is called for both TARGET_SCHED_REORDER and
3100 TARGET_SCHED_REORDER2. */
3101 static int
3102 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3103 rtx *ready, int *nreadyp, int clock)
3104 {
3105 int i, nready = *nreadyp;
3106 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3107 rtx insn;
3108
3109 clock_var = clock;
3110
3111 if (nready <= 0 || pipe1_clock >= clock)
3112 return 0;
3113
3114 /* Find any rtl insns that don't generate assembly insns and schedule
3115 them first. */
3116 for (i = nready - 1; i >= 0; i--)
3117 {
3118 insn = ready[i];
3119 if (INSN_CODE (insn) == -1
3120 || INSN_CODE (insn) == CODE_FOR_blockage
3121 || (INSN_P (insn) && get_attr_length (insn) == 0))
3122 {
3123 ready[i] = ready[nready - 1];
3124 ready[nready - 1] = insn;
3125 return 1;
3126 }
3127 }
3128
3129 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3130 for (i = 0; i < nready; i++)
3131 if (INSN_CODE (ready[i]) != -1)
3132 {
3133 insn = ready[i];
3134 switch (get_attr_type (insn))
3135 {
3136 default:
3137 case TYPE_MULTI0:
3138 case TYPE_CONVERT:
3139 case TYPE_FX2:
3140 case TYPE_FX3:
3141 case TYPE_SPR:
3142 case TYPE_NOP:
3143 case TYPE_FXB:
3144 case TYPE_FPD:
3145 case TYPE_FP6:
3146 case TYPE_FP7:
3147 pipe_0 = i;
3148 break;
3149 case TYPE_LOAD:
3150 case TYPE_STORE:
3151 pipe_ls = i;
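/* Fall through: loads and stores issue on pipe 1 as well.  */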
3152 case TYPE_LNOP:
3153 case TYPE_SHUF:
3154 case TYPE_BR:
3155 case TYPE_MULTI1:
3156 case TYPE_HBR:
3157 pipe_1 = i;
3158 break;
3159 case TYPE_IPREFETCH:
3160 pipe_hbrp = i;
3161 break;
3162 }
3163 }
3164
3165 /* In the first scheduling phase, schedule loads and stores together
3166 to increase the chance they will get merged during postreload CSE. */
3167 if (!reload_completed && pipe_ls >= 0)
3168 {
3169 insn = ready[pipe_ls];
3170 ready[pipe_ls] = ready[nready - 1];
3171 ready[nready - 1] = insn;
3172 return 1;
3173 }
3174
3175 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3176 if (pipe_hbrp >= 0)
3177 pipe_1 = pipe_hbrp;
3178
3179 /* When we have loads/stores in every cycle of the last 15 insns and
3180 we are about to schedule another load/store, emit an hbrp insn
3181 instead. */
3182 if (in_spu_reorg
3183 && spu_sched_length - spu_ls_first >= 4 * 15
3184 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3185 {
3186 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3187 recog_memoized (insn);
3188 if (pipe0_clock < clock)
3189 PUT_MODE (insn, TImode);
3190 spu_sched_variable_issue (file, verbose, insn, -1);
3191 return 0;
3192 }
3193
3194 /* In general, we want to emit nops to increase dual issue, but dual
3195 issue isn't faster when one of the insns could be scheduled later
3196 without affecting the critical path. We look at INSN_PRIORITY to
3197 make a good guess, but it isn't perfect so -mdual-nops=n can be
3198 used to affect it. */
3199 if (in_spu_reorg && spu_dual_nops < 10)
3200 {
3201 /* When we are at an even address and we are not issuing nops to
3202 improve scheduling then we need to advance the cycle. */
3203 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3204 && (spu_dual_nops == 0
3205 || (pipe_1 != -1
3206 && prev_priority >
3207 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3208 return 0;
3209
3210 /* When at an odd address, schedule the highest priority insn
3211 without considering pipeline. */
3212 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3213 && (spu_dual_nops == 0
3214 || (prev_priority >
3215 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3216 return 1;
3217 }
3218
3219
3220 /* If we haven't issued a pipe0 insn yet this cycle and there is a
3221 pipe0 insn in the ready list, schedule it. */
3222 if (pipe0_clock < clock && pipe_0 >= 0)
3223 schedule_i = pipe_0;
3224
3225 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3226 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3227 else
3228 schedule_i = pipe_1;
3229
3230 if (schedule_i > -1)
3231 {
3232 insn = ready[schedule_i];
3233 ready[schedule_i] = ready[nready - 1];
3234 ready[nready - 1] = insn;
3235 return 1;
3236 }
3237 return 0;
3238 }
3239
3240 /* INSN is dependent on DEP_INSN. */
3241 static int
3242 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3243 {
3244 rtx set;
3245
3246 /* The blockage pattern is used to prevent instructions from being
3247 moved across it and has no cost. */
3248 if (INSN_CODE (insn) == CODE_FOR_blockage
3249 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3250 return 0;
3251
3252 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3253 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3254 return 0;
3255
3256 /* Make sure hbrps are spread out. */
3257 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3258 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3259 return 8;
3260
3261 /* Make sure hints and hbrps are 2 cycles apart. */
3262 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3263 || INSN_CODE (insn) == CODE_FOR_hbr)
3264 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3265 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3266 return 2;
3267
3268 /* An hbrp has no real dependency on other insns. */
3269 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3270 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3271 return 0;
3272
3273 /* Assuming that it is unlikely an argument register will be used in
3274 the first cycle of the called function, we reduce the cost for
3275 slightly better scheduling of dep_insn. When not hinted, the
3276 mispredicted branch would hide the cost as well. */
3277 if (CALL_P (insn))
3278 {
3279 rtx target = get_branch_target (insn);
3280 if (GET_CODE (target) != REG || !set_of (target, insn))
3281 return cost - 2;
3282 return cost;
3283 }
3284
3285 /* And when returning from a function, let's assume the return values
3286 are completed sooner too. */
3287 if (CALL_P (dep_insn))
3288 return cost - 2;
3289
3290 /* Make sure an instruction that loads from the back chain is scheduled
3291 away from the return instruction so a hint is more likely to get
3292 issued. */
3293 if (INSN_CODE (insn) == CODE_FOR__return
3294 && (set = single_set (dep_insn))
3295 && GET_CODE (SET_DEST (set)) == REG
3296 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3297 return 20;
3298
3299 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3300 scheduler makes every insn in a block anti-dependent on the final
3301 jump_insn. We adjust here so higher cost insns will get scheduled
3302 earlier. */
3303 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3304 return insn_cost (dep_insn) - 3;
3305
3306 return cost;
3307 }
3308 \f
3309 /* Create a CONST_DOUBLE from a string. */
3310 rtx
3311 spu_float_const (const char *string, enum machine_mode mode)
3312 {
3313 REAL_VALUE_TYPE value;
3314 value = REAL_VALUE_ATOF (string, mode);
3315 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3316 }
3317
3318 int
3319 spu_constant_address_p (rtx x)
3320 {
3321 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3322 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3323 || GET_CODE (x) == HIGH);
3324 }
3325
3326 static enum spu_immediate
3327 which_immediate_load (HOST_WIDE_INT val)
3328 {
3329 gcc_assert (val == trunc_int_for_mode (val, SImode));
3330
3331 if (val >= -0x8000 && val <= 0x7fff)
3332 return SPU_IL;
3333 if (val >= 0 && val <= 0x3ffff)
3334 return SPU_ILA;
3335 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3336 return SPU_ILH;
3337 if ((val & 0xffff) == 0)
3338 return SPU_ILHU;
3339
3340 return SPU_NONE;
3341 }
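/* Illustrative examples of the classification above:

     which_immediate_load (0x1234)     => SPU_IL    (fits signed 16 bits)
     which_immediate_load (0x12345)    => SPU_ILA   (fits unsigned 18 bits)
     which_immediate_load (0x12341234) => SPU_ILH   (halfwords identical)
     which_immediate_load (0x12340000) => SPU_ILHU  (low halfword is zero)
     which_immediate_load (0x12345678) => SPU_NONE  */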
3342
3343 /* Return true when OP can be loaded by one of the il instructions, or
3344 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3345 int
3346 immediate_load_p (rtx op, enum machine_mode mode)
3347 {
3348 if (CONSTANT_P (op))
3349 {
3350 enum immediate_class c = classify_immediate (op, mode);
3351 return c == IC_IL1 || c == IC_IL1s
3352 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3353 }
3354 return 0;
3355 }
3356
3357 /* Return true if the first SIZE bytes of ARR are a constant that can be
3358 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3359 represent the size and offset of the instruction to use. */
3360 static int
3361 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3362 {
3363 int cpat, run, i, start;
3364 cpat = 1;
3365 run = 0;
3366 start = -1;
3367 for (i = 0; i < size && cpat; i++)
3368 if (arr[i] != i+16)
3369 {
3370 if (!run)
3371 {
3372 start = i;
3373 if (arr[i] == 3)
3374 run = 1;
3375 else if (arr[i] == 2 && arr[i+1] == 3)
3376 run = 2;
3377 else if (arr[i] == 0)
3378 {
3379 while (arr[i+run] == run && i+run < 16)
3380 run++;
3381 if (run != 4 && run != 8)
3382 cpat = 0;
3383 }
3384 else
3385 cpat = 0;
3386 if ((i & (run-1)) != 0)
3387 cpat = 0;
3388 i += run;
3389 }
3390 else
3391 cpat = 0;
3392 }
3393 if (cpat && (run || size < 16))
3394 {
3395 if (run == 0)
3396 run = 1;
3397 if (prun)
3398 *prun = run;
3399 if (pstart)
3400 *pstart = start == -1 ? 16-run : start;
3401 return 1;
3402 }
3403 return 0;
3404 }
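/* Illustrative example: a 16-byte constant whose bytes are

     10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f

   is accepted above with *PRUN == 4 and *PSTART == 4: identity bytes
   0x10..0x1f everywhere except a 4-byte run 00 01 02 03 at offset 4,
   which is the kind of shuffle pattern cwd generates for a word
   insertion at that offset.  */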
3405
3406 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3407 it into a register. MODE is only valid when OP is a CONST_INT. */
3408 static enum immediate_class
3409 classify_immediate (rtx op, enum machine_mode mode)
3410 {
3411 HOST_WIDE_INT val;
3412 unsigned char arr[16];
3413 int i, j, repeated, fsmbi, repeat;
3414
3415 gcc_assert (CONSTANT_P (op));
3416
3417 if (GET_MODE (op) != VOIDmode)
3418 mode = GET_MODE (op);
3419
3420 /* A V4SI const_vector with all identical symbols is ok. */
3421 if (!flag_pic
3422 && mode == V4SImode
3423 && GET_CODE (op) == CONST_VECTOR
3424 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3425 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3426 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3427 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3428 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3429 op = CONST_VECTOR_ELT (op, 0);
3430
3431 switch (GET_CODE (op))
3432 {
3433 case SYMBOL_REF:
3434 case LABEL_REF:
3435 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3436
3437 case CONST:
3438 /* We can never know if the resulting address fits in 18 bits and can be
3439 loaded with ila. For now, assume the address will not overflow if
3440 the displacement is "small" (fits 'K' constraint). */
3441 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3442 {
3443 rtx sym = XEXP (XEXP (op, 0), 0);
3444 rtx cst = XEXP (XEXP (op, 0), 1);
3445
3446 if (GET_CODE (sym) == SYMBOL_REF
3447 && GET_CODE (cst) == CONST_INT
3448 && satisfies_constraint_K (cst))
3449 return IC_IL1s;
3450 }
3451 return IC_IL2s;
3452
3453 case HIGH:
3454 return IC_IL1s;
3455
3456 case CONST_VECTOR:
3457 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3458 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3459 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3460 return IC_POOL;
3461 /* Fall through. */
3462
3463 case CONST_INT:
3464 case CONST_DOUBLE:
3465 constant_to_array (mode, op, arr);
3466
3467 /* Check that each 4-byte slot is identical. */
3468 repeated = 1;
3469 for (i = 4; i < 16; i += 4)
3470 for (j = 0; j < 4; j++)
3471 if (arr[j] != arr[i + j])
3472 repeated = 0;
3473
3474 if (repeated)
3475 {
3476 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3477 val = trunc_int_for_mode (val, SImode);
3478
3479 if (which_immediate_load (val) != SPU_NONE)
3480 return IC_IL1;
3481 }
3482
3483 /* Any mode of 2 bytes or smaller can be loaded with an il
3484 instruction. */
3485 gcc_assert (GET_MODE_SIZE (mode) > 2);
3486
3487 fsmbi = 1;
3488 repeat = 0;
3489 for (i = 0; i < 16 && fsmbi; i++)
3490 if (arr[i] != 0 && repeat == 0)
3491 repeat = arr[i];
3492 else if (arr[i] != 0 && arr[i] != repeat)
3493 fsmbi = 0;
3494 if (fsmbi)
3495 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3496
3497 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3498 return IC_CPAT;
3499
3500 if (repeated)
3501 return IC_IL2;
3502
3503 return IC_POOL;
3504 default:
3505 break;
3506 }
3507 gcc_unreachable ();
3508 }
3509
3510 static enum spu_immediate
3511 which_logical_immediate (HOST_WIDE_INT val)
3512 {
3513 gcc_assert (val == trunc_int_for_mode (val, SImode));
3514
3515 if (val >= -0x200 && val <= 0x1ff)
3516 return SPU_ORI;
3517 if (val >= 0 && val <= 0xffff)
3518 return SPU_IOHL;
3519 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3520 {
3521 val = trunc_int_for_mode (val, HImode);
3522 if (val >= -0x200 && val <= 0x1ff)
3523 return SPU_ORHI;
3524 if ((val & 0xff) == ((val >> 8) & 0xff))
3525 {
3526 val = trunc_int_for_mode (val, QImode);
3527 if (val >= -0x200 && val <= 0x1ff)
3528 return SPU_ORBI;
3529 }
3530 }
3531 return SPU_NONE;
3532 }
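/* Illustrative examples of the classification above:

     which_logical_immediate (100)        => SPU_ORI
     which_logical_immediate (0x1234)     => SPU_IOHL
     which_logical_immediate (0x00500050) => SPU_ORHI  (halfwords repeat)
     which_logical_immediate (0x03030303) => SPU_ORBI  (bytes repeat)
     which_logical_immediate (0x12345678) => SPU_NONE  */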
3533
3534 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3535 CONST_DOUBLEs. */
3536 static int
3537 const_vector_immediate_p (rtx x)
3538 {
3539 int i;
3540 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3541 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3542 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3543 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3544 return 0;
3545 return 1;
3546 }
3547
3548 int
3549 logical_immediate_p (rtx op, enum machine_mode mode)
3550 {
3551 HOST_WIDE_INT val;
3552 unsigned char arr[16];
3553 int i, j;
3554
3555 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3556 || GET_CODE (op) == CONST_VECTOR);
3557
3558 if (GET_CODE (op) == CONST_VECTOR
3559 && !const_vector_immediate_p (op))
3560 return 0;
3561
3562 if (GET_MODE (op) != VOIDmode)
3563 mode = GET_MODE (op);
3564
3565 constant_to_array (mode, op, arr);
3566
3567 /* Check that bytes are repeated. */
3568 for (i = 4; i < 16; i += 4)
3569 for (j = 0; j < 4; j++)
3570 if (arr[j] != arr[i + j])
3571 return 0;
3572
3573 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3574 val = trunc_int_for_mode (val, SImode);
3575
3576 i = which_logical_immediate (val);
3577 return i != SPU_NONE && i != SPU_IOHL;
3578 }
3579
3580 int
3581 iohl_immediate_p (rtx op, enum machine_mode mode)
3582 {
3583 HOST_WIDE_INT val;
3584 unsigned char arr[16];
3585 int i, j;
3586
3587 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3588 || GET_CODE (op) == CONST_VECTOR);
3589
3590 if (GET_CODE (op) == CONST_VECTOR
3591 && !const_vector_immediate_p (op))
3592 return 0;
3593
3594 if (GET_MODE (op) != VOIDmode)
3595 mode = GET_MODE (op);
3596
3597 constant_to_array (mode, op, arr);
3598
3599 /* Check that bytes are repeated. */
3600 for (i = 4; i < 16; i += 4)
3601 for (j = 0; j < 4; j++)
3602 if (arr[j] != arr[i + j])
3603 return 0;
3604
3605 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3606 val = trunc_int_for_mode (val, SImode);
3607
3608 return val >= 0 && val <= 0xffff;
3609 }
3610
3611 int
3612 arith_immediate_p (rtx op, enum machine_mode mode,
3613 HOST_WIDE_INT low, HOST_WIDE_INT high)
3614 {
3615 HOST_WIDE_INT val;
3616 unsigned char arr[16];
3617 int bytes, i, j;
3618
3619 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3620 || GET_CODE (op) == CONST_VECTOR);
3621
3622 if (GET_CODE (op) == CONST_VECTOR
3623 && !const_vector_immediate_p (op))
3624 return 0;
3625
3626 if (GET_MODE (op) != VOIDmode)
3627 mode = GET_MODE (op);
3628
3629 constant_to_array (mode, op, arr);
3630
3631 if (VECTOR_MODE_P (mode))
3632 mode = GET_MODE_INNER (mode);
3633
3634 bytes = GET_MODE_SIZE (mode);
3635 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3636
3637 /* Check that bytes are repeated. */
3638 for (i = bytes; i < 16; i += bytes)
3639 for (j = 0; j < bytes; j++)
3640 if (arr[j] != arr[i + j])
3641 return 0;
3642
3643 val = arr[0];
3644 for (j = 1; j < bytes; j++)
3645 val = (val << 8) | arr[j];
3646
3647 val = trunc_int_for_mode (val, mode);
3648
3649 return val >= low && val <= high;
3650 }
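/* Illustrative example: for a V4SImode vector with every element
   (const_int 12) and a requested range of LOW == -32, HIGH == 31, the
   bytes repeat every 4 bytes, the reassembled value is 12, and the
   function returns true; with LOW == -8, HIGH == 7 it returns false.  */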
3651
3652 /* TRUE when op is an immediate and an exact power of 2, and given that
3653 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3654 all entries must be the same. */
3655 bool
3656 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3657 {
3658 enum machine_mode int_mode;
3659 HOST_WIDE_INT val;
3660 unsigned char arr[16];
3661 int bytes, i, j;
3662
3663 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3664 || GET_CODE (op) == CONST_VECTOR);
3665
3666 if (GET_CODE (op) == CONST_VECTOR
3667 && !const_vector_immediate_p (op))
3668 return 0;
3669
3670 if (GET_MODE (op) != VOIDmode)
3671 mode = GET_MODE (op);
3672
3673 constant_to_array (mode, op, arr);
3674
3675 if (VECTOR_MODE_P (mode))
3676 mode = GET_MODE_INNER (mode);
3677
3678 bytes = GET_MODE_SIZE (mode);
3679 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3680
3681 /* Check that bytes are repeated. */
3682 for (i = bytes; i < 16; i += bytes)
3683 for (j = 0; j < bytes; j++)
3684 if (arr[j] != arr[i + j])
3685 return 0;
3686
3687 val = arr[0];
3688 for (j = 1; j < bytes; j++)
3689 val = (val << 8) | arr[j];
3690
3691 val = trunc_int_for_mode (val, int_mode);
3692
3693 /* Currently, we only handle SFmode */
3694 gcc_assert (mode == SFmode);
3695 if (mode == SFmode)
3696 {
3697 int exp = (val >> 23) - 127;
3698 return val > 0 && (val & 0x007fffff) == 0
3699 && exp >= low && exp <= high;
3700 }
3701 return FALSE;
3702 }
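/* Illustrative example: for SFmode the test above inspects the IEEE-754
   bit pattern directly.  4.0f is 0x40800000; the mantissa bits
   (0x007fffff) are all zero and the biased exponent 0x81 gives
   exp == 2, so the function returns true whenever LOW <= 2 <= HIGH.  */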
3703
3704 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3705
3706 static int
3707 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3708 {
3709 rtx x = *px;
3710 tree decl;
3711
3712 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3713 {
3714 rtx plus = XEXP (x, 0);
3715 rtx op0 = XEXP (plus, 0);
3716 rtx op1 = XEXP (plus, 1);
3717 if (GET_CODE (op1) == CONST_INT)
3718 x = op0;
3719 }
3720
3721 return (GET_CODE (x) == SYMBOL_REF
3722 && (decl = SYMBOL_REF_DECL (x)) != 0
3723 && TREE_CODE (decl) == VAR_DECL
3724 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3725 }
3726
3727 /* We accept:
3728 - any 32-bit constant (SImode, SFmode)
3729 - any constant that can be generated with fsmbi (any mode)
3730 - a 64-bit constant where the high and low bits are identical
3731 (DImode, DFmode)
3732 - a 128-bit constant where the four 32-bit words match. */
3733 bool
3734 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3735 {
3736 if (GET_CODE (x) == HIGH)
3737 x = XEXP (x, 0);
3738
3739 /* Reject any __ea qualified reference. These can't appear in
3740 instructions but must be forced to the constant pool. */
3741 if (for_each_rtx (&x, ea_symbol_ref, 0))
3742 return 0;
3743
3744 /* V4SI with all identical symbols is valid. */
3745 if (!flag_pic
3746 && mode == V4SImode
3747 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3748 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3749 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3750 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3751 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3752 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3753
3754 if (GET_CODE (x) == CONST_VECTOR
3755 && !const_vector_immediate_p (x))
3756 return 0;
3757 return 1;
3758 }
3759
3760 /* Valid addresses are:
3761 - symbol_ref, label_ref, const
3762 - reg
3763 - reg + const_int, where const_int is 16 byte aligned
3764 - reg + reg, alignment doesn't matter
3765 The alignment matters in the reg+const case because lqd and stqd
3766 ignore the 4 least significant bits of the const. We only care about
3767 16 byte modes because the expand phase will change all smaller MEM
3768 references to TImode. */
3769 static bool
3770 spu_legitimate_address_p (enum machine_mode mode,
3771 rtx x, bool reg_ok_strict)
3772 {
3773 int aligned = GET_MODE_SIZE (mode) >= 16;
3774 if (aligned
3775 && GET_CODE (x) == AND
3776 && GET_CODE (XEXP (x, 1)) == CONST_INT
3777 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3778 x = XEXP (x, 0);
3779 switch (GET_CODE (x))
3780 {
3781 case LABEL_REF:
3782 return !TARGET_LARGE_MEM;
3783
3784 case SYMBOL_REF:
3785 case CONST:
3786 /* Keep __ea references until reload so that spu_expand_mov can see them
3787 in MEMs. */
3788 if (ea_symbol_ref (&x, 0))
3789 return !reload_in_progress && !reload_completed;
3790 return !TARGET_LARGE_MEM;
3791
3792 case CONST_INT:
3793 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3794
3795 case SUBREG:
3796 x = XEXP (x, 0);
3797 if (REG_P (x))
3798 return 0;
3799
3800 case REG:
3801 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3802
3803 case PLUS:
3804 case LO_SUM:
3805 {
3806 rtx op0 = XEXP (x, 0);
3807 rtx op1 = XEXP (x, 1);
3808 if (GET_CODE (op0) == SUBREG)
3809 op0 = XEXP (op0, 0);
3810 if (GET_CODE (op1) == SUBREG)
3811 op1 = XEXP (op1, 0);
3812 if (GET_CODE (op0) == REG
3813 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3814 && GET_CODE (op1) == CONST_INT
3815 && INTVAL (op1) >= -0x2000
3816 && INTVAL (op1) <= 0x1fff
3817 && (!aligned || (INTVAL (op1) & 15) == 0))
3818 return TRUE;
3819 if (GET_CODE (op0) == REG
3820 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3821 && GET_CODE (op1) == REG
3822 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3823 return TRUE;
3824 }
3825 break;
3826
3827 default:
3828 break;
3829 }
3830 return FALSE;
3831 }
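/* Illustrative examples for a 16-byte mode such as TImode:

     (reg 3)                        accepted
     (plus (reg 3) (const_int 32))  accepted, offset is 16-byte aligned
     (plus (reg 3) (const_int 10))  rejected, offset is not aligned
     (plus (reg 3) (reg 4))         accepted, reg+reg ignores alignment
     (symbol_ref "x")               accepted only when !TARGET_LARGE_MEM  */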
3832
3833 /* Like spu_legitimate_address_p, except with named addresses. */
3834 static bool
3835 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3836 bool reg_ok_strict, addr_space_t as)
3837 {
3838 if (as == ADDR_SPACE_EA)
3839 return (REG_P (x) && (GET_MODE (x) == EAmode));
3840
3841 else if (as != ADDR_SPACE_GENERIC)
3842 gcc_unreachable ();
3843
3844 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3845 }
3846
3847 /* When the address is reg + const_int, force the const_int into a
3848 register. */
3849 rtx
3850 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3851 enum machine_mode mode ATTRIBUTE_UNUSED)
3852 {
3853 rtx op0, op1;
3854 /* Make sure both operands are registers. */
3855 if (GET_CODE (x) == PLUS)
3856 {
3857 op0 = XEXP (x, 0);
3858 op1 = XEXP (x, 1);
3859 if (ALIGNED_SYMBOL_REF_P (op0))
3860 {
3861 op0 = force_reg (Pmode, op0);
3862 mark_reg_pointer (op0, 128);
3863 }
3864 else if (GET_CODE (op0) != REG)
3865 op0 = force_reg (Pmode, op0);
3866 if (ALIGNED_SYMBOL_REF_P (op1))
3867 {
3868 op1 = force_reg (Pmode, op1);
3869 mark_reg_pointer (op1, 128);
3870 }
3871 else if (GET_CODE (op1) != REG)
3872 op1 = force_reg (Pmode, op1);
3873 x = gen_rtx_PLUS (Pmode, op0, op1);
3874 }
3875 return x;
3876 }
3877
3878 /* Like spu_legitimize_address, except with named address support. */
3879 static rtx
3880 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3881 addr_space_t as)
3882 {
3883 if (as != ADDR_SPACE_GENERIC)
3884 return x;
3885
3886 return spu_legitimize_address (x, oldx, mode);
3887 }
3888
3889 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3890 struct attribute_spec.handler. */
3891 static tree
3892 spu_handle_fndecl_attribute (tree * node,
3893 tree name,
3894 tree args ATTRIBUTE_UNUSED,
3895 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3896 {
3897 if (TREE_CODE (*node) != FUNCTION_DECL)
3898 {
3899 warning (0, "%qE attribute only applies to functions",
3900 name);
3901 *no_add_attrs = true;
3902 }
3903
3904 return NULL_TREE;
3905 }
3906
3907 /* Handle the "vector" attribute. */
3908 static tree
3909 spu_handle_vector_attribute (tree * node, tree name,
3910 tree args ATTRIBUTE_UNUSED,
3911 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3912 {
3913 tree type = *node, result = NULL_TREE;
3914 enum machine_mode mode;
3915 int unsigned_p;
3916
3917 while (POINTER_TYPE_P (type)
3918 || TREE_CODE (type) == FUNCTION_TYPE
3919 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3920 type = TREE_TYPE (type);
3921
3922 mode = TYPE_MODE (type);
3923
3924 unsigned_p = TYPE_UNSIGNED (type);
3925 switch (mode)
3926 {
3927 case DImode:
3928 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3929 break;
3930 case SImode:
3931 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3932 break;
3933 case HImode:
3934 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3935 break;
3936 case QImode:
3937 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3938 break;
3939 case SFmode:
3940 result = V4SF_type_node;
3941 break;
3942 case DFmode:
3943 result = V2DF_type_node;
3944 break;
3945 default:
3946 break;
3947 }
3948
3949 /* Propagate qualifiers attached to the element type
3950 onto the vector type. */
3951 if (result && result != type && TYPE_QUALS (type))
3952 result = build_qualified_type (result, TYPE_QUALS (type));
3953
3954 *no_add_attrs = true; /* No need to hang on to the attribute. */
3955
3956 if (!result)
3957 warning (0, "%qE attribute ignored", name);
3958 else
3959 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3960
3961 return NULL_TREE;
3962 }
3963
3964 /* Return nonzero if FUNC is a naked function. */
3965 static int
3966 spu_naked_function_p (tree func)
3967 {
3968 tree a;
3969
3970 if (TREE_CODE (func) != FUNCTION_DECL)
3971 abort ();
3972
3973 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3974 return a != NULL_TREE;
3975 }
3976
3977 int
3978 spu_initial_elimination_offset (int from, int to)
3979 {
3980 int saved_regs_size = spu_saved_regs_size ();
3981 int sp_offset = 0;
3982 if (!current_function_is_leaf || crtl->outgoing_args_size
3983 || get_frame_size () || saved_regs_size)
3984 sp_offset = STACK_POINTER_OFFSET;
3985 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3986 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3987 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3988 return get_frame_size ();
3989 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3990 return sp_offset + crtl->outgoing_args_size
3991 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3992 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3993 return get_frame_size () + saved_regs_size + sp_offset;
3994 else
3995 gcc_unreachable ();
3996 }
3997
3998 rtx
3999 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
4000 {
4001 enum machine_mode mode = TYPE_MODE (type);
4002 int byte_size = ((mode == BLKmode)
4003 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4004
4005 /* Make sure small structs are left justified in a register. */
4006 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4007 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4008 {
4009 enum machine_mode smode;
4010 rtvec v;
4011 int i;
4012 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4013 int n = byte_size / UNITS_PER_WORD;
4014 v = rtvec_alloc (nregs);
4015 for (i = 0; i < n; i++)
4016 {
4017 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4018 gen_rtx_REG (TImode,
4019 FIRST_RETURN_REGNUM
4020 + i),
4021 GEN_INT (UNITS_PER_WORD * i));
4022 byte_size -= UNITS_PER_WORD;
4023 }
4024
4025 if (n < nregs)
4026 {
4027 if (byte_size < 4)
4028 byte_size = 4;
4029 smode =
4030 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4031 RTVEC_ELT (v, n) =
4032 gen_rtx_EXPR_LIST (VOIDmode,
4033 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4034 GEN_INT (UNITS_PER_WORD * n));
4035 }
4036 return gen_rtx_PARALLEL (mode, v);
4037 }
4038 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4039 }
4040
4041 static rtx
4042 spu_function_arg (CUMULATIVE_ARGS *cum,
4043 enum machine_mode mode,
4044 const_tree type, bool named ATTRIBUTE_UNUSED)
4045 {
4046 int byte_size;
4047
4048 if (*cum >= MAX_REGISTER_ARGS)
4049 return 0;
4050
4051 byte_size = ((mode == BLKmode)
4052 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4053
4054 /* The ABI does not allow parameters to be passed partially in
4055 reg and partially in stack. */
4056 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4057 return 0;
4058
4059 /* Make sure small structs are left justified in a register. */
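 /* For example, a 3-byte struct is padded to 4 bytes and passed as a
 single SImode piece at byte offset 0, i.e. in the left-most bytes of
 the argument register.  */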
4060 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4061 && byte_size < UNITS_PER_WORD && byte_size > 0)
4062 {
4063 enum machine_mode smode;
4064 rtx gr_reg;
4065 if (byte_size < 4)
4066 byte_size = 4;
4067 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4068 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4069 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
4070 const0_rtx);
4071 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4072 }
4073 else
4074 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
4075 }
4076
4077 static void
4078 spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4079 const_tree type, bool named ATTRIBUTE_UNUSED)
4080 {
4081 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4082 ? 1
4083 : mode == BLKmode
4084 ? ((int_size_in_bytes (type) + 15) / 16)
4085 : mode == VOIDmode
4086 ? 1
4087 : HARD_REGNO_NREGS (cum, mode));
4088 }
4089
4090 /* Variable sized types are passed by reference. */
4091 static bool
4092 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4093 enum machine_mode mode ATTRIBUTE_UNUSED,
4094 const_tree type, bool named ATTRIBUTE_UNUSED)
4095 {
4096 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4097 }
4098 \f
4099
4100 /* Var args. */
4101
4102 /* Create and return the va_list datatype.
4103
4104 On SPU, va_list is an array type equivalent to
4105
4106 typedef struct __va_list_tag
4107 {
4108 void *__args __attribute__((__aligned(16)));
4109 void *__skip __attribute__((__aligned(16)));
4110
4111 } va_list[1];
4112
4113 where __args points to the arg that will be returned by the next
4114 va_arg(), and __skip points to the previous stack frame such that
4115 when __args == __skip we should advance __args by 32 bytes. */
4116 static tree
4117 spu_build_builtin_va_list (void)
4118 {
4119 tree f_args, f_skip, record, type_decl;
4120 bool owp;
4121
4122 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4123
4124 type_decl =
4125 build_decl (BUILTINS_LOCATION,
4126 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4127
4128 f_args = build_decl (BUILTINS_LOCATION,
4129 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4130 f_skip = build_decl (BUILTINS_LOCATION,
4131 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4132
4133 DECL_FIELD_CONTEXT (f_args) = record;
4134 DECL_ALIGN (f_args) = 128;
4135 DECL_USER_ALIGN (f_args) = 1;
4136
4137 DECL_FIELD_CONTEXT (f_skip) = record;
4138 DECL_ALIGN (f_skip) = 128;
4139 DECL_USER_ALIGN (f_skip) = 1;
4140
4141 TYPE_STUB_DECL (record) = type_decl;
4142 TYPE_NAME (record) = type_decl;
4143 TYPE_FIELDS (record) = f_args;
4144 DECL_CHAIN (f_args) = f_skip;
4145
4146 /* We know this is being padded and we want it to be. It is an internal
4147 type, so hide the padding warnings from the user. */
4148 owp = warn_padded;
4149 warn_padded = false;
4150
4151 layout_type (record);
4152
4153 warn_padded = owp;
4154
4155 /* The correct type is an array type of one element. */
4156 return build_array_type (record, build_index_type (size_zero_node));
4157 }
4158
4159 /* Implement va_start by filling the va_list structure VALIST.
4160 NEXTARG points to the first anonymous stack argument.
4161
4162 The following global variables are used to initialize
4163 the va_list structure:
4164
4165 crtl->args.info;
4166 the CUMULATIVE_ARGS for this function
4167
4168 crtl->args.arg_offset_rtx:
4169 holds the offset of the first anonymous stack argument
4170 (relative to the virtual arg pointer). */
4171
4172 static void
4173 spu_va_start (tree valist, rtx nextarg)
4174 {
4175 tree f_args, f_skip;
4176 tree args, skip, t;
4177
4178 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4179 f_skip = DECL_CHAIN (f_args);
4180
4181 valist = build_simple_mem_ref (valist);
4182 args =
4183 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4184 skip =
4185 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4186
4187 /* Find the __args area. */
4188 t = make_tree (TREE_TYPE (args), nextarg);
4189 if (crtl->args.pretend_args_size > 0)
4190 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4191 size_int (-STACK_POINTER_OFFSET));
4192 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4193 TREE_SIDE_EFFECTS (t) = 1;
4194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4195
4196 /* Find the __skip area. */
4197 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4198 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4199 size_int (crtl->args.pretend_args_size
4200 - STACK_POINTER_OFFSET));
4201 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4202 TREE_SIDE_EFFECTS (t) = 1;
4203 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4204 }
4205
4206 /* Gimplify va_arg by updating the va_list structure
4207 VALIST as required to retrieve an argument of type
4208 TYPE, and returning that argument.
4209
4210 ret = va_arg(VALIST, TYPE);
4211
4212 generates code equivalent to:
4213
4214 paddedsize = (sizeof(TYPE) + 15) & -16;
4215 if (VALIST.__args + paddedsize > VALIST.__skip
4216 && VALIST.__args <= VALIST.__skip)
4217 addr = VALIST.__skip + 32;
4218 else
4219 addr = VALIST.__args;
4220 VALIST.__args = addr + paddedsize;
4221 ret = *(TYPE *)addr;
4222 */
4223 static tree
4224 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4225 gimple_seq * post_p ATTRIBUTE_UNUSED)
4226 {
4227 tree f_args, f_skip;
4228 tree args, skip;
4229 HOST_WIDE_INT size, rsize;
4230 tree paddedsize, addr, tmp;
4231 bool pass_by_reference_p;
4232
4233 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4234 f_skip = DECL_CHAIN (f_args);
4235
4236 valist = build_simple_mem_ref (valist);
4237 args =
4238 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4239 skip =
4240 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4241
4242 addr = create_tmp_var (ptr_type_node, "va_arg");
4243
4244 /* if an object is dynamically sized, a pointer to it is passed
4245 instead of the object itself. */
4246 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4247 false);
4248 if (pass_by_reference_p)
4249 type = build_pointer_type (type);
4250 size = int_size_in_bytes (type);
4251 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4252
4253 /* build conditional expression to calculate addr. The expression
4254 will be gimplified later. */
4255 paddedsize = size_int (rsize);
4256 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4257 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4258 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4259 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4260 unshare_expr (skip)));
4261
4262 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4263 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4264 size_int (32)), unshare_expr (args));
4265
4266 gimplify_assign (addr, tmp, pre_p);
4267
4268 /* update VALIST.__args */
4269 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4270 gimplify_assign (unshare_expr (args), tmp, pre_p);
4271
4272 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4273 addr);
4274
4275 if (pass_by_reference_p)
4276 addr = build_va_arg_indirect_ref (addr);
4277
4278 return build_va_arg_indirect_ref (addr);
4279 }
4280
4281 /* Save parameter registers starting with the register that corresponds
4282 to the first unnamed parameters. If the first unnamed parameter is
4283 in the stack then save no registers. Set pretend_args_size to the
4284 amount of space needed to save the registers. */
4285 void
4286 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4287 tree type, int *pretend_size, int no_rtl)
4288 {
4289 if (!no_rtl)
4290 {
4291 rtx tmp;
4292 int regno;
4293 int offset;
4294 int ncum = *cum;
4295
4296 /* cum currently points to the last named argument; we want to
4297 start at the next argument. */
4298 spu_function_arg_advance (&ncum, mode, type, true);
4299
4300 offset = -STACK_POINTER_OFFSET;
4301 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4302 {
4303 tmp = gen_frame_mem (V4SImode,
4304 plus_constant (virtual_incoming_args_rtx,
4305 offset));
4306 emit_move_insn (tmp,
4307 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4308 offset += 16;
4309 }
4310 *pretend_size = offset + STACK_POINTER_OFFSET;
4311 }
4312 }
4313 \f
4314 static void
4315 spu_conditional_register_usage (void)
4316 {
4317 if (flag_pic)
4318 {
4319 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4320 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4321 }
4322 }
4323
4324 /* This is called any time we inspect the alignment of a register for
4325 addresses. */
4326 static int
4327 reg_aligned_for_addr (rtx x)
4328 {
4329 int regno =
4330 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4331 return REGNO_POINTER_ALIGN (regno) >= 128;
4332 }
4333
4334 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4335 into its SYMBOL_REF_FLAGS. */
4336 static void
4337 spu_encode_section_info (tree decl, rtx rtl, int first)
4338 {
4339 default_encode_section_info (decl, rtl, first);
4340
4341 /* If a variable has a forced alignment to < 16 bytes, mark it with
4342 SYMBOL_FLAG_ALIGN1. */
4343 if (TREE_CODE (decl) == VAR_DECL
4344 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4345 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4346 }
4347
4348 /* Return TRUE if we are certain the mem refers to a complete object
4349 which is both 16-byte aligned and padded to a 16-byte boundary. This
4350 would make it safe to store with a single instruction.
4351 We guarantee the alignment and padding for static objects by aligning
4352 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4353 FIXME: We currently cannot guarantee this for objects on the stack
4354 because assign_parm_setup_stack calls assign_stack_local with the
4355 alignment of the parameter mode and in that case the alignment never
4356 gets adjusted by LOCAL_ALIGNMENT. */
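/* For example, a store to a 16-byte-aligned global "int" satisfies this
   test, so spu_split_store can rewrite the whole enclosing quadword with
   a single store instead of doing a load/merge/store sequence.  */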
4357 static int
4358 store_with_one_insn_p (rtx mem)
4359 {
4360 enum machine_mode mode = GET_MODE (mem);
4361 rtx addr = XEXP (mem, 0);
4362 if (mode == BLKmode)
4363 return 0;
4364 if (GET_MODE_SIZE (mode) >= 16)
4365 return 1;
4366 /* Only static objects. */
4367 if (GET_CODE (addr) == SYMBOL_REF)
4368 {
4369 /* We use the associated declaration to make sure the access is
4370 referring to the whole object.
4371 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4372 if it is necessary. Will there be cases where one exists, and
4373 the other does not? Will there be cases where both exist, but
4374 have different types? */
4375 tree decl = MEM_EXPR (mem);
4376 if (decl
4377 && TREE_CODE (decl) == VAR_DECL
4378 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4379 return 1;
4380 decl = SYMBOL_REF_DECL (addr);
4381 if (decl
4382 && TREE_CODE (decl) == VAR_DECL
4383 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4384 return 1;
4385 }
4386 return 0;
4387 }
4388
4389 /* Return 1 when the address is not valid for a simple load and store as
4390 required by the '_mov*' patterns. We could make this less strict
4391 for loads, but we prefer MEMs to look the same so they are more
4392 likely to be merged. */
4393 static int
4394 address_needs_split (rtx mem)
4395 {
4396 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4397 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4398 || !(store_with_one_insn_p (mem)
4399 || mem_is_padded_component_ref (mem))))
4400 return 1;
4401
4402 return 0;
4403 }
4404
4405 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4406 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4407 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4408
4409 /* MEM is known to be an __ea qualified memory access. Emit a call to
4410 fetch the PPU-side memory into local store, and set DATA_ADDR to its
4411 address in local store. */
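/* As a rough sketch, the libcall emitted here corresponds to
     data_addr = __cache_fetch_dirty (ea_addr, GET_MODE_SIZE (mode));
   for a store, or
     data_addr = __cache_fetch (ea_addr);
   for a load, using the software-cache runtime routines named below.  */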
4412
4413 static void
4414 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4415 {
4416 if (is_store)
4417 {
4418 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4419 if (!cache_fetch_dirty)
4420 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4421 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4422 2, ea_addr, EAmode, ndirty, SImode);
4423 }
4424 else
4425 {
4426 if (!cache_fetch)
4427 cache_fetch = init_one_libfunc ("__cache_fetch");
4428 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4429 1, ea_addr, EAmode);
4430 }
4431 }
4432
4433 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4434 dirty bit marking, inline.
4435
4436 The cache control data structure is an array of
4437
4438 struct __cache_tag_array
4439 {
4440 unsigned int tag_lo[4];
4441 unsigned int tag_hi[4];
4442 void *data_pointer[4];
4443 int reserved[4];
4444 vector unsigned short dirty_bits[4];
4445 } */
4446
4447 static void
4448 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4449 {
4450 rtx ea_addr_si;
4451 HOST_WIDE_INT v;
4452 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4453 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4454 rtx index_mask = gen_reg_rtx (SImode);
4455 rtx tag_arr = gen_reg_rtx (Pmode);
4456 rtx splat_mask = gen_reg_rtx (TImode);
4457 rtx splat = gen_reg_rtx (V4SImode);
4458 rtx splat_hi = NULL_RTX;
4459 rtx tag_index = gen_reg_rtx (Pmode);
4460 rtx block_off = gen_reg_rtx (SImode);
4461 rtx tag_addr = gen_reg_rtx (Pmode);
4462 rtx tag = gen_reg_rtx (V4SImode);
4463 rtx cache_tag = gen_reg_rtx (V4SImode);
4464 rtx cache_tag_hi = NULL_RTX;
4465 rtx cache_ptrs = gen_reg_rtx (TImode);
4466 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4467 rtx tag_equal = gen_reg_rtx (V4SImode);
4468 rtx tag_equal_hi = NULL_RTX;
4469 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4470 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4471 rtx eq_index = gen_reg_rtx (SImode);
4472 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4473
4474 if (spu_ea_model != 32)
4475 {
4476 splat_hi = gen_reg_rtx (V4SImode);
4477 cache_tag_hi = gen_reg_rtx (V4SImode);
4478 tag_equal_hi = gen_reg_rtx (V4SImode);
4479 }
4480
4481 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4482 emit_move_insn (tag_arr, tag_arr_sym);
4483 v = 0x0001020300010203LL;
4484 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4485 ea_addr_si = ea_addr;
4486 if (spu_ea_model != 32)
4487 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4488
4489 /* tag_index = ea_addr & (tag_array_size - 128) */
4490 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4491
4492 /* splat ea_addr to all 4 slots. */
4493 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4494 /* Similarly for high 32 bits of ea_addr. */
4495 if (spu_ea_model != 32)
4496 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4497
4498 /* block_off = ea_addr & 127 */
4499 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4500
4501 /* tag_addr = tag_arr + tag_index */
4502 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4503
4504 /* Read cache tags. */
4505 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4506 if (spu_ea_model != 32)
4507 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4508 plus_constant (tag_addr, 16)));
4509
4510 /* tag = ea_addr & -128 */
4511 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4512
4513 /* Read all four cache data pointers. */
4514 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4515 plus_constant (tag_addr, 32)));
4516
4517 /* Compare tags. */
4518 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4519 if (spu_ea_model != 32)
4520 {
4521 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4522 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4523 }
4524
4525 /* At most one of the tags compares equal, so tag_equal has one
4526 32-bit slot set to all 1's, with the other slots all zero.
4527 gbb picks off low bit from each byte in the 128-bit registers,
4528 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4529 we have a hit. */
4530 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4531 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4532
4533 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4534 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4535
4536 /* This allows us to rotate the corresponding cache data pointer into
4537 slot 0 (rotating by eq_index mod 16 bytes). */
4538 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4539 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
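 /* For example, a hit in slot 2 yields tag_eq_pack_si == 0x00f0, so
 eq_index == 24; rotating cache_ptrs by 24 mod 16 == 8 bytes brings
 data_pointer[2] into the preferred slot, where it is extracted into
 cache_ptrs_si.  */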
4540
4541 /* Add block offset to form final data address. */
4542 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4543
4544 /* Check that we did hit. */
4545 hit_label = gen_label_rtx ();
4546 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4547 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4548 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4550 hit_ref, pc_rtx)));
4551 /* Say that this branch is very likely to happen. */
4552 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4553 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4554
4555 ea_load_store (mem, is_store, ea_addr, data_addr);
4556 cont_label = gen_label_rtx ();
4557 emit_jump_insn (gen_jump (cont_label));
4558 emit_barrier ();
4559
4560 emit_label (hit_label);
4561
4562 if (is_store)
4563 {
4564 HOST_WIDE_INT v_hi;
4565 rtx dirty_bits = gen_reg_rtx (TImode);
4566 rtx dirty_off = gen_reg_rtx (SImode);
4567 rtx dirty_128 = gen_reg_rtx (TImode);
4568 rtx neg_block_off = gen_reg_rtx (SImode);
4569
4570 /* Set up mask with one dirty bit per byte of the mem we are
4571 writing, starting from top bit. */
4572 v_hi = v = -1;
4573 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4574 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4575 {
4576 v_hi = v;
4577 v = 0;
4578 }
4579 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4580
4581 /* Form index into cache dirty_bits. eq_index is one of
4582 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4583 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4584 offset to each of the four dirty_bits elements. */
4585 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4586
4587 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4588
4589 /* Rotate bit mask to proper bit. */
4590 emit_insn (gen_negsi2 (neg_block_off, block_off));
4591 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4592 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4593
4594 /* Or in the new dirty bits. */
4595 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4596
4597 /* Store. */
4598 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4599 }
4600
4601 emit_label (cont_label);
4602 }
4603
4604 static rtx
4605 expand_ea_mem (rtx mem, bool is_store)
4606 {
4607 rtx ea_addr;
4608 rtx data_addr = gen_reg_rtx (Pmode);
4609 rtx new_mem;
4610
4611 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4612 if (optimize_size || optimize == 0)
4613 ea_load_store (mem, is_store, ea_addr, data_addr);
4614 else
4615 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4616
4617 if (ea_alias_set == -1)
4618 ea_alias_set = new_alias_set ();
4619
4620 /* We generate a new MEM RTX to refer to the copy of the data
4621 in the cache. We do not copy memory attributes (except the
4622 alignment) from the original MEM, as they may no longer apply
4623 to the cache copy. */
4624 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4625 set_mem_alias_set (new_mem, ea_alias_set);
4626 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4627
4628 return new_mem;
4629 }
4630
4631 int
4632 spu_expand_mov (rtx * ops, enum machine_mode mode)
4633 {
4634 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4635 {
4636 /* Perform the move in the destination SUBREG's inner mode. */
4637 ops[0] = SUBREG_REG (ops[0]);
4638 mode = GET_MODE (ops[0]);
4639 ops[1] = gen_lowpart_common (mode, ops[1]);
4640 gcc_assert (ops[1]);
4641 }
4642
4643 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4644 {
4645 rtx from = SUBREG_REG (ops[1]);
4646 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4647
4648 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4649 && GET_MODE_CLASS (imode) == MODE_INT
4650 && subreg_lowpart_p (ops[1]));
4651
4652 if (GET_MODE_SIZE (imode) < 4)
4653 imode = SImode;
4654 if (imode != GET_MODE (from))
4655 from = gen_rtx_SUBREG (imode, from, 0);
4656
4657 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4658 {
4659 enum insn_code icode = convert_optab_handler (trunc_optab,
4660 mode, imode);
4661 emit_insn (GEN_FCN (icode) (ops[0], from));
4662 }
4663 else
4664 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4665 return 1;
4666 }
4667
4668 /* At least one of the operands needs to be a register. */
4669 if ((reload_in_progress | reload_completed) == 0
4670 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4671 {
4672 rtx temp = force_reg (mode, ops[1]);
4673 emit_move_insn (ops[0], temp);
4674 return 1;
4675 }
4676 if (reload_in_progress || reload_completed)
4677 {
4678 if (CONSTANT_P (ops[1]))
4679 return spu_split_immediate (ops);
4680 return 0;
4681 }
4682
4683 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4684 extend them. */
4685 if (GET_CODE (ops[1]) == CONST_INT)
4686 {
4687 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4688 if (val != INTVAL (ops[1]))
4689 {
4690 emit_move_insn (ops[0], GEN_INT (val));
4691 return 1;
4692 }
4693 }
4694 if (MEM_P (ops[0]))
4695 {
4696 if (MEM_ADDR_SPACE (ops[0]))
4697 ops[0] = expand_ea_mem (ops[0], true);
4698 return spu_split_store (ops);
4699 }
4700 if (MEM_P (ops[1]))
4701 {
4702 if (MEM_ADDR_SPACE (ops[1]))
4703 ops[1] = expand_ea_mem (ops[1], false);
4704 return spu_split_load (ops);
4705 }
4706
4707 return 0;
4708 }
4709
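/* Extract the value of mode GET_MODE (DST) from the preferred (left-most)
   slot of the TImode register SRC and move it into DST: shift the slot
   down to the low end of the register, truncate to an integer mode of the
   same size, and subreg to DST's mode if that is not an integer mode.  */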
4710 static void
4711 spu_convert_move (rtx dst, rtx src)
4712 {
4713 enum machine_mode mode = GET_MODE (dst);
4714 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4715 rtx reg;
4716 gcc_assert (GET_MODE (src) == TImode);
4717 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4718 emit_insn (gen_rtx_SET (VOIDmode, reg,
4719 gen_rtx_TRUNCATE (int_mode,
4720 gen_rtx_LSHIFTRT (TImode, src,
4721 GEN_INT (int_mode == DImode ? 64 : 96)))));
4722 if (int_mode != mode)
4723 {
4724 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4725 emit_move_insn (dst, reg);
4726 }
4727 }
4728
4729 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4730 the address from SRC and SRC+16. Return a REG or CONST_INT that
4731 specifies how many bytes to rotate the loaded registers, plus any
4732 extra from EXTRA_ROTQBY. The address and rotate amounts are
4733 normalized to improve merging of loads and rotate computations. */
4734 static rtx
4735 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4736 {
4737 rtx addr = XEXP (src, 0);
4738 rtx p0, p1, rot, addr0, addr1;
4739 int rot_amt;
4740
4741 rot = 0;
4742 rot_amt = 0;
4743
4744 if (MEM_ALIGN (src) >= 128)
4745 /* Address is already aligned; simply perform a TImode load. */ ;
4746 else if (GET_CODE (addr) == PLUS)
4747 {
4748 /* 8 cases:
4749 aligned reg + aligned reg => lqx
4750 aligned reg + unaligned reg => lqx, rotqby
4751 aligned reg + aligned const => lqd
4752 aligned reg + unaligned const => lqd, rotqbyi
4753 unaligned reg + aligned reg => lqx, rotqby
4754 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4755 unaligned reg + aligned const => lqd, rotqby
4756 unaligned reg + unaligned const -> not allowed by legitimate address
4757 */
4758 p0 = XEXP (addr, 0);
4759 p1 = XEXP (addr, 1);
4760 if (!reg_aligned_for_addr (p0))
4761 {
4762 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4763 {
4764 rot = gen_reg_rtx (SImode);
4765 emit_insn (gen_addsi3 (rot, p0, p1));
4766 }
4767 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4768 {
4769 if (INTVAL (p1) > 0
4770 && REG_POINTER (p0)
4771 && INTVAL (p1) * BITS_PER_UNIT
4772 < REGNO_POINTER_ALIGN (REGNO (p0)))
4773 {
4774 rot = gen_reg_rtx (SImode);
4775 emit_insn (gen_addsi3 (rot, p0, p1));
4776 addr = p0;
4777 }
4778 else
4779 {
4780 rtx x = gen_reg_rtx (SImode);
4781 emit_move_insn (x, p1);
4782 if (!spu_arith_operand (p1, SImode))
4783 p1 = x;
4784 rot = gen_reg_rtx (SImode);
4785 emit_insn (gen_addsi3 (rot, p0, p1));
4786 addr = gen_rtx_PLUS (Pmode, p0, x);
4787 }
4788 }
4789 else
4790 rot = p0;
4791 }
4792 else
4793 {
4794 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4795 {
4796 rot_amt = INTVAL (p1) & 15;
4797 if (INTVAL (p1) & -16)
4798 {
4799 p1 = GEN_INT (INTVAL (p1) & -16);
4800 addr = gen_rtx_PLUS (SImode, p0, p1);
4801 }
4802 else
4803 addr = p0;
4804 }
4805 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4806 rot = p1;
4807 }
4808 }
4809 else if (REG_P (addr))
4810 {
4811 if (!reg_aligned_for_addr (addr))
4812 rot = addr;
4813 }
4814 else if (GET_CODE (addr) == CONST)
4815 {
4816 if (GET_CODE (XEXP (addr, 0)) == PLUS
4817 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4818 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4819 {
4820 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4821 if (rot_amt & -16)
4822 addr = gen_rtx_CONST (Pmode,
4823 gen_rtx_PLUS (Pmode,
4824 XEXP (XEXP (addr, 0), 0),
4825 GEN_INT (rot_amt & -16)));
4826 else
4827 addr = XEXP (XEXP (addr, 0), 0);
4828 }
4829 else
4830 {
4831 rot = gen_reg_rtx (Pmode);
4832 emit_move_insn (rot, addr);
4833 }
4834 }
4835 else if (GET_CODE (addr) == CONST_INT)
4836 {
4837 rot_amt = INTVAL (addr);
4838 addr = GEN_INT (rot_amt & -16);
4839 }
4840 else if (!ALIGNED_SYMBOL_REF_P (addr))
4841 {
4842 rot = gen_reg_rtx (Pmode);
4843 emit_move_insn (rot, addr);
4844 }
4845
4846 rot_amt += extra_rotby;
4847
4848 rot_amt &= 15;
4849
4850 if (rot && rot_amt)
4851 {
4852 rtx x = gen_reg_rtx (SImode);
4853 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4854 rot = x;
4855 rot_amt = 0;
4856 }
4857 if (!rot && rot_amt)
4858 rot = GEN_INT (rot_amt);
4859
4860 addr0 = copy_rtx (addr);
4861 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4862 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4863
4864 if (dst1)
4865 {
4866 addr1 = plus_constant (copy_rtx (addr), 16);
4867 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4868 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4869 }
4870
4871 return rot;
4872 }
4873
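/* Expand a load of the narrow-mode MEM ops[1] into ops[0].  If the address
   does not need splitting, emit a single TImode load and extract the
   preferred slot; otherwise load the enclosing quadword(s), rotate the
   wanted bytes into the preferred slot, and extract them.  Return 1 if a
   sequence was emitted, or 0 for modes of 16 bytes or more, which the
   normal move patterns handle.  */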
4874 int
4875 spu_split_load (rtx * ops)
4876 {
4877 enum machine_mode mode = GET_MODE (ops[0]);
4878 rtx addr, load, rot;
4879 int rot_amt;
4880
4881 if (GET_MODE_SIZE (mode) >= 16)
4882 return 0;
4883
4884 addr = XEXP (ops[1], 0);
4885 gcc_assert (GET_CODE (addr) != AND);
4886
4887 if (!address_needs_split (ops[1]))
4888 {
4889 ops[1] = change_address (ops[1], TImode, addr);
4890 load = gen_reg_rtx (TImode);
4891 emit_insn (gen__movti (load, ops[1]));
4892 spu_convert_move (ops[0], load);
4893 return 1;
4894 }
4895
4896 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4897
4898 load = gen_reg_rtx (TImode);
4899 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4900
4901 if (rot)
4902 emit_insn (gen_rotqby_ti (load, load, rot));
4903
4904 spu_convert_move (ops[0], load);
4905 return 1;
4906 }
4907
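/* Expand a store of the narrow-mode value ops[1] into the MEM ops[0].
   When the enclosing quadword cannot simply be overwritten, load it,
   merge the new value in with a cpat-generated shuffle mask, and store
   the quadword back.  Return 1 if a sequence was emitted, or 0 for modes
   of 16 bytes or more.  */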
4908 int
4909 spu_split_store (rtx * ops)
4910 {
4911 enum machine_mode mode = GET_MODE (ops[0]);
4912 rtx reg;
4913 rtx addr, p0, p1, p1_lo, smem;
4914 int aform;
4915 int scalar;
4916
4917 if (GET_MODE_SIZE (mode) >= 16)
4918 return 0;
4919
4920 addr = XEXP (ops[0], 0);
4921 gcc_assert (GET_CODE (addr) != AND);
4922
4923 if (!address_needs_split (ops[0]))
4924 {
4925 reg = gen_reg_rtx (TImode);
4926 emit_insn (gen_spu_convert (reg, ops[1]));
4927 ops[0] = change_address (ops[0], TImode, addr);
4928 emit_move_insn (ops[0], reg);
4929 return 1;
4930 }
4931
4932 if (GET_CODE (addr) == PLUS)
4933 {
4934 /* 8 cases:
4935 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4936 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4937 aligned reg + aligned const => lqd, c?d, shuf, stqx
4938 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4939 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4940 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4941 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4942 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4943 */
4944 aform = 0;
4945 p0 = XEXP (addr, 0);
4946 p1 = p1_lo = XEXP (addr, 1);
4947 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4948 {
4949 p1_lo = GEN_INT (INTVAL (p1) & 15);
4950 if (reg_aligned_for_addr (p0))
4951 {
4952 p1 = GEN_INT (INTVAL (p1) & -16);
4953 if (p1 == const0_rtx)
4954 addr = p0;
4955 else
4956 addr = gen_rtx_PLUS (SImode, p0, p1);
4957 }
4958 else
4959 {
4960 rtx x = gen_reg_rtx (SImode);
4961 emit_move_insn (x, p1);
4962 addr = gen_rtx_PLUS (SImode, p0, x);
4963 }
4964 }
4965 }
4966 else if (REG_P (addr))
4967 {
4968 aform = 0;
4969 p0 = addr;
4970 p1 = p1_lo = const0_rtx;
4971 }
4972 else
4973 {
4974 aform = 1;
4975 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4976 p1 = 0; /* aform doesn't use p1 */
4977 p1_lo = addr;
4978 if (ALIGNED_SYMBOL_REF_P (addr))
4979 p1_lo = const0_rtx;
4980 else if (GET_CODE (addr) == CONST
4981 && GET_CODE (XEXP (addr, 0)) == PLUS
4982 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4983 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4984 {
4985 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4986 if ((v & -16) != 0)
4987 addr = gen_rtx_CONST (Pmode,
4988 gen_rtx_PLUS (Pmode,
4989 XEXP (XEXP (addr, 0), 0),
4990 GEN_INT (v & -16)));
4991 else
4992 addr = XEXP (XEXP (addr, 0), 0);
4993 p1_lo = GEN_INT (v & 15);
4994 }
4995 else if (GET_CODE (addr) == CONST_INT)
4996 {
4997 p1_lo = GEN_INT (INTVAL (addr) & 15);
4998 addr = GEN_INT (INTVAL (addr) & -16);
4999 }
5000 else
5001 {
5002 p1_lo = gen_reg_rtx (SImode);
5003 emit_move_insn (p1_lo, addr);
5004 }
5005 }
5006
5007 gcc_assert (aform == 0 || aform == 1);
5008 reg = gen_reg_rtx (TImode);
5009
5010 scalar = store_with_one_insn_p (ops[0]);
5011 if (!scalar)
5012 {
5013 /* We could copy the flags from the ops[0] MEM to the MEM we load
5014 from here, but we don't because we want this load to be optimized
5015 away if possible, and copying the flags would prevent that in
5016 certain cases, e.g. consider the volatile flag. */
5017
5018 rtx pat = gen_reg_rtx (TImode);
5019 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5020 set_mem_alias_set (lmem, 0);
5021 emit_insn (gen_movti (reg, lmem));
5022
5023 if (!p0 || reg_aligned_for_addr (p0))
5024 p0 = stack_pointer_rtx;
5025 if (!p1_lo)
5026 p1_lo = const0_rtx;
5027
5028 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5029 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5030 }
5031 else
5032 {
5033 if (GET_CODE (ops[1]) == REG)
5034 emit_insn (gen_spu_convert (reg, ops[1]));
5035 else if (GET_CODE (ops[1]) == SUBREG)
5036 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5037 else
5038 abort ();
5039 }
5040
5041 if (GET_MODE_SIZE (mode) < 4 && scalar)
5042 emit_insn (gen_ashlti3
5043 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5044
5045 smem = change_address (ops[0], TImode, copy_rtx (addr));
5046 /* We can't use the previous alias set because the memory has changed
5047 size and can potentially overlap objects of other types. */
5048 set_mem_alias_set (smem, 0);
5049
5050 emit_insn (gen_movti (smem, reg));
5051 return 1;
5052 }
5053
5054 /* Return TRUE if X is MEM which is a struct member reference
5055 and the member can safely be loaded and stored with a single
5056 instruction because it is padded. */
5057 static int
5058 mem_is_padded_component_ref (rtx x)
5059 {
5060 tree t = MEM_EXPR (x);
5061 tree r;
5062 if (!t || TREE_CODE (t) != COMPONENT_REF)
5063 return 0;
5064 t = TREE_OPERAND (t, 1);
5065 if (!t || TREE_CODE (t) != FIELD_DECL
5066 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5067 return 0;
5068 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5069 r = DECL_FIELD_CONTEXT (t);
5070 if (!r || TREE_CODE (r) != RECORD_TYPE)
5071 return 0;
5072 /* Make sure they are the same mode */
5073 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5074 return 0;
5075 /* If there are no following fields then the field alignment assures
5076 the structure is padded to the alignment which means this field is
5077 padded too. */
5078 if (TREE_CHAIN (t) == 0)
5079 return 1;
5080 /* If the following field is also aligned then this field will be
5081 padded. */
5082 t = TREE_CHAIN (t);
5083 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5084 return 1;
5085 return 0;
5086 }
5087
5088 /* Parse the -mfixed-range= option string. */
5089 static void
5090 fix_range (const char *const_str)
5091 {
5092 int i, first, last;
5093 char *str, *dash, *comma;
5094
5095 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5096 REG2 are either register names or register numbers. The effect
5097 of this option is to mark the registers in the range from REG1 to
5098 REG2 as ``fixed'' so they won't be used by the compiler. */
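 /* For example, something like "-mfixed-range=$80-$90,$100-$110" (the
 exact spelling depends on the port's register names; these values are
 purely illustrative) would mark those registers as fixed.  */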
5099
5100 i = strlen (const_str);
5101 str = (char *) alloca (i + 1);
5102 memcpy (str, const_str, i + 1);
5103
5104 while (1)
5105 {
5106 dash = strchr (str, '-');
5107 if (!dash)
5108 {
5109 warning (0, "value of -mfixed-range must have form REG1-REG2");
5110 return;
5111 }
5112 *dash = '\0';
5113 comma = strchr (dash + 1, ',');
5114 if (comma)
5115 *comma = '\0';
5116
5117 first = decode_reg_name (str);
5118 if (first < 0)
5119 {
5120 warning (0, "unknown register name: %s", str);
5121 return;
5122 }
5123
5124 last = decode_reg_name (dash + 1);
5125 if (last < 0)
5126 {
5127 warning (0, "unknown register name: %s", dash + 1);
5128 return;
5129 }
5130
5131 *dash = '-';
5132
5133 if (first > last)
5134 {
5135 warning (0, "%s-%s is an empty range", str, dash + 1);
5136 return;
5137 }
5138
5139 for (i = first; i <= last; ++i)
5140 fixed_regs[i] = call_used_regs[i] = 1;
5141
5142 if (!comma)
5143 break;
5144
5145 *comma = ',';
5146 str = comma + 1;
5147 }
5148 }
5149
5150 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5151 can be generated using the fsmbi instruction. */
5152 int
5153 fsmbi_const_p (rtx x)
5154 {
5155 if (CONSTANT_P (x))
5156 {
5157 /* We can always choose TImode for CONST_INT because the high bits
5158 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5159 enum immediate_class c = classify_immediate (x, TImode);
5160 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5161 }
5162 return 0;
5163 }
5164
5165 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5166 can be generated using the cbd, chd, cwd or cdd instruction. */
5167 int
5168 cpat_const_p (rtx x, enum machine_mode mode)
5169 {
5170 if (CONSTANT_P (x))
5171 {
5172 enum immediate_class c = classify_immediate (x, mode);
5173 return c == IC_CPAT;
5174 }
5175 return 0;
5176 }
5177
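/* Compute at compile time the shuffle pattern a cpat operation would
   produce, given a constant size ops[3], constant offset ops[2], and a
   base ops[1] that is either a constant or a 128-bit-aligned pointer
   register.  Each pattern byte defaults to i + 16, selecting byte i of
   shufb's second operand (the unchanged quadword); the bytes covering the
   insertion offset are replaced with selectors for the new value's
   preferred-slot bytes in the first operand.  Return 0 if the operands
   are not suitable.  */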
5178 rtx
5179 gen_cpat_const (rtx * ops)
5180 {
5181 unsigned char dst[16];
5182 int i, offset, shift, isize;
5183 if (GET_CODE (ops[3]) != CONST_INT
5184 || GET_CODE (ops[2]) != CONST_INT
5185 || (GET_CODE (ops[1]) != CONST_INT
5186 && GET_CODE (ops[1]) != REG))
5187 return 0;
5188 if (GET_CODE (ops[1]) == REG
5189 && (!REG_POINTER (ops[1])
5190 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5191 return 0;
5192
5193 for (i = 0; i < 16; i++)
5194 dst[i] = i + 16;
5195 isize = INTVAL (ops[3]);
5196 if (isize == 1)
5197 shift = 3;
5198 else if (isize == 2)
5199 shift = 2;
5200 else
5201 shift = 0;
5202 offset = (INTVAL (ops[2]) +
5203 (GET_CODE (ops[1]) ==
5204 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5205 for (i = 0; i < isize; i++)
5206 dst[offset + i] = i + shift;
5207 return array_to_constant (TImode, dst);
5208 }
5209
5210 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5211 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5212 than 16 bytes, the value is repeated across the rest of the array. */
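/* For example, the SImode constant 0x01020304 becomes
   { 0x01, 0x02, 0x03, 0x04 } repeated four times to fill the array.  */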
5213 void
5214 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5215 {
5216 HOST_WIDE_INT val;
5217 int i, j, first;
5218
5219 memset (arr, 0, 16);
5220 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5221 if (GET_CODE (x) == CONST_INT
5222 || (GET_CODE (x) == CONST_DOUBLE
5223 && (mode == SFmode || mode == DFmode)))
5224 {
5225 gcc_assert (mode != VOIDmode && mode != BLKmode);
5226
5227 if (GET_CODE (x) == CONST_DOUBLE)
5228 val = const_double_to_hwint (x);
5229 else
5230 val = INTVAL (x);
5231 first = GET_MODE_SIZE (mode) - 1;
5232 for (i = first; i >= 0; i--)
5233 {
5234 arr[i] = val & 0xff;
5235 val >>= 8;
5236 }
5237 /* Splat the constant across the whole array. */
5238 for (j = 0, i = first + 1; i < 16; i++)
5239 {
5240 arr[i] = arr[j];
5241 j = (j == first) ? 0 : j + 1;
5242 }
5243 }
5244 else if (GET_CODE (x) == CONST_DOUBLE)
5245 {
5246 val = CONST_DOUBLE_LOW (x);
5247 for (i = 15; i >= 8; i--)
5248 {
5249 arr[i] = val & 0xff;
5250 val >>= 8;
5251 }
5252 val = CONST_DOUBLE_HIGH (x);
5253 for (i = 7; i >= 0; i--)
5254 {
5255 arr[i] = val & 0xff;
5256 val >>= 8;
5257 }
5258 }
5259 else if (GET_CODE (x) == CONST_VECTOR)
5260 {
5261 int units;
5262 rtx elt;
5263 mode = GET_MODE_INNER (mode);
5264 units = CONST_VECTOR_NUNITS (x);
5265 for (i = 0; i < units; i++)
5266 {
5267 elt = CONST_VECTOR_ELT (x, i);
5268 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5269 {
5270 if (GET_CODE (elt) == CONST_DOUBLE)
5271 val = const_double_to_hwint (elt);
5272 else
5273 val = INTVAL (elt);
5274 first = GET_MODE_SIZE (mode) - 1;
5275 if (first + i * GET_MODE_SIZE (mode) > 16)
5276 abort ();
5277 for (j = first; j >= 0; j--)
5278 {
5279 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5280 val >>= 8;
5281 }
5282 }
5283 }
5284 }
5285 else
5286 gcc_unreachable();
5287 }
5288
5289 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5290 smaller than 16 bytes, use the bytes that would represent that value
5291 in a register, e.g., for QImode return the value of arr[3]. */
5292 rtx
5293 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5294 {
5295 enum machine_mode inner_mode;
5296 rtvec v;
5297 int units, size, i, j, k;
5298 HOST_WIDE_INT val;
5299
5300 if (GET_MODE_CLASS (mode) == MODE_INT
5301 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5302 {
5303 j = GET_MODE_SIZE (mode);
5304 i = j < 4 ? 4 - j : 0;
5305 for (val = 0; i < j; i++)
5306 val = (val << 8) | arr[i];
5307 val = trunc_int_for_mode (val, mode);
5308 return GEN_INT (val);
5309 }
5310
5311 if (mode == TImode)
5312 {
5313 HOST_WIDE_INT high;
5314 for (i = high = 0; i < 8; i++)
5315 high = (high << 8) | arr[i];
5316 for (i = 8, val = 0; i < 16; i++)
5317 val = (val << 8) | arr[i];
5318 return immed_double_const (val, high, TImode);
5319 }
5320 if (mode == SFmode)
5321 {
5322 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5323 val = trunc_int_for_mode (val, SImode);
5324 return hwint_to_const_double (SFmode, val);
5325 }
5326 if (mode == DFmode)
5327 {
5328 for (i = 0, val = 0; i < 8; i++)
5329 val = (val << 8) | arr[i];
5330 return hwint_to_const_double (DFmode, val);
5331 }
5332
5333 if (!VECTOR_MODE_P (mode))
5334 abort ();
5335
5336 units = GET_MODE_NUNITS (mode);
5337 size = GET_MODE_UNIT_SIZE (mode);
5338 inner_mode = GET_MODE_INNER (mode);
5339 v = rtvec_alloc (units);
5340
5341 for (k = i = 0; i < units; ++i)
5342 {
5343 val = 0;
5344 for (j = 0; j < size; j++, k++)
5345 val = (val << 8) | arr[k];
5346
5347 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5348 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5349 else
5350 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5351 }
5352 if (k > 16)
5353 abort ();
5354
5355 return gen_rtx_CONST_VECTOR (mode, v);
5356 }
5357
5358 static void
5359 reloc_diagnostic (rtx x)
5360 {
5361 tree decl = 0;
5362 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5363 return;
5364
5365 if (GET_CODE (x) == SYMBOL_REF)
5366 decl = SYMBOL_REF_DECL (x);
5367 else if (GET_CODE (x) == CONST
5368 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5369 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5370
5371 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5372 if (decl && !DECL_P (decl))
5373 decl = 0;
5374
5375 /* The decl could be a string constant. */
5376 if (decl && DECL_P (decl))
5377 {
5378 location_t loc;
5379 /* We use last_assemble_variable_decl to get line information. It's
5380 not always going to be right and might not even be close, but will
5381 be right for the more common cases. */
5382 if (!last_assemble_variable_decl || in_section == ctors_section)
5383 loc = DECL_SOURCE_LOCATION (decl);
5384 else
5385 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5386
5387 if (TARGET_WARN_RELOC)
5388 warning_at (loc, 0,
5389 "creating run-time relocation for %qD", decl);
5390 else
5391 error_at (loc,
5392 "creating run-time relocation for %qD", decl);
5393 }
5394 else
5395 {
5396 if (TARGET_WARN_RELOC)
5397 warning_at (input_location, 0, "creating run-time relocation");
5398 else
5399 error_at (input_location, "creating run-time relocation");
5400 }
5401 }
5402
5403 /* Hook into assemble_integer so we can generate an error for run-time
5404 relocations. The SPU ABI disallows them. */
5405 static bool
5406 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5407 {
5408 /* By default run-time relocations aren't supported, but we allow them
5409 in case users support it in their own run-time loader. And we provide
5410 a warning for those users that don't. */
5411 if ((GET_CODE (x) == SYMBOL_REF)
5412 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5413 reloc_diagnostic (x);
5414
5415 return default_assemble_integer (x, size, aligned_p);
5416 }
5417
5418 static void
5419 spu_asm_globalize_label (FILE * file, const char *name)
5420 {
5421 fputs ("\t.global\t", file);
5422 assemble_name (file, name);
5423 fputs ("\n", file);
5424 }
5425
5426 static bool
5427 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5428 bool speed ATTRIBUTE_UNUSED)
5429 {
5430 enum machine_mode mode = GET_MODE (x);
5431 int cost = COSTS_N_INSNS (2);
5432
5433 /* Folding to a CONST_VECTOR will use extra space but there might
5434 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5435 only if it allows us to fold away multiple insns. Changing the cost
5436 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5437 because this cost will only be compared against a single insn.
5438 if (code == CONST_VECTOR)
5439 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5440 */
5441
5442 /* Use defaults for float operations. Not accurate but good enough. */
5443 if (mode == DFmode)
5444 {
5445 *total = COSTS_N_INSNS (13);
5446 return true;
5447 }
5448 if (mode == SFmode)
5449 {
5450 *total = COSTS_N_INSNS (6);
5451 return true;
5452 }
5453 switch (code)
5454 {
5455 case CONST_INT:
5456 if (satisfies_constraint_K (x))
5457 *total = 0;
5458 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5459 *total = COSTS_N_INSNS (1);
5460 else
5461 *total = COSTS_N_INSNS (3);
5462 return true;
5463
5464 case CONST:
5465 *total = COSTS_N_INSNS (3);
5466 return true;
5467
5468 case LABEL_REF:
5469 case SYMBOL_REF:
5470 *total = COSTS_N_INSNS (0);
5471 return true;
5472
5473 case CONST_DOUBLE:
5474 *total = COSTS_N_INSNS (5);
5475 return true;
5476
5477 case FLOAT_EXTEND:
5478 case FLOAT_TRUNCATE:
5479 case FLOAT:
5480 case UNSIGNED_FLOAT:
5481 case FIX:
5482 case UNSIGNED_FIX:
5483 *total = COSTS_N_INSNS (7);
5484 return true;
5485
5486 case PLUS:
5487 if (mode == TImode)
5488 {
5489 *total = COSTS_N_INSNS (9);
5490 return true;
5491 }
5492 break;
5493
5494 case MULT:
5495 cost =
5496 GET_CODE (XEXP (x, 0)) ==
5497 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5498 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5499 {
5500 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5501 {
5502 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5503 cost = COSTS_N_INSNS (14);
5504 if ((val & 0xffff) == 0)
5505 cost = COSTS_N_INSNS (9);
5506 else if (val > 0 && val < 0x10000)
5507 cost = COSTS_N_INSNS (11);
5508 }
5509 }
5510 *total = cost;
5511 return true;
5512 case DIV:
5513 case UDIV:
5514 case MOD:
5515 case UMOD:
5516 *total = COSTS_N_INSNS (20);
5517 return true;
5518 case ROTATE:
5519 case ROTATERT:
5520 case ASHIFT:
5521 case ASHIFTRT:
5522 case LSHIFTRT:
5523 *total = COSTS_N_INSNS (4);
5524 return true;
5525 case UNSPEC:
5526 if (XINT (x, 1) == UNSPEC_CONVERT)
5527 *total = COSTS_N_INSNS (0);
5528 else
5529 *total = COSTS_N_INSNS (4);
5530 return true;
5531 }
5532 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5533 if (GET_MODE_CLASS (mode) == MODE_INT
5534 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5535 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5536 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5537 *total = cost;
5538 return true;
5539 }
5540
5541 static enum machine_mode
5542 spu_unwind_word_mode (void)
5543 {
5544 return SImode;
5545 }
5546
5547 /* Decide whether we can make a sibling call to a function. DECL is the
5548 declaration of the function being targeted by the call and EXP is the
5549 CALL_EXPR representing the call. */
5550 static bool
5551 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5552 {
5553 return decl && !TARGET_LARGE_MEM;
5554 }
5555
5556 /* We need to correctly update the back chain pointer and the Available
5557 Stack Size (which is in the second slot of the sp register). */
5558 void
5559 spu_allocate_stack (rtx op0, rtx op1)
5560 {
5561 HOST_WIDE_INT v;
5562 rtx chain = gen_reg_rtx (V4SImode);
5563 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5564 rtx sp = gen_reg_rtx (V4SImode);
5565 rtx splatted = gen_reg_rtx (V4SImode);
5566 rtx pat = gen_reg_rtx (TImode);
5567
5568 /* copy the back chain so we can save it back again. */
5569 emit_move_insn (chain, stack_bot);
5570
5571 op1 = force_reg (SImode, op1);
5572
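 /* The shuffle pattern 0x00010203 repeated four times splats the
 preferred slot of op1 (the requested allocation size) into every word,
 so it can be subtracted from all four elements of the V4SI stack
 pointer at once.  */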
5573 v = 0x1020300010203ll;
5574 emit_move_insn (pat, immed_double_const (v, v, TImode));
5575 emit_insn (gen_shufb (splatted, op1, op1, pat));
5576
5577 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5578 emit_insn (gen_subv4si3 (sp, sp, splatted));
5579
5580 if (flag_stack_check)
5581 {
5582 rtx avail = gen_reg_rtx(SImode);
5583 rtx result = gen_reg_rtx(SImode);
5584 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5585 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5586 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5587 }
5588
5589 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5590
5591 emit_move_insn (stack_bot, chain);
5592
5593 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5594 }
5595
5596 void
5597 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5598 {
5599 static unsigned char arr[16] =
5600 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5601 rtx temp = gen_reg_rtx (SImode);
5602 rtx temp2 = gen_reg_rtx (SImode);
5603 rtx temp3 = gen_reg_rtx (V4SImode);
5604 rtx temp4 = gen_reg_rtx (V4SImode);
5605 rtx pat = gen_reg_rtx (TImode);
5606 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5607
5608 /* Restore the backchain from the first word, sp from the second. */
5609 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5610 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5611
5612 emit_move_insn (pat, array_to_constant (TImode, arr));
5613
5614 /* Compute Available Stack Size for sp */
5615 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5616 emit_insn (gen_shufb (temp3, temp, temp, pat));
5617
5618 /* Compute Available Stack Size for back chain */
5619 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5620 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5621 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5622
5623 emit_insn (gen_addv4si3 (sp, sp, temp3));
5624 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5625 }
5626
5627 static void
5628 spu_init_libfuncs (void)
5629 {
5630 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5631 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5632 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5633 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5634 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5635 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5636 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5637 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5638 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5639 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5640 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5641
5642 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5643 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5644
5645 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5646 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5647 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5648 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5649 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5650 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5651 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5652 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5653 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5654 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5655 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5656 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5657
5658 set_optab_libfunc (smul_optab, TImode, "__multi3");
5659 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5660 set_optab_libfunc (smod_optab, TImode, "__modti3");
5661 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5662 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5663 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5664 }
5665
5666 /* Make a subreg, stripping any existing subreg. We could possibly just
5667 call simplify_subreg, but in this case we know what we want. */
5668 rtx
5669 spu_gen_subreg (enum machine_mode mode, rtx x)
5670 {
5671 if (GET_CODE (x) == SUBREG)
5672 x = SUBREG_REG (x);
5673 if (GET_MODE (x) == mode)
5674 return x;
5675 return gen_rtx_SUBREG (mode, x, 0);
5676 }
5677
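/* Implement TARGET_RETURN_IN_MEMORY.  Aggregates are returned in memory when
   they are BLKmode and either of variable size or larger than
   MAX_REGISTER_RETURN words.  */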
5678 static bool
5679 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5680 {
5681 return (TYPE_MODE (type) == BLKmode
5682 && ((type) == 0
5683 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5684 || int_size_in_bytes (type) >
5685 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5686 }
5687 \f
5688 /* Create the built-in types and functions */
5689
5690 enum spu_function_code
5691 {
5692 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5693 #include "spu-builtins.def"
5694 #undef DEF_BUILTIN
5695 NUM_SPU_BUILTINS
5696 };
5697
5698 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5699
5700 struct spu_builtin_description spu_builtins[] = {
5701 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5702 {fcode, icode, name, type, params},
5703 #include "spu-builtins.def"
5704 #undef DEF_BUILTIN
5705 };
5706
5707 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5708
5709 /* Returns the spu builtin decl for CODE. */
5710
5711 static tree
5712 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5713 {
5714 if (code >= NUM_SPU_BUILTINS)
5715 return error_mark_node;
5716
5717 return spu_builtin_decls[code];
5718 }
5719
5720
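/* Implement TARGET_INIT_BUILTINS.  Create the vector and scalar types the
   SPU builtins use, then register one function decl per entry in
   spu-builtins.def.  */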
5721 static void
5722 spu_init_builtins (void)
5723 {
5724 struct spu_builtin_description *d;
5725 unsigned int i;
5726
5727 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5728 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5729 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5730 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5731 V4SF_type_node = build_vector_type (float_type_node, 4);
5732 V2DF_type_node = build_vector_type (double_type_node, 2);
5733
5734 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5735 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5736 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5737 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5738
5739 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5740
5741 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5742 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5743 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5744 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5745 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5746 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5748 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5749 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5750 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5751 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5752 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5753
5754 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5755 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5756 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5757 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5758 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5759 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5760 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5761 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5762
5763 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5764 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5765
5766 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5767
5768 spu_builtin_types[SPU_BTI_PTR] =
5769 build_pointer_type (build_qualified_type
5770 (void_type_node,
5771 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5772
5773 /* For each builtin we build a new prototype. The tree code will make
5774 sure nodes are shared. */
5775 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5776 {
5777 tree p;
5778 char name[64]; /* add_builtin_function will make a copy.  */
5779 int parm;
5780
5781 if (d->name == 0)
5782 continue;
5783
5784 /* Find last parm. */
5785 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5786 ;
5787
5788 p = void_list_node;
5789 while (parm > 1)
5790 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5791
5792 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5793
5794 sprintf (name, "__builtin_%s", d->name);
5795 spu_builtin_decls[i] =
5796 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5797 if (d->fcode == SPU_MASK_FOR_LOAD)
5798 TREE_READONLY (spu_builtin_decls[i]) = 1;
5799
5800 /* These builtins don't throw. */
5801 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5802 }
5803 }
5804
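/* Restore the stack pointer to the value OP1 while preserving the current
   back chain quadword.  As above, the whole V4SI stack pointer register is
   adjusted by the byte difference so the available-stack-size word stays
   consistent.  */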
5805 void
5806 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5807 {
5808 static unsigned char arr[16] =
5809 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5810
5811 rtx temp = gen_reg_rtx (Pmode);
5812 rtx temp2 = gen_reg_rtx (V4SImode);
5813 rtx temp3 = gen_reg_rtx (V4SImode);
5814 rtx pat = gen_reg_rtx (TImode);
5815 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5816
5817 emit_move_insn (pat, array_to_constant (TImode, arr));
5818
5819 /* Restore the sp. */
5820 emit_move_insn (temp, op1);
5821 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5822
5823 /* Compute available stack size for sp. */
5824 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5825 emit_insn (gen_shufb (temp3, temp, temp, pat));
5826
5827 emit_insn (gen_addv4si3 (sp, sp, temp3));
5828 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5829 }
5830
5831 int
5832 spu_safe_dma (HOST_WIDE_INT channel)
5833 {
5834 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5835 }
5836
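/* Expand a splat: replicate scalar OPS[1] into every element of vector
   OPS[0].  Constants become a vector constant directly; otherwise a shufb
   pattern is built that repeats the bytes of the element in the preferred
   slot (e.g. bytes 0-3 of the source for a V4SI splat).  */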
5837 void
5838 spu_builtin_splats (rtx ops[])
5839 {
5840 enum machine_mode mode = GET_MODE (ops[0]);
5841 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5842 {
5843 unsigned char arr[16];
5844 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5845 emit_move_insn (ops[0], array_to_constant (mode, arr));
5846 }
5847 else
5848 {
5849 rtx reg = gen_reg_rtx (TImode);
5850 rtx shuf;
5851 if (GET_CODE (ops[1]) != REG
5852 && GET_CODE (ops[1]) != SUBREG)
5853 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5854 switch (mode)
5855 {
5856 case V2DImode:
5857 case V2DFmode:
5858 shuf =
5859 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5860 TImode);
5861 break;
5862 case V4SImode:
5863 case V4SFmode:
5864 shuf =
5865 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5866 TImode);
5867 break;
5868 case V8HImode:
5869 shuf =
5870 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5871 TImode);
5872 break;
5873 case V16QImode:
5874 shuf =
5875 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5876 TImode);
5877 break;
5878 default:
5879 abort ();
5880 }
5881 emit_move_insn (reg, shuf);
5882 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5883 }
5884 }
5885
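/* Extract element OPS[2] of vector OPS[1] into scalar OPS[0].  A constant
   index maps straight onto the vec_extract patterns; a variable index is
   handled by rotating the requested element into the preferred slot with
   rotqby and converting the result down to the scalar mode.  */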
5886 void
5887 spu_builtin_extract (rtx ops[])
5888 {
5889 enum machine_mode mode;
5890 rtx rot, from, tmp;
5891
5892 mode = GET_MODE (ops[1]);
5893
5894 if (GET_CODE (ops[2]) == CONST_INT)
5895 {
5896 switch (mode)
5897 {
5898 case V16QImode:
5899 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5900 break;
5901 case V8HImode:
5902 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5903 break;
5904 case V4SFmode:
5905 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5906 break;
5907 case V4SImode:
5908 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5909 break;
5910 case V2DImode:
5911 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5912 break;
5913 case V2DFmode:
5914 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5915 break;
5916 default:
5917 abort ();
5918 }
5919 return;
5920 }
5921
5922 from = spu_gen_subreg (TImode, ops[1]);
5923 rot = gen_reg_rtx (TImode);
5924 tmp = gen_reg_rtx (SImode);
5925
5926 switch (mode)
5927 {
5928 case V16QImode:
5929 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5930 break;
5931 case V8HImode:
5932 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5933 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5934 break;
5935 case V4SFmode:
5936 case V4SImode:
5937 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5938 break;
5939 case V2DImode:
5940 case V2DFmode:
5941 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5942 break;
5943 default:
5944 abort ();
5945 }
5946 emit_insn (gen_rotqby_ti (rot, from, tmp));
5947
5948 emit_insn (gen_spu_convert (ops[0], rot));
5949 }
5950
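/* Insert scalar OPS[1] into element OPS[3] of vector OPS[2] and put the
   result in OPS[0].  A cpat instruction generates the byte mask for the
   element being replaced, and shufb merges the two sources under it.  */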
5951 void
5952 spu_builtin_insert (rtx ops[])
5953 {
5954 enum machine_mode mode = GET_MODE (ops[0]);
5955 enum machine_mode imode = GET_MODE_INNER (mode);
5956 rtx mask = gen_reg_rtx (TImode);
5957 rtx offset;
5958
5959 if (GET_CODE (ops[3]) == CONST_INT)
5960 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5961 else
5962 {
5963 offset = gen_reg_rtx (SImode);
5964 emit_insn (gen_mulsi3
5965 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5966 }
5967 emit_insn (gen_cpat
5968 (mask, stack_pointer_rtx, offset,
5969 GEN_INT (GET_MODE_SIZE (imode))));
5970 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5971 }
5972
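/* Promote scalar OPS[1] to a vector: place it in element OPS[2] of OPS[0]
   by rotating the quadword so the value moves from the preferred slot to
   the requested slot.  */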
5973 void
5974 spu_builtin_promote (rtx ops[])
5975 {
5976 enum machine_mode mode, imode;
5977 rtx rot, from, offset;
5978 HOST_WIDE_INT pos;
5979
5980 mode = GET_MODE (ops[0]);
5981 imode = GET_MODE_INNER (mode);
5982
5983 from = gen_reg_rtx (TImode);
5984 rot = spu_gen_subreg (TImode, ops[0]);
5985
5986 emit_insn (gen_spu_convert (from, ops[1]));
5987
5988 if (GET_CODE (ops[2]) == CONST_INT)
5989 {
5990 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5991 if (GET_MODE_SIZE (imode) < 4)
5992 pos += 4 - GET_MODE_SIZE (imode);
5993 offset = GEN_INT (pos & 15);
5994 }
5995 else
5996 {
5997 offset = gen_reg_rtx (SImode);
5998 switch (mode)
5999 {
6000 case V16QImode:
6001 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6002 break;
6003 case V8HImode:
6004 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6005 emit_insn (gen_addsi3 (offset, offset, offset));
6006 break;
6007 case V4SFmode:
6008 case V4SImode:
6009 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6010 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6011 break;
6012 case V2DImode:
6013 case V2DFmode:
6014 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6015 break;
6016 default:
6017 abort ();
6018 }
6019 }
6020 emit_insn (gen_rotqby_ti (rot, from, offset));
6021 }
6022
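/* Implement TARGET_TRAMPOLINE_INIT.  Build the trampoline instructions in
   registers, merging in the function address and static chain value, store
   them into M_TRAMP, and finish with a sync so the newly written code is
   visible to instruction fetch.  */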
6023 static void
6024 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
6025 {
6026 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
6027 rtx shuf = gen_reg_rtx (V4SImode);
6028 rtx insn = gen_reg_rtx (V4SImode);
6029 rtx shufc;
6030 rtx insnc;
6031 rtx mem;
6032
6033 fnaddr = force_reg (SImode, fnaddr);
6034 cxt = force_reg (SImode, cxt);
6035
6036 if (TARGET_LARGE_MEM)
6037 {
6038 rtx rotl = gen_reg_rtx (V4SImode);
6039 rtx mask = gen_reg_rtx (V4SImode);
6040 rtx bi = gen_reg_rtx (SImode);
6041 static unsigned char const shufa[16] = {
6042 2, 3, 0, 1, 18, 19, 16, 17,
6043 0, 1, 2, 3, 16, 17, 18, 19
6044 };
6045 static unsigned char const insna[16] = {
6046 0x41, 0, 0, 79,
6047 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6048 0x60, 0x80, 0, 79,
6049 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6050 };
6051
6052 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6053 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6054
6055 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6056 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6057 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6058 emit_insn (gen_selb (insn, insnc, rotl, mask));
6059
6060 mem = adjust_address (m_tramp, V4SImode, 0);
6061 emit_move_insn (mem, insn);
6062
6063 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6064 mem = adjust_address (m_tramp, Pmode, 16);
6065 emit_move_insn (mem, bi);
6066 }
6067 else
6068 {
6069 rtx scxt = gen_reg_rtx (SImode);
6070 rtx sfnaddr = gen_reg_rtx (SImode);
6071 static unsigned char const insna[16] = {
6072 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6073 0x30, 0, 0, 0,
6074 0, 0, 0, 0,
6075 0, 0, 0, 0
6076 };
6077
6078 shufc = gen_reg_rtx (TImode);
6079 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6080
6081 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6082 fits 18 bits and the last 4 are zeros. This will be true if
6083 the stack pointer is initialized to 0x3fff0 at program start,
6084 otherwise the ila instruction will be garbage. */
6085
6086 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6087 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6088 emit_insn (gen_cpat
6089 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6090 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6091 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6092
6093 mem = adjust_address (m_tramp, V4SImode, 0);
6094 emit_move_insn (mem, insn);
6095 }
6096 emit_insn (gen_sync ());
6097 }
6098
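/* Sign-extend scalar OPS[1] into the wider scalar OPS[0].  A shufb pattern
   is built that keeps the source bytes in the least significant bytes of
   the result and fills the more significant bytes from a register holding
   the replicated sign.  */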
6099 void
6100 spu_expand_sign_extend (rtx ops[])
6101 {
6102 unsigned char arr[16];
6103 rtx pat = gen_reg_rtx (TImode);
6104 rtx sign, c;
6105 int i, last;
6106 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6107 if (GET_MODE (ops[1]) == QImode)
6108 {
6109 sign = gen_reg_rtx (HImode);
6110 emit_insn (gen_extendqihi2 (sign, ops[1]));
6111 for (i = 0; i < 16; i++)
6112 arr[i] = 0x12;
6113 arr[last] = 0x13;
6114 }
6115 else
6116 {
6117 for (i = 0; i < 16; i++)
6118 arr[i] = 0x10;
6119 switch (GET_MODE (ops[1]))
6120 {
6121 case HImode:
6122 sign = gen_reg_rtx (SImode);
6123 emit_insn (gen_extendhisi2 (sign, ops[1]));
6124 arr[last] = 0x03;
6125 arr[last - 1] = 0x02;
6126 break;
6127 case SImode:
6128 sign = gen_reg_rtx (SImode);
6129 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6130 for (i = 0; i < 4; i++)
6131 arr[last - i] = 3 - i;
6132 break;
6133 case DImode:
6134 sign = gen_reg_rtx (SImode);
6135 c = gen_reg_rtx (SImode);
6136 emit_insn (gen_spu_convert (c, ops[1]));
6137 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6138 for (i = 0; i < 8; i++)
6139 arr[last - i] = 7 - i;
6140 break;
6141 default:
6142 abort ();
6143 }
6144 }
6145 emit_move_insn (pat, array_to_constant (TImode, arr));
6146 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6147 }
6148
6149 /* Expand vector initialization.  If there are any constant parts,
6150 load constant parts first.  Then load any non-constant parts.  */
6151 void
6152 spu_expand_vector_init (rtx target, rtx vals)
6153 {
6154 enum machine_mode mode = GET_MODE (target);
6155 int n_elts = GET_MODE_NUNITS (mode);
6156 int n_var = 0;
6157 bool all_same = true;
6158 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6159 int i;
6160
6161 first = XVECEXP (vals, 0, 0);
6162 for (i = 0; i < n_elts; ++i)
6163 {
6164 x = XVECEXP (vals, 0, i);
6165 if (!(CONST_INT_P (x)
6166 || GET_CODE (x) == CONST_DOUBLE
6167 || GET_CODE (x) == CONST_FIXED))
6168 ++n_var;
6169 else
6170 {
6171 if (first_constant == NULL_RTX)
6172 first_constant = x;
6173 }
6174 if (i > 0 && !rtx_equal_p (x, first))
6175 all_same = false;
6176 }
6177
6178 /* If all elements are the same, use splats to repeat elements.  */
6179 if (all_same)
6180 {
6181 if (!CONSTANT_P (first)
6182 && !register_operand (first, GET_MODE (x)))
6183 first = force_reg (GET_MODE (first), first);
6184 emit_insn (gen_spu_splats (target, first));
6185 return;
6186 }
6187
6188 /* Load constant parts.  */
6189 if (n_var != n_elts)
6190 {
6191 if (n_var == 0)
6192 {
6193 emit_move_insn (target,
6194 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6195 }
6196 else
6197 {
6198 rtx constant_parts_rtx = copy_rtx (vals);
6199
6200 gcc_assert (first_constant != NULL_RTX);
6201 /* Fill empty slots with the first constant; this increases
6202 our chance of using splats in the recursive call below. */
6203 for (i = 0; i < n_elts; ++i)
6204 {
6205 x = XVECEXP (constant_parts_rtx, 0, i);
6206 if (!(CONST_INT_P (x)
6207 || GET_CODE (x) == CONST_DOUBLE
6208 || GET_CODE (x) == CONST_FIXED))
6209 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6210 }
6211
6212 spu_expand_vector_init (target, constant_parts_rtx);
6213 }
6214 }
6215
6216 /* Load variable parts.  */
6217 if (n_var != 0)
6218 {
6219 rtx insert_operands[4];
6220
6221 insert_operands[0] = target;
6222 insert_operands[2] = target;
6223 for (i = 0; i < n_elts; ++i)
6224 {
6225 x = XVECEXP (vals, 0, i);
6226 if (!(CONST_INT_P (x)
6227 || GET_CODE (x) == CONST_DOUBLE
6228 || GET_CODE (x) == CONST_FIXED))
6229 {
6230 if (!register_operand (x, GET_MODE (x)))
6231 x = force_reg (GET_MODE (x), x);
6232 insert_operands[1] = x;
6233 insert_operands[3] = GEN_INT (i);
6234 spu_builtin_insert (insert_operands);
6235 }
6236 }
6237 }
6238 }
6239
6240 /* Return the insn code for the vector compare instruction for the given
6241 CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6242
6243 static int
6244 get_vec_cmp_insn (enum rtx_code code,
6245 enum machine_mode dest_mode,
6246 enum machine_mode op_mode)
6247
6248 {
6249 switch (code)
6250 {
6251 case EQ:
6252 if (dest_mode == V16QImode && op_mode == V16QImode)
6253 return CODE_FOR_ceq_v16qi;
6254 if (dest_mode == V8HImode && op_mode == V8HImode)
6255 return CODE_FOR_ceq_v8hi;
6256 if (dest_mode == V4SImode && op_mode == V4SImode)
6257 return CODE_FOR_ceq_v4si;
6258 if (dest_mode == V4SImode && op_mode == V4SFmode)
6259 return CODE_FOR_ceq_v4sf;
6260 if (dest_mode == V2DImode && op_mode == V2DFmode)
6261 return CODE_FOR_ceq_v2df;
6262 break;
6263 case GT:
6264 if (dest_mode == V16QImode && op_mode == V16QImode)
6265 return CODE_FOR_cgt_v16qi;
6266 if (dest_mode == V8HImode && op_mode == V8HImode)
6267 return CODE_FOR_cgt_v8hi;
6268 if (dest_mode == V4SImode && op_mode == V4SImode)
6269 return CODE_FOR_cgt_v4si;
6270 if (dest_mode == V4SImode && op_mode == V4SFmode)
6271 return CODE_FOR_cgt_v4sf;
6272 if (dest_mode == V2DImode && op_mode == V2DFmode)
6273 return CODE_FOR_cgt_v2df;
6274 break;
6275 case GTU:
6276 if (dest_mode == V16QImode && op_mode == V16QImode)
6277 return CODE_FOR_clgt_v16qi;
6278 if (dest_mode == V8HImode && op_mode == V8HImode)
6279 return CODE_FOR_clgt_v8hi;
6280 if (dest_mode == V4SImode && op_mode == V4SImode)
6281 return CODE_FOR_clgt_v4si;
6282 break;
6283 default:
6284 break;
6285 }
6286 return -1;
6287 }
6288
6289 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6290 DMODE is the expected destination mode.  This is a recursive function.  */
6291
6292 static rtx
6293 spu_emit_vector_compare (enum rtx_code rcode,
6294 rtx op0, rtx op1,
6295 enum machine_mode dmode)
6296 {
6297 int vec_cmp_insn;
6298 rtx mask;
6299 enum machine_mode dest_mode;
6300 enum machine_mode op_mode = GET_MODE (op1);
6301
6302 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6303
6304 /* Floating point vector compare instructions use destination V4SImode.
6305 Double floating point vector compare instructions use destination V2DImode.
6306 Move destination to appropriate mode later. */
6307 if (dmode == V4SFmode)
6308 dest_mode = V4SImode;
6309 else if (dmode == V2DFmode)
6310 dest_mode = V2DImode;
6311 else
6312 dest_mode = dmode;
6313
6314 mask = gen_reg_rtx (dest_mode);
6315 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6316
6317 if (vec_cmp_insn == -1)
6318 {
6319 bool swap_operands = false;
6320 bool try_again = false;
6321 switch (rcode)
6322 {
6323 case LT:
6324 rcode = GT;
6325 swap_operands = true;
6326 try_again = true;
6327 break;
6328 case LTU:
6329 rcode = GTU;
6330 swap_operands = true;
6331 try_again = true;
6332 break;
6333 case NE:
6334 /* Treat A != B as ~(A==B). */
6335 {
6336 enum insn_code nor_code;
6337 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6338 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6339 gcc_assert (nor_code != CODE_FOR_nothing);
6340 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6341 if (dmode != dest_mode)
6342 {
6343 rtx temp = gen_reg_rtx (dest_mode);
6344 convert_move (temp, mask, 0);
6345 return temp;
6346 }
6347 return mask;
6348 }
6349 break;
6350 case GE:
6351 case GEU:
6352 case LE:
6353 case LEU:
6354 /* Try GT/GTU/LT/LTU OR EQ */
6355 {
6356 rtx c_rtx, eq_rtx;
6357 enum insn_code ior_code;
6358 enum rtx_code new_code;
6359
6360 switch (rcode)
6361 {
6362 case GE: new_code = GT; break;
6363 case GEU: new_code = GTU; break;
6364 case LE: new_code = LT; break;
6365 case LEU: new_code = LTU; break;
6366 default:
6367 gcc_unreachable ();
6368 }
6369
6370 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6371 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6372
6373 ior_code = optab_handler (ior_optab, dest_mode);
6374 gcc_assert (ior_code != CODE_FOR_nothing);
6375 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6376 if (dmode != dest_mode)
6377 {
6378 rtx temp = gen_reg_rtx (dest_mode);
6379 convert_move (temp, mask, 0);
6380 return temp;
6381 }
6382 return mask;
6383 }
6384 break;
6385 default:
6386 gcc_unreachable ();
6387 }
6388
6389 /* You only get two chances. */
6390 if (try_again)
6391 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6392
6393 gcc_assert (vec_cmp_insn != -1);
6394
6395 if (swap_operands)
6396 {
6397 rtx tmp;
6398 tmp = op0;
6399 op0 = op1;
6400 op1 = tmp;
6401 }
6402 }
6403
6404 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6405 if (dmode != dest_mode)
6406 {
6407 rtx temp = gen_reg_rtx (dest_mode);
6408 convert_move (temp, mask, 0);
6409 return temp;
6410 }
6411 return mask;
6412 }
6413
6414
6415 /* Emit vector conditional expression.
6416 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6417 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6418
6419 int
6420 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6421 rtx cond, rtx cc_op0, rtx cc_op1)
6422 {
6423 enum machine_mode dest_mode = GET_MODE (dest);
6424 enum rtx_code rcode = GET_CODE (cond);
6425 rtx mask;
6426
6427 /* Get the vector mask for the given relational operations. */
6428 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6429
6430 emit_insn (gen_selb (dest, op2, op1, mask));
6431
6432 return 1;
6433 }
6434
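/* Force OP into a register of mode MODE.  If OP's mode differs from MODE,
   reinterpret it with a subreg when the sizes match, otherwise go through
   spu_convert.  */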
6435 static rtx
6436 spu_force_reg (enum machine_mode mode, rtx op)
6437 {
6438 rtx x, r;
6439 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6440 {
6441 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6442 || GET_MODE (op) == BLKmode)
6443 return force_reg (mode, convert_to_mode (mode, op, 0));
6444 abort ();
6445 }
6446
6447 r = force_reg (GET_MODE (op), op);
6448 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6449 {
6450 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6451 if (x)
6452 return x;
6453 }
6454
6455 x = gen_reg_rtx (mode);
6456 emit_insn (gen_spu_convert (x, r));
6457 return x;
6458 }
6459
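/* Diagnose invalid operand OP for parameter class P of builtin D.  Immediate
   parameters must be integer literals within the range recorded in
   spu_builtin_range, and for some classes a number of low-order bits is
   ignored, which we warn about.  */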
6460 static void
6461 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6462 {
6463 HOST_WIDE_INT v = 0;
6464 int lsbits;
6465 /* Check the range of immediate operands. */
6466 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6467 {
6468 int range = p - SPU_BTI_7;
6469
6470 if (!CONSTANT_P (op))
6471 error ("%s expects an integer literal in the range [%d, %d]",
6472 d->name,
6473 spu_builtin_range[range].low, spu_builtin_range[range].high);
6474
6475 if (GET_CODE (op) == CONST
6476 && (GET_CODE (XEXP (op, 0)) == PLUS
6477 || GET_CODE (XEXP (op, 0)) == MINUS))
6478 {
6479 v = INTVAL (XEXP (XEXP (op, 0), 1));
6480 op = XEXP (XEXP (op, 0), 0);
6481 }
6482 else if (GET_CODE (op) == CONST_INT)
6483 v = INTVAL (op);
6484 else if (GET_CODE (op) == CONST_VECTOR
6485 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6486 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6487
6488 /* The default for v is 0 which is valid in every range. */
6489 if (v < spu_builtin_range[range].low
6490 || v > spu_builtin_range[range].high)
6491 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6492 d->name,
6493 spu_builtin_range[range].low, spu_builtin_range[range].high,
6494 v);
6495
6496 switch (p)
6497 {
6498 case SPU_BTI_S10_4:
6499 lsbits = 4;
6500 break;
6501 case SPU_BTI_U16_2:
6502 /* This is only used in lqa and stqa.  Even though the insns
6503 encode 16 bits of the address (all but the 2 least
6504 significant), only 14 bits are used because it is masked to
6505 be 16 byte aligned. */
6506 lsbits = 4;
6507 break;
6508 case SPU_BTI_S16_2:
6509 /* This is used for lqr and stqr. */
6510 lsbits = 2;
6511 break;
6512 default:
6513 lsbits = 0;
6514 }
6515
6516 if (GET_CODE (op) == LABEL_REF
6517 || (GET_CODE (op) == SYMBOL_REF
6518 && SYMBOL_REF_FUNCTION_P (op))
6519 || (v & ((1 << lsbits) - 1)) != 0)
6520 warning (0, "%d least significant bits of %s are ignored", lsbits,
6521 d->name);
6522 }
6523 }
6524
6525
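/* Expand the arguments of call EXP into OPS[], with OPS[0] reserved for
   TARGET when the builtin returns a value.  Return the number of operands
   filled in.  */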
6526 static int
6527 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6528 rtx target, rtx ops[])
6529 {
6530 enum insn_code icode = (enum insn_code) d->icode;
6531 int i = 0, a;
6532
6533 /* Expand the arguments into rtl. */
6534
6535 if (d->parm[0] != SPU_BTI_VOID)
6536 ops[i++] = target;
6537
6538 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6539 {
6540 tree arg = CALL_EXPR_ARG (exp, a);
6541 if (arg == 0)
6542 abort ();
6543 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6544 }
6545
6546 gcc_assert (i == insn_data[icode].n_generator_args);
6547 return i;
6548 }
6549
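/* Expand builtin D for call EXP, returning the result in TARGET when there
   is one.  Operands are checked against the insn predicates, scalar
   arguments are splatted where the underlying instruction wants a vector,
   and the resulting pattern is emitted as a call, jump or plain insn
   according to the builtin's type.  */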
6550 static rtx
6551 spu_expand_builtin_1 (struct spu_builtin_description *d,
6552 tree exp, rtx target)
6553 {
6554 rtx pat;
6555 rtx ops[8];
6556 enum insn_code icode = (enum insn_code) d->icode;
6557 enum machine_mode mode, tmode;
6558 int i, p;
6559 int n_operands;
6560 tree return_type;
6561
6562 /* Set up ops[] with values from arglist. */
6563 n_operands = expand_builtin_args (d, exp, target, ops);
6564
6565 /* Handle the target operand which must be operand 0. */
6566 i = 0;
6567 if (d->parm[0] != SPU_BTI_VOID)
6568 {
6569
6570 /* We prefer the mode specified for the match_operand; otherwise
6571 use the mode from the builtin function prototype. */
6572 tmode = insn_data[d->icode].operand[0].mode;
6573 if (tmode == VOIDmode)
6574 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6575
6576 /* Try to use target, because not using it can lead to extra copies,
6577 and when we are using all of the registers extra copies lead
6578 to extra spills. */
6579 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6580 ops[0] = target;
6581 else
6582 target = ops[0] = gen_reg_rtx (tmode);
6583
6584 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6585 abort ();
6586
6587 i++;
6588 }
6589
6590 if (d->fcode == SPU_MASK_FOR_LOAD)
6591 {
6592 enum machine_mode mode = insn_data[icode].operand[1].mode;
6593 tree arg;
6594 rtx addr, op, pat;
6595
6596 /* get addr */
6597 arg = CALL_EXPR_ARG (exp, 0);
6598 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6599 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6600 addr = memory_address (mode, op);
6601
6602 /* negate addr */
6603 op = gen_reg_rtx (GET_MODE (addr));
6604 emit_insn (gen_rtx_SET (VOIDmode, op,
6605 gen_rtx_NEG (GET_MODE (addr), addr)));
6606 op = gen_rtx_MEM (mode, op);
6607
6608 pat = GEN_FCN (icode) (target, op);
6609 if (!pat)
6610 return 0;
6611 emit_insn (pat);
6612 return target;
6613 }
6614
6615 /* Ignore align_hint, but still expand its args in case they have
6616 side effects. */
6617 if (icode == CODE_FOR_spu_align_hint)
6618 return 0;
6619
6620 /* Handle the rest of the operands. */
6621 for (p = 1; i < n_operands; i++, p++)
6622 {
6623 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6624 mode = insn_data[d->icode].operand[i].mode;
6625 else
6626 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6627
6628 /* mode can be VOIDmode here for labels */
6629
6630 /* For specific intrinsics with an immediate operand, e.g.,
6631 si_ai(), we sometimes need to convert the scalar argument to a
6632 vector argument by splatting the scalar. */
6633 if (VECTOR_MODE_P (mode)
6634 && (GET_CODE (ops[i]) == CONST_INT
6635 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6636 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6637 {
6638 if (GET_CODE (ops[i]) == CONST_INT)
6639 ops[i] = spu_const (mode, INTVAL (ops[i]));
6640 else
6641 {
6642 rtx reg = gen_reg_rtx (mode);
6643 enum machine_mode imode = GET_MODE_INNER (mode);
6644 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6645 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6646 if (imode != GET_MODE (ops[i]))
6647 ops[i] = convert_to_mode (imode, ops[i],
6648 TYPE_UNSIGNED (spu_builtin_types
6649 [d->parm[i]]));
6650 emit_insn (gen_spu_splats (reg, ops[i]));
6651 ops[i] = reg;
6652 }
6653 }
6654
6655 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6656
6657 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6658 ops[i] = spu_force_reg (mode, ops[i]);
6659 }
6660
6661 switch (n_operands)
6662 {
6663 case 0:
6664 pat = GEN_FCN (icode) (0);
6665 break;
6666 case 1:
6667 pat = GEN_FCN (icode) (ops[0]);
6668 break;
6669 case 2:
6670 pat = GEN_FCN (icode) (ops[0], ops[1]);
6671 break;
6672 case 3:
6673 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6674 break;
6675 case 4:
6676 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6677 break;
6678 case 5:
6679 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6680 break;
6681 case 6:
6682 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6683 break;
6684 default:
6685 abort ();
6686 }
6687
6688 if (!pat)
6689 abort ();
6690
6691 if (d->type == B_CALL || d->type == B_BISLED)
6692 emit_call_insn (pat);
6693 else if (d->type == B_JUMP)
6694 {
6695 emit_jump_insn (pat);
6696 emit_barrier ();
6697 }
6698 else
6699 emit_insn (pat);
6700
6701 return_type = spu_builtin_types[d->parm[0]];
6702 if (d->parm[0] != SPU_BTI_VOID
6703 && GET_MODE (target) != TYPE_MODE (return_type))
6704 {
6705 /* target is the return value. It should always be the mode of
6706 the builtin function prototype. */
6707 target = spu_force_reg (TYPE_MODE (return_type), target);
6708 }
6709
6710 return target;
6711 }
6712
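/* Implement TARGET_EXPAND_BUILTIN.  */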
6713 rtx
6714 spu_expand_builtin (tree exp,
6715 rtx target,
6716 rtx subtarget ATTRIBUTE_UNUSED,
6717 enum machine_mode mode ATTRIBUTE_UNUSED,
6718 int ignore ATTRIBUTE_UNUSED)
6719 {
6720 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6721 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6722 struct spu_builtin_description *d;
6723
6724 if (fcode < NUM_SPU_BUILTINS)
6725 {
6726 d = &spu_builtins[fcode];
6727
6728 return spu_expand_builtin_1 (d, exp, target);
6729 }
6730 abort ();
6731 }
6732
6733 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6734 static tree
6735 spu_builtin_mul_widen_even (tree type)
6736 {
6737 switch (TYPE_MODE (type))
6738 {
6739 case V8HImode:
6740 if (TYPE_UNSIGNED (type))
6741 return spu_builtin_decls[SPU_MULE_0];
6742 else
6743 return spu_builtin_decls[SPU_MULE_1];
6744 break;
6745 default:
6746 return NULL_TREE;
6747 }
6748 }
6749
6750 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6751 static tree
6752 spu_builtin_mul_widen_odd (tree type)
6753 {
6754 switch (TYPE_MODE (type))
6755 {
6756 case V8HImode:
6757 if (TYPE_UNSIGNED (type))
6758 return spu_builtin_decls[SPU_MULO_1];
6759 else
6760 return spu_builtin_decls[SPU_MULO_0];
6761 break;
6762 default:
6763 return NULL_TREE;
6764 }
6765 }
6766
6767 /* Implement targetm.vectorize.builtin_mask_for_load. */
6768 static tree
6769 spu_builtin_mask_for_load (void)
6770 {
6771 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6772 }
6773
6774 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6775 static int
6776 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6777 tree vectype ATTRIBUTE_UNUSED,
6778 int misalign ATTRIBUTE_UNUSED)
6779 {
6780 switch (type_of_cost)
6781 {
6782 case scalar_stmt:
6783 case vector_stmt:
6784 case vector_load:
6785 case vector_store:
6786 case vec_to_scalar:
6787 case scalar_to_vec:
6788 case cond_branch_not_taken:
6789 case vec_perm:
6790 return 1;
6791
6792 case scalar_store:
6793 return 10;
6794
6795 case scalar_load:
6796 /* Load + rotate. */
6797 return 2;
6798
6799 case unaligned_load:
6800 return 2;
6801
6802 case cond_branch_taken:
6803 return 6;
6804
6805 default:
6806 gcc_unreachable ();
6807 }
6808 }
6809
6810 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6811 after applying N iterations.  This routine does not determine
6812 how many iterations are required to reach the desired alignment.  */
6813
6814 static bool
6815 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6816 {
6817 if (is_packed)
6818 return false;
6819
6820 /* All other types are naturally aligned. */
6821 return true;
6822 }
6823
6824 /* Implement targetm.vectorize.builtin_vec_perm. */
6825 tree
6826 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6827 {
6828 *mask_element_type = unsigned_char_type_node;
6829
6830 switch (TYPE_MODE (type))
6831 {
6832 case V16QImode:
6833 if (TYPE_UNSIGNED (type))
6834 return spu_builtin_decls[SPU_SHUFFLE_0];
6835 else
6836 return spu_builtin_decls[SPU_SHUFFLE_1];
6837
6838 case V8HImode:
6839 if (TYPE_UNSIGNED (type))
6840 return spu_builtin_decls[SPU_SHUFFLE_2];
6841 else
6842 return spu_builtin_decls[SPU_SHUFFLE_3];
6843
6844 case V4SImode:
6845 if (TYPE_UNSIGNED (type))
6846 return spu_builtin_decls[SPU_SHUFFLE_4];
6847 else
6848 return spu_builtin_decls[SPU_SHUFFLE_5];
6849
6850 case V2DImode:
6851 if (TYPE_UNSIGNED (type))
6852 return spu_builtin_decls[SPU_SHUFFLE_6];
6853 else
6854 return spu_builtin_decls[SPU_SHUFFLE_7];
6855
6856 case V4SFmode:
6857 return spu_builtin_decls[SPU_SHUFFLE_8];
6858
6859 case V2DFmode:
6860 return spu_builtin_decls[SPU_SHUFFLE_9];
6861
6862 default:
6863 return NULL_TREE;
6864 }
6865 }
6866
6867 /* Return the appropriate mode for a named address pointer. */
6868 static enum machine_mode
6869 spu_addr_space_pointer_mode (addr_space_t addrspace)
6870 {
6871 switch (addrspace)
6872 {
6873 case ADDR_SPACE_GENERIC:
6874 return ptr_mode;
6875 case ADDR_SPACE_EA:
6876 return EAmode;
6877 default:
6878 gcc_unreachable ();
6879 }
6880 }
6881
6882 /* Return the appropriate mode for a named address address. */
6883 static enum machine_mode
6884 spu_addr_space_address_mode (addr_space_t addrspace)
6885 {
6886 switch (addrspace)
6887 {
6888 case ADDR_SPACE_GENERIC:
6889 return Pmode;
6890 case ADDR_SPACE_EA:
6891 return EAmode;
6892 default:
6893 gcc_unreachable ();
6894 }
6895 }
6896
6897 /* Determine if one named address space is a subset of another. */
6898
6899 static bool
6900 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6901 {
6902 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6903 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6904
6905 if (subset == superset)
6906 return true;
6907
6908 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6909 being subsets but instead as disjoint address spaces. */
6910 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6911 return false;
6912
6913 else
6914 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6915 }
6916
6917 /* Convert from one address space to another. */
6918 static rtx
6919 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6920 {
6921 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6922 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6923
6924 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6925 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6926
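  /* Converting between the generic (local store) space and __ea amounts to
     adding or subtracting the local store's effective address, which the
     runtime exports as __ea_local_store.  The conditional move in each
     branch keeps a null pointer null across the conversion.  */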
6927 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6928 {
6929 rtx result, ls;
6930
6931 ls = gen_const_mem (DImode,
6932 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6933 set_mem_align (ls, 128);
6934
6935 result = gen_reg_rtx (Pmode);
6936 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6937 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6938 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6939 ls, const0_rtx, Pmode, 1);
6940
6941 emit_insn (gen_subsi3 (result, op, ls));
6942
6943 return result;
6944 }
6945
6946 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6947 {
6948 rtx result, ls;
6949
6950 ls = gen_const_mem (DImode,
6951 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6952 set_mem_align (ls, 128);
6953
6954 result = gen_reg_rtx (EAmode);
6955 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6956 op = force_reg (Pmode, op);
6957 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6958 ls, const0_rtx, EAmode, 1);
6959 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6960
6961 if (EAmode == SImode)
6962 emit_insn (gen_addsi3 (result, op, ls));
6963 else
6964 emit_insn (gen_adddi3 (result, op, ls));
6965
6966 return result;
6967 }
6968
6969 else
6970 gcc_unreachable ();
6971 }
6972
6973
6974 /* Count the total number of instructions in each pipe and return the
6975 maximum, which is used as the Minimum Iteration Interval (MII)
6976 in the modulo scheduler.  get_pipe () will return -2, -1, 0, or 1;
6977 -2 marks instructions that can go in either pipe0 or pipe1.  */
6978 static int
6979 spu_sms_res_mii (struct ddg *g)
6980 {
6981 int i;
6982 unsigned t[4] = {0, 0, 0, 0};
6983
6984 for (i = 0; i < g->num_nodes; i++)
6985 {
6986 rtx insn = g->nodes[i].insn;
6987 int p = get_pipe (insn) + 2;
6988
6989 gcc_assert (p >= 0);
6990 gcc_assert (p < 4);
6991
6992 t[p]++;
6993 if (dump_file && INSN_P (insn))
6994 fprintf (dump_file, "i%d %s %d %d\n",
6995 INSN_UID (insn),
6996 insn_data[INSN_CODE(insn)].name,
6997 p, t[p]);
6998 }
6999 if (dump_file)
7000 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7001
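  /* The bound is the larger of: half the instructions that need an issue
     slot (at most two issue per cycle), and the count of the busier single
     pipe.  */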
7002 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7003 }
7004
7005
7006 void
7007 spu_init_expanders (void)
7008 {
7009 if (cfun)
7010 {
7011 rtx r0, r1;
7012 /* The hard frame pointer register is only 128-bit aligned when
7013 frame_pointer_needed is true. We don't know that until we're
7014 expanding the prologue. */
7015 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7016
7017 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7018 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7019 to be treated as aligned, so generate them here. */
7020 r0 = gen_reg_rtx (SImode);
7021 r1 = gen_reg_rtx (SImode);
7022 mark_reg_pointer (r0, 128);
7023 mark_reg_pointer (r1, 128);
7024 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7025 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7026 }
7027 }
7028
7029 static enum machine_mode
7030 spu_libgcc_cmp_return_mode (void)
7031 {
7032
7033 /* For the SPU, word mode is TImode, so it is better to use SImode
7034 for compare returns.  */
7035 return SImode;
7036 }
7037
7038 static enum machine_mode
7039 spu_libgcc_shift_count_mode (void)
7040 {
7041 /* For the SPU, word mode is TImode, so it is better to use SImode
7042 for shift counts.  */
7043 return SImode;
7044 }
7045
7046 /* An early place to adjust some flags after GCC has finished processing
7047 them.  */
7048 static void
7049 asm_file_start (void)
7050 {
7051 default_file_start ();
7052 }
7053
7054 /* Implement targetm.section_type_flags. */
7055 static unsigned int
7056 spu_section_type_flags (tree decl, const char *name, int reloc)
7057 {
7058 /* .toe needs to have type @nobits. */
7059 if (strcmp (name, ".toe") == 0)
7060 return SECTION_BSS;
7061 /* Don't load _ea into the current address space. */
7062 if (strcmp (name, "._ea") == 0)
7063 return SECTION_WRITE | SECTION_DEBUG;
7064 return default_section_type_flags (decl, name, reloc);
7065 }
7066
7067 /* Implement targetm.select_section. */
7068 static section *
7069 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7070 {
7071 /* Variables and constants defined in the __ea address space
7072 go into a special section named "._ea". */
7073 if (TREE_TYPE (decl) != error_mark_node
7074 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7075 {
7076 /* We might get called with string constants, but get_named_section
7077 doesn't like them as they are not DECLs. Also, we need to set
7078 flags in that case. */
7079 if (!DECL_P (decl))
7080 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7081
7082 return get_named_section (decl, "._ea", reloc);
7083 }
7084
7085 return default_elf_select_section (decl, reloc, align);
7086 }
7087
7088 /* Implement targetm.unique_section. */
7089 static void
7090 spu_unique_section (tree decl, int reloc)
7091 {
7092 /* We don't support unique section names in the __ea address
7093 space for now. */
7094 if (TREE_TYPE (decl) != error_mark_node
7095 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7096 return;
7097
7098 default_unique_section (decl, reloc);
7099 }
7100
7101 /* Generate a constant or register which contains 2^SCALE. We assume
7102 the result is valid for MODE. Currently, MODE must be V4SFmode and
7103 SCALE must be SImode. */
7104 rtx
7105 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7106 {
7107 gcc_assert (mode == V4SFmode);
7108 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7109 if (GET_CODE (scale) != CONST_INT)
7110 {
7111 /* unsigned int exp = (127 + scale) << 23;
7112 __vector float m = (__vector float) spu_splats (exp); */
7113 rtx reg = force_reg (SImode, scale);
7114 rtx exp = gen_reg_rtx (SImode);
7115 rtx mul = gen_reg_rtx (mode);
7116 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7117 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7118 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7119 return mul;
7120 }
7121 else
7122 {
7123 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7124 unsigned char arr[16];
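      /* 2**SCALE as an IEEE single is just (127 + SCALE) << 23 with a zero
         mantissa; pack that big-endian into each word: the exponent's high
         seven bits land in byte 0 and its low bit in the top of byte 1.  */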
7125 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7126 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7127 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7128 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7129 return array_to_constant (mode, arr);
7130 }
7131 }
7132
7133 /* After reload, just change the convert into a move instruction
7134 or a dead instruction. */
7135 void
7136 spu_split_convert (rtx ops[])
7137 {
7138 if (REGNO (ops[0]) == REGNO (ops[1]))
7139 emit_note (NOTE_INSN_DELETED);
7140 else
7141 {
7142 /* Use TImode always as this might help hard reg copyprop. */
7143 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7144 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7145 emit_insn (gen_move_insn (op0, op1));
7146 }
7147 }
7148
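/* Emit the profiling code used with -p: a branch-and-set-link to _mcount
   with the return address left in register 75.  */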
7149 void
7150 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7151 {
7152 fprintf (file, "# profile\n");
7153 fprintf (file, "brsl $75, _mcount\n");
7154 }
7155
7156 /* Implement targetm.ref_may_alias_errno. */
7157 static bool
7158 spu_ref_may_alias_errno (ao_ref *ref)
7159 {
7160 tree base = ao_ref_base (ref);
7161
7162 /* With SPU newlib, errno is defined as something like
7163 _impure_data._errno
7164 The default implementation of this target macro does not
7165 recognize such expressions, so we special-case it here.  */
7166
7167 if (TREE_CODE (base) == VAR_DECL
7168 && !TREE_STATIC (base)
7169 && DECL_EXTERNAL (base)
7170 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7171 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7172 "_impure_data") == 0
7173 /* _errno is the first member of _impure_data. */
7174 && ref->offset == 0)
7175 return true;
7176
7177 return default_ref_may_alias_errno (ref);
7178 }
7179
7180 /* Output thunk to FILE that implements a C++ virtual function call (with
7181 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7182 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7183 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7184 relative to the resulting this pointer. */
7185
7186 static void
7187 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7188 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7189 tree function)
7190 {
7191 rtx op[8];
7192
7193 /* Make sure unwind info is emitted for the thunk if needed. */
7194 final_start_function (emit_barrier (), file, 1);
7195
7196 /* Operand 0 is the target function. */
7197 op[0] = XEXP (DECL_RTL (function), 0);
7198
7199 /* Operand 1 is the 'this' pointer. */
7200 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7201 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7202 else
7203 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7204
7205 /* Operands 2/3 are the low/high halfwords of delta. */
7206 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7207 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7208
7209 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7210 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7211 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7212
7213 /* Operands 6/7 are temporary registers. */
7214 op[6] = gen_rtx_REG (Pmode, 79);
7215 op[7] = gen_rtx_REG (Pmode, 78);
7216
7217 /* Add DELTA to this pointer. */
7218 if (delta)
7219 {
7220 if (delta >= -0x200 && delta < 0x200)
7221 output_asm_insn ("ai\t%1,%1,%2", op);
7222 else if (delta >= -0x8000 && delta < 0x8000)
7223 {
7224 output_asm_insn ("il\t%6,%2", op);
7225 output_asm_insn ("a\t%1,%1,%6", op);
7226 }
7227 else
7228 {
7229 output_asm_insn ("ilhu\t%6,%3", op);
7230 output_asm_insn ("iohl\t%6,%2", op);
7231 output_asm_insn ("a\t%1,%1,%6", op);
7232 }
7233 }
7234
7235 /* Perform vcall adjustment. */
7236 if (vcall_offset)
7237 {
7238 output_asm_insn ("lqd\t%7,0(%1)", op);
7239 output_asm_insn ("rotqby\t%7,%7,%1", op);
7240
7241 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7242 output_asm_insn ("ai\t%7,%7,%4", op);
7243 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7244 {
7245 output_asm_insn ("il\t%6,%4", op);
7246 output_asm_insn ("a\t%7,%7,%6", op);
7247 }
7248 else
7249 {
7250 output_asm_insn ("ilhu\t%6,%5", op);
7251 output_asm_insn ("iohl\t%6,%4", op);
7252 output_asm_insn ("a\t%7,%7,%6", op);
7253 }
7254
7255 output_asm_insn ("lqd\t%6,0(%7)", op);
7256 output_asm_insn ("rotqby\t%6,%6,%7", op);
7257 output_asm_insn ("a\t%1,%1,%6", op);
7258 }
7259
7260 /* Jump to target. */
7261 output_asm_insn ("br\t%0", op);
7262
7263 final_end_function ();
7264 }
7265
7266 #include "gt-spu.h"