1 /* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
16 02110-1301, USA. */
17
18 #include "config.h"
19 #include "system.h"
20 #include "coretypes.h"
21 #include "tm.h"
22 #include "rtl.h"
23 #include "regs.h"
24 #include "hard-reg-set.h"
25 #include "real.h"
26 #include "insn-config.h"
27 #include "conditions.h"
28 #include "insn-attr.h"
29 #include "flags.h"
30 #include "recog.h"
31 #include "obstack.h"
32 #include "tree.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "except.h"
36 #include "function.h"
37 #include "output.h"
38 #include "basic-block.h"
39 #include "integrate.h"
40 #include "toplev.h"
41 #include "ggc.h"
42 #include "hashtab.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "langhooks.h"
47 #include "reload.h"
48 #include "cfglayout.h"
49 #include "sched-int.h"
50 #include "params.h"
51 #include "assert.h"
52 #include "c-common.h"
53 #include "machmode.h"
54 #include "tree-gimple.h"
55 #include "tm-constrs.h"
56 #include "spu-builtins.h"
57
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
60 {
61 int low, high;
62 };
63
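/* The range of values accepted for each SPU_BTI_* immediate operand
   type, as noted in the per-entry comments below.  */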
64 static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77 };
78
79 \f
80 /* Target specific attribute specifications. */
81 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88 static rtx get_pic_reg (void);
89 static int need_to_save_reg (int regno, int saving);
90 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94 static void emit_nop_for_insn (rtx insn);
95 static bool insn_clobbers_hbr (rtx insn);
96 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98 static rtx get_branch_target (rtx branch);
99 static void insert_branch_hints (void);
100 static void insert_nops (void);
101 static void spu_machine_dependent_reorg (void);
102 static int spu_sched_issue_rate (void);
103 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
104 int can_issue_more);
105 static int get_pipe (rtx insn);
106 static int spu_sched_adjust_priority (rtx insn, int pri);
107 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
108 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
109 int flags,
110 unsigned char *no_add_attrs);
111 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
112 int flags,
113 unsigned char *no_add_attrs);
114 static int spu_naked_function_p (tree func);
115 static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
116 tree type, unsigned char named);
117 static tree spu_build_builtin_va_list (void);
118 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
119 tree * post_p);
120 static int regno_aligned_for_load (int regno);
121 static int store_with_one_insn_p (rtx mem);
122 static int reg_align (rtx reg);
123 static int mem_is_padded_component_ref (rtx x);
124 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
125 static void spu_asm_globalize_label (FILE * file, const char *name);
126 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
127 int *total);
128 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
129 static void spu_init_libfuncs (void);
130 static bool spu_return_in_memory (tree type, tree fntype);
131 static void fix_range (const char *);
132 static void spu_encode_section_info (tree, rtx, int);
133 static tree spu_builtin_mul_widen_even (tree);
134 static tree spu_builtin_mul_widen_odd (tree);
135 static tree spu_builtin_mask_for_load (void);
136
137 extern const char *reg_names[];
138 rtx spu_compare_op0, spu_compare_op1;
139
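/* The single-instruction immediate forms that which_immediate_load and
   which_logical_immediate classify constants into.  */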
140 enum spu_immediate {
141 SPU_NONE,
142 SPU_IL,
143 SPU_ILA,
144 SPU_ILH,
145 SPU_ILHU,
146 SPU_ORI,
147 SPU_ORHI,
148 SPU_ORBI,
149 SPU_IOHL
150 };
151 enum immediate_class
152 {
153 IC_POOL, /* constant pool */
154 IC_IL1, /* one il* instruction */
155 IC_IL2, /* both ilhu and iohl instructions */
156 IC_IL1s, /* one il* instruction */
157 IC_IL2s, /* both ilhu and iohl instructions */
158 IC_FSMBI, /* the fsmbi instruction */
159 IC_CPAT, /* one of the c*d instructions */
160 IC_FSMBI2 /* fsmbi plus 1 other instruction */
161 };
162
163 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
164 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
165 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
166 static enum immediate_class classify_immediate (rtx op,
167 enum machine_mode mode);
168
169 /* Built in types. */
170 tree spu_builtin_types[SPU_BTI_MAX];
171 \f
172 /* TARGET overrides. */
173
174 #undef TARGET_INIT_BUILTINS
175 #define TARGET_INIT_BUILTINS spu_init_builtins
176
177 #undef TARGET_EXPAND_BUILTIN
178 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
179
180 #undef TARGET_EH_RETURN_FILTER_MODE
181 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
182
183 /* The .8byte directive doesn't seem to work well for a 32 bit
184 architecture. */
185 #undef TARGET_ASM_UNALIGNED_DI_OP
186 #define TARGET_ASM_UNALIGNED_DI_OP NULL
187
188 #undef TARGET_RTX_COSTS
189 #define TARGET_RTX_COSTS spu_rtx_costs
190
191 #undef TARGET_ADDRESS_COST
192 #define TARGET_ADDRESS_COST hook_int_rtx_0
193
194 #undef TARGET_SCHED_ISSUE_RATE
195 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
196
197 #undef TARGET_SCHED_VARIABLE_ISSUE
198 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
199
200 #undef TARGET_SCHED_ADJUST_PRIORITY
201 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
202
203 #undef TARGET_SCHED_ADJUST_COST
204 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
205
206 const struct attribute_spec spu_attribute_table[];
207 #undef TARGET_ATTRIBUTE_TABLE
208 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
209
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER spu_assemble_integer
212
213 #undef TARGET_SCALAR_MODE_SUPPORTED_P
214 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
215
216 #undef TARGET_VECTOR_MODE_SUPPORTED_P
217 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
218
219 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
220 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
221
222 #undef TARGET_ASM_GLOBALIZE_LABEL
223 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
224
225 #undef TARGET_PASS_BY_REFERENCE
226 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
227
228 #undef TARGET_MUST_PASS_IN_STACK
229 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
230
231 #undef TARGET_BUILD_BUILTIN_VA_LIST
232 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
233
234 #undef TARGET_SETUP_INCOMING_VARARGS
235 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
236
237 #undef TARGET_MACHINE_DEPENDENT_REORG
238 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
239
240 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
241 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
242
243 #undef TARGET_DEFAULT_TARGET_FLAGS
244 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
245
246 #undef TARGET_INIT_LIBFUNCS
247 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
248
249 #undef TARGET_RETURN_IN_MEMORY
250 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
251
252 #undef TARGET_ENCODE_SECTION_INFO
253 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
254
255 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
256 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
257
258 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
259 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
260
261 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
262 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
263
264 struct gcc_target targetm = TARGET_INITIALIZER;
265
266 void
267 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
268 {
269 /* By default small loops are completely unrolled (peeled) at -O3.
270 For SPU it is more important to keep code small by default. */
271 if (!flag_unroll_loops && !flag_peel_loops)
272 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
273
274 /* Override some of the default param values. With so many registers
275 larger values are better for these params. */
276 MAX_PENDING_LIST_LENGTH = 128;
277
278 /* With so many registers this is better on by default. */
279 flag_rename_registers = 1;
280 }
281
282 /* Sometimes certain combinations of command options do not make sense
283 on a particular target machine. You can define a macro
284 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
285 executed once just after all the command options have been parsed. */
286 void
287 spu_override_options (void)
288 {
289 flag_omit_frame_pointer = 1;
290
291 if (align_functions < 8)
292 align_functions = 8;
293
294 if (spu_fixed_range_string)
295 fix_range (spu_fixed_range_string);
296 }
297 \f
298 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
299 struct attribute_spec.handler. */
300
301 /* Table of machine attributes. */
302 const struct attribute_spec spu_attribute_table[] =
303 {
304 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
305 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
306 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
307 { NULL, 0, 0, false, false, false, NULL }
308 };
309
310 /* True if MODE is valid for the target. By "valid", we mean able to
311 be manipulated in non-trivial ways. In particular, this means all
312 the arithmetic is supported. */
313 static bool
314 spu_scalar_mode_supported_p (enum machine_mode mode)
315 {
316 switch (mode)
317 {
318 case QImode:
319 case HImode:
320 case SImode:
321 case SFmode:
322 case DImode:
323 case TImode:
324 case DFmode:
325 return true;
326
327 default:
328 return false;
329 }
330 }
331
332 /* Similarly for vector modes. "Supported" here is less strict. At
333 least some operations are supported; need to check optabs or builtins
334 for further details. */
335 static bool
336 spu_vector_mode_supported_p (enum machine_mode mode)
337 {
338 switch (mode)
339 {
340 case V16QImode:
341 case V8HImode:
342 case V4SImode:
343 case V2DImode:
344 case V4SFmode:
345 case V2DFmode:
346 return true;
347
348 default:
349 return false;
350 }
351 }
352
353 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
354 least significant bytes of the outer mode. This function returns
355 TRUE for the SUBREGs where this is correct. */
356 int
357 valid_subreg (rtx op)
358 {
359 enum machine_mode om = GET_MODE (op);
360 enum machine_mode im = GET_MODE (SUBREG_REG (op));
361 return om != VOIDmode && im != VOIDmode
362 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
363 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
364 }
365
366 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
367 and adjust the start offset. */
368 static rtx
369 adjust_operand (rtx op, HOST_WIDE_INT * start)
370 {
371 enum machine_mode mode;
372 int op_size;
373 /* Strip any SUBREG */
374 if (GET_CODE (op) == SUBREG)
375 {
376 if (start)
377 *start -=
378 GET_MODE_BITSIZE (GET_MODE (op)) -
379 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
380 op = SUBREG_REG (op);
381 }
382 /* If it is smaller than SI, arrange for the SUBREG created below to be SImode. */
383 op_size = GET_MODE_BITSIZE (GET_MODE (op));
384 if (op_size < 32)
385 {
386 if (start)
387 *start += 32 - op_size;
388 op_size = 32;
389 }
390 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
391 mode = mode_for_size (op_size, MODE_INT, 0);
392 if (mode != GET_MODE (op))
393 op = gen_rtx_SUBREG (mode, op, 0);
394 return op;
395 }
396
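/* Expand an extv or extzv pattern.  ops[0] is the destination, ops[1]
   the source, ops[2] the field width and ops[3] the start bit.  The
   field is isolated with a left shift by the start position followed
   by a right shift (arithmetic or logical according to UNSIGNEDP) and
   the result is moved into the destination.  */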
397 void
398 spu_expand_extv (rtx ops[], int unsignedp)
399 {
400 HOST_WIDE_INT width = INTVAL (ops[2]);
401 HOST_WIDE_INT start = INTVAL (ops[3]);
402 HOST_WIDE_INT src_size, dst_size;
403 enum machine_mode src_mode, dst_mode;
404 rtx dst = ops[0], src = ops[1];
405 rtx s;
406
407 dst = adjust_operand (ops[0], 0);
408 dst_mode = GET_MODE (dst);
409 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
410
411 src = adjust_operand (src, &start);
412 src_mode = GET_MODE (src);
413 src_size = GET_MODE_BITSIZE (GET_MODE (src));
414
415 if (start > 0)
416 {
417 s = gen_reg_rtx (src_mode);
418 switch (src_mode)
419 {
420 case SImode:
421 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
422 break;
423 case DImode:
424 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
425 break;
426 case TImode:
427 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
428 break;
429 default:
430 abort ();
431 }
432 src = s;
433 }
434
435 if (width < src_size)
436 {
437 rtx pat;
438 int icode;
439 switch (src_mode)
440 {
441 case SImode:
442 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
443 break;
444 case DImode:
445 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
446 break;
447 case TImode:
448 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
449 break;
450 default:
451 abort ();
452 }
453 s = gen_reg_rtx (src_mode);
454 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
455 emit_insn (pat);
456 src = s;
457 }
458
459 convert_move (dst, src, unsignedp);
460 }
461
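/* Expand an insv pattern.  ops[0] is the field to store into, ops[1]
   the field width, ops[2] the start bit and ops[3] the value to
   insert.  A mask covering the field is built, the source is shifted
   into place, and the two are combined with selb.  MEM destinations
   are handled by rotating the data and mask into an aligned quadword,
   and into a second quadword when the field may extend past the
   first.  */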
462 void
463 spu_expand_insv (rtx ops[])
464 {
465 HOST_WIDE_INT width = INTVAL (ops[1]);
466 HOST_WIDE_INT start = INTVAL (ops[2]);
467 HOST_WIDE_INT maskbits;
468 enum machine_mode dst_mode, src_mode;
469 rtx dst = ops[0], src = ops[3];
470 int dst_size, src_size;
471 rtx mask;
472 rtx shift_reg;
473 int shift;
474
475
476 if (GET_CODE (ops[0]) == MEM)
477 dst = gen_reg_rtx (TImode);
478 else
479 dst = adjust_operand (dst, &start);
480 dst_mode = GET_MODE (dst);
481 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
482
483 if (CONSTANT_P (src))
484 {
485 enum machine_mode m =
486 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
487 src = force_reg (m, convert_to_mode (m, src, 0));
488 }
489 src = adjust_operand (src, 0);
490 src_mode = GET_MODE (src);
491 src_size = GET_MODE_BITSIZE (GET_MODE (src));
492
493 mask = gen_reg_rtx (dst_mode);
494 shift_reg = gen_reg_rtx (dst_mode);
495 shift = dst_size - start - width;
496
497 /* It's not safe to use subreg here because the compiler assumes
498 that the SUBREG_REG is right justified in the SUBREG. */
499 convert_move (shift_reg, src, 1);
500
501 if (shift > 0)
502 {
503 switch (dst_mode)
504 {
505 case SImode:
506 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
507 break;
508 case DImode:
509 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
510 break;
511 case TImode:
512 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
513 break;
514 default:
515 abort ();
516 }
517 }
518 else if (shift < 0)
519 abort ();
520
521 switch (dst_size)
522 {
523 case 32:
524 maskbits = (-1ll << (32 - width - start));
525 if (start)
526 maskbits += (1ll << (32 - start));
527 emit_move_insn (mask, GEN_INT (maskbits));
528 break;
529 case 64:
530 maskbits = (-1ll << (64 - width - start));
531 if (start)
532 maskbits += (1ll << (64 - start));
533 emit_move_insn (mask, GEN_INT (maskbits));
534 break;
535 case 128:
536 {
537 unsigned char arr[16];
538 int i = start / 8;
539 memset (arr, 0, sizeof (arr));
540 arr[i] = 0xff >> (start & 7);
541 for (i++; i <= (start + width - 1) / 8; i++)
542 arr[i] = 0xff;
543 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
544 emit_move_insn (mask, array_to_constant (TImode, arr));
545 }
546 break;
547 default:
548 abort ();
549 }
550 if (GET_CODE (ops[0]) == MEM)
551 {
552 rtx aligned = gen_reg_rtx (SImode);
553 rtx low = gen_reg_rtx (SImode);
554 rtx addr = gen_reg_rtx (SImode);
555 rtx rotl = gen_reg_rtx (SImode);
556 rtx mask0 = gen_reg_rtx (TImode);
557 rtx mem;
558
559 emit_move_insn (addr, XEXP (ops[0], 0));
560 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
561 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
562 emit_insn (gen_negsi2 (rotl, low));
563 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
564 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
565 mem = change_address (ops[0], TImode, aligned);
566 set_mem_alias_set (mem, 0);
567 emit_move_insn (dst, mem);
568 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
569 emit_move_insn (mem, dst);
570 if (start + width > MEM_ALIGN (ops[0]))
571 {
572 rtx shl = gen_reg_rtx (SImode);
573 rtx mask1 = gen_reg_rtx (TImode);
574 rtx dst1 = gen_reg_rtx (TImode);
575 rtx mem1;
576 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
577 emit_insn (gen_shlqby_ti (mask1, mask, shl));
578 mem1 = adjust_address (mem, TImode, 16);
579 set_mem_alias_set (mem1, 0);
580 emit_move_insn (dst1, mem1);
581 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
582 emit_move_insn (mem1, dst1);
583 }
584 }
585 else
586 emit_insn (gen_selb (dst, dst, shift_reg, mask));
587 }
588
589
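/* Try to expand a block move inline.  ops[0] is the destination MEM,
   ops[1] the source MEM, ops[2] the byte count and ops[3] the
   alignment.  Only small, 16-byte aligned copies are handled here;
   the partial quadword at the end is merged with selb.  Return 1 when
   the move was expanded here, 0 otherwise.  */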
590 int
591 spu_expand_block_move (rtx ops[])
592 {
593 HOST_WIDE_INT bytes, align, offset;
594 rtx src, dst, sreg, dreg, target;
595 int i;
596 if (GET_CODE (ops[2]) != CONST_INT
597 || GET_CODE (ops[3]) != CONST_INT
598 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
599 return 0;
600
601 bytes = INTVAL (ops[2]);
602 align = INTVAL (ops[3]);
603
604 if (bytes <= 0)
605 return 1;
606
607 dst = ops[0];
608 src = ops[1];
609
610 if (align == 16)
611 {
612 for (offset = 0; offset + 16 <= bytes; offset += 16)
613 {
614 dst = adjust_address (ops[0], V16QImode, offset);
615 src = adjust_address (ops[1], V16QImode, offset);
616 emit_move_insn (dst, src);
617 }
618 if (offset < bytes)
619 {
620 rtx mask;
621 unsigned char arr[16] = { 0 };
622 for (i = 0; i < bytes - offset; i++)
623 arr[i] = 0xff;
624 dst = adjust_address (ops[0], V16QImode, offset);
625 src = adjust_address (ops[1], V16QImode, offset);
626 mask = gen_reg_rtx (V16QImode);
627 sreg = gen_reg_rtx (V16QImode);
628 dreg = gen_reg_rtx (V16QImode);
629 target = gen_reg_rtx (V16QImode);
630 emit_move_insn (mask, array_to_constant (V16QImode, arr));
631 emit_move_insn (dreg, dst);
632 emit_move_insn (sreg, src);
633 emit_insn (gen_selb (target, dreg, sreg, mask));
634 emit_move_insn (dst, target);
635 }
636 return 1;
637 }
638 return 0;
639 }
640
641 enum spu_comp_code
642 { SPU_EQ, SPU_GT, SPU_GTU };
643
644
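/* Compare instruction codes, indexed first by operand mode (QI, HI,
   SI, DI, TI, SF, DF, vector) and then by spu_comp_code.  A zero
   entry means no instruction exists for that combination.  */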
645 int spu_comp_icode[8][3] = {
646 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
647 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
648 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
649 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
650 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
651 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
652 {0, 0, 0},
653 {CODE_FOR_ceq_vec, 0, 0},
654 };
655
656 /* Emit a compare for CODE and use the result for a branch, a set or a
657 select, depending on IS_SET. GCC can figure this out too if we
658 don't provide all variations of compares, but since GCC always wants
659 to use WORD_MODE, we can generate better code in most cases if we do
660 it ourselves. */
661 void
662 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
663 {
664 int reverse_compare = 0;
665 int reverse_test = 0;
666 rtx compare_result;
667 rtx comp_rtx;
668 rtx target = operands[0];
669 enum machine_mode comp_mode;
670 enum machine_mode op_mode;
671 enum spu_comp_code scode;
672 int index;
673
674 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
675 and so on, to keep the constant in operand 1. */
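/* For example, (x >= 10) becomes (x > 9) and (x < 10) becomes (x <= 9).  */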
676 if (GET_CODE (spu_compare_op1) == CONST_INT)
677 {
678 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
679 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
680 switch (code)
681 {
682 case GE:
683 spu_compare_op1 = GEN_INT (val);
684 code = GT;
685 break;
686 case LT:
687 spu_compare_op1 = GEN_INT (val);
688 code = LE;
689 break;
690 case GEU:
691 spu_compare_op1 = GEN_INT (val);
692 code = GTU;
693 break;
694 case LTU:
695 spu_compare_op1 = GEN_INT (val);
696 code = LEU;
697 break;
698 default:
699 break;
700 }
701 }
702
703 switch (code)
704 {
705 case GE:
706 reverse_compare = 1;
707 reverse_test = 1;
708 scode = SPU_GT;
709 break;
710 case LE:
711 reverse_compare = 0;
712 reverse_test = 1;
713 scode = SPU_GT;
714 break;
715 case LT:
716 reverse_compare = 1;
717 reverse_test = 0;
718 scode = SPU_GT;
719 break;
720 case GEU:
721 reverse_compare = 1;
722 reverse_test = 1;
723 scode = SPU_GTU;
724 break;
725 case LEU:
726 reverse_compare = 0;
727 reverse_test = 1;
728 scode = SPU_GTU;
729 break;
730 case LTU:
731 reverse_compare = 1;
732 reverse_test = 0;
733 scode = SPU_GTU;
734 break;
735 case NE:
736 reverse_compare = 0;
737 reverse_test = 1;
738 scode = SPU_EQ;
739 break;
740
741 case EQ:
742 scode = SPU_EQ;
743 break;
744 case GT:
745 scode = SPU_GT;
746 break;
747 case GTU:
748 scode = SPU_GTU;
749 break;
750 default:
751 scode = SPU_EQ;
752 break;
753 }
754
755 comp_mode = SImode;
756 op_mode = GET_MODE (spu_compare_op0);
757
758 switch (op_mode)
759 {
760 case QImode:
761 index = 0;
762 comp_mode = QImode;
763 break;
764 case HImode:
765 index = 1;
766 comp_mode = HImode;
767 break;
768 case SImode:
769 index = 2;
770 break;
771 case DImode:
772 index = 3;
773 break;
774 case TImode:
775 index = 4;
776 break;
777 case SFmode:
778 index = 5;
779 break;
780 case DFmode:
781 index = 6;
782 break;
783 case V16QImode:
784 case V8HImode:
785 case V4SImode:
786 case V2DImode:
787 case V4SFmode:
788 case V2DFmode:
789 index = 7;
790 break;
791 default:
792 abort ();
793 }
794
795 if (GET_MODE (spu_compare_op1) == DFmode)
796 {
797 rtx reg = gen_reg_rtx (DFmode);
798 if (!flag_unsafe_math_optimizations
799 || (scode != SPU_GT && scode != SPU_EQ))
800 abort ();
801 if (reverse_compare)
802 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
803 else
804 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
805 reverse_compare = 0;
806 spu_compare_op0 = reg;
807 spu_compare_op1 = CONST0_RTX (DFmode);
808 }
809
810 if (is_set == 0 && spu_compare_op1 == const0_rtx
811 && (GET_MODE (spu_compare_op0) == SImode
812 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
813 {
814 /* Don't need to set a register with the result when we are
815 comparing against zero and branching. */
816 reverse_test = !reverse_test;
817 compare_result = spu_compare_op0;
818 }
819 else
820 {
821 compare_result = gen_reg_rtx (comp_mode);
822
823 if (reverse_compare)
824 {
825 rtx t = spu_compare_op1;
826 spu_compare_op1 = spu_compare_op0;
827 spu_compare_op0 = t;
828 }
829
830 if (spu_comp_icode[index][scode] == 0)
831 abort ();
832
833 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
834 (spu_compare_op0, op_mode))
835 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
836 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
837 (spu_compare_op1, op_mode))
838 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
839 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
840 spu_compare_op0,
841 spu_compare_op1);
842 if (comp_rtx == 0)
843 abort ();
844 emit_insn (comp_rtx);
845
846 }
847
848 if (is_set == 0)
849 {
850 rtx bcomp;
851 rtx loc_ref;
852
853 /* We don't have branch on QI compare insns, so we convert the
854 QI compare result to a HI result. */
855 if (comp_mode == QImode)
856 {
857 rtx old_res = compare_result;
858 compare_result = gen_reg_rtx (HImode);
859 comp_mode = HImode;
860 emit_insn (gen_extendqihi2 (compare_result, old_res));
861 }
862
863 if (reverse_test)
864 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
865 else
866 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
867
868 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
869 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
870 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
871 loc_ref, pc_rtx)));
872 }
873 else if (is_set == 2)
874 {
875 int compare_size = GET_MODE_BITSIZE (comp_mode);
876 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
877 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
878 rtx select_mask;
879 rtx op_t = operands[2];
880 rtx op_f = operands[3];
881
882 /* The result of the comparison can be SI, HI or QI mode. Create a
883 mask based on that result. */
884 if (target_size > compare_size)
885 {
886 select_mask = gen_reg_rtx (mode);
887 emit_insn (gen_extend_compare (select_mask, compare_result));
888 }
889 else if (target_size < compare_size)
890 select_mask =
891 gen_rtx_SUBREG (mode, compare_result,
892 (compare_size - target_size) / BITS_PER_UNIT);
893 else if (comp_mode != mode)
894 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
895 else
896 select_mask = compare_result;
897
898 if (GET_MODE (target) != GET_MODE (op_t)
899 || GET_MODE (target) != GET_MODE (op_f))
900 abort ();
901
902 if (reverse_test)
903 emit_insn (gen_selb (target, op_t, op_f, select_mask));
904 else
905 emit_insn (gen_selb (target, op_f, op_t, select_mask));
906 }
907 else
908 {
909 if (reverse_test)
910 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
911 gen_rtx_NOT (comp_mode, compare_result)));
912 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
913 emit_insn (gen_extendhisi2 (target, compare_result));
914 else if (GET_MODE (target) == SImode
915 && GET_MODE (compare_result) == QImode)
916 emit_insn (gen_extend_compare (target, compare_result));
917 else
918 emit_move_insn (target, compare_result);
919 }
920 }
921
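/* Return the target bit pattern of the SFmode or DFmode CONST_DOUBLE X
   as a HOST_WIDE_INT.  */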
922 HOST_WIDE_INT
923 const_double_to_hwint (rtx x)
924 {
925 HOST_WIDE_INT val;
926 REAL_VALUE_TYPE rv;
927 if (GET_MODE (x) == SFmode)
928 {
929 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
930 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
931 }
932 else if (GET_MODE (x) == DFmode)
933 {
934 long l[2];
935 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
936 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
937 val = l[0];
938 val = (val << 32) | (l[1] & 0xffffffff);
939 }
940 else
941 abort ();
942 return val;
943 }
944
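/* The inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE whose target bit pattern is V.  */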
945 rtx
946 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
947 {
948 long tv[2];
949 REAL_VALUE_TYPE rv;
950 gcc_assert (mode == SFmode || mode == DFmode);
951
952 if (mode == SFmode)
953 tv[0] = (v << 32) >> 32;
954 else if (mode == DFmode)
955 {
956 tv[1] = (v << 32) >> 32;
957 tv[0] = v >> 32;
958 }
959 real_from_target (&rv, tv, mode);
960 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
961 }
962
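/* Print the memory address ADDR to FILE in assembler syntax: a plain
   register, register plus register, register plus constant offset, or
   a constant address.  */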
963 void
964 print_operand_address (FILE * file, register rtx addr)
965 {
966 rtx reg;
967 rtx offset;
968
969 if (GET_CODE (addr) == AND
970 && GET_CODE (XEXP (addr, 1)) == CONST_INT
971 && INTVAL (XEXP (addr, 1)) == -16)
972 addr = XEXP (addr, 0);
973
974 switch (GET_CODE (addr))
975 {
976 case REG:
977 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
978 break;
979
980 case PLUS:
981 reg = XEXP (addr, 0);
982 offset = XEXP (addr, 1);
983 if (GET_CODE (offset) == REG)
984 {
985 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
986 reg_names[REGNO (offset)]);
987 }
988 else if (GET_CODE (offset) == CONST_INT)
989 {
990 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
991 INTVAL (offset), reg_names[REGNO (reg)]);
992 }
993 else
994 abort ();
995 break;
996
997 case CONST:
998 case LABEL_REF:
999 case SYMBOL_REF:
1000 case CONST_INT:
1001 output_addr_const (file, addr);
1002 break;
1003
1004 default:
1005 debug_rtx (addr);
1006 abort ();
1007 }
1008 }
1009
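/* Print operand X to FILE.  CODE selects which form of X to print:
   the various letters pick an immediate encoding, suffix or bit field
   as handled by the cases below; 0 prints the operand itself.  */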
1010 void
1011 print_operand (FILE * file, rtx x, int code)
1012 {
1013 enum machine_mode mode = GET_MODE (x);
1014 HOST_WIDE_INT val;
1015 unsigned char arr[16];
1016 int xcode = GET_CODE (x);
1017 int i, info;
1018 if (GET_MODE (x) == VOIDmode)
1019 switch (code)
1020 {
1021 case 'L': /* 128 bits, signed */
1022 case 'm': /* 128 bits, signed */
1023 case 'T': /* 128 bits, signed */
1024 case 't': /* 128 bits, signed */
1025 mode = TImode;
1026 break;
1027 case 'K': /* 64 bits, signed */
1028 case 'k': /* 64 bits, signed */
1029 case 'D': /* 64 bits, signed */
1030 case 'd': /* 64 bits, signed */
1031 mode = DImode;
1032 break;
1033 case 'J': /* 32 bits, signed */
1034 case 'j': /* 32 bits, signed */
1035 case 's': /* 32 bits, signed */
1036 case 'S': /* 32 bits, signed */
1037 mode = SImode;
1038 break;
1039 }
1040 switch (code)
1041 {
1042
1043 case 'j': /* 32 bits, signed */
1044 case 'k': /* 64 bits, signed */
1045 case 'm': /* 128 bits, signed */
1046 if (xcode == CONST_INT
1047 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1048 {
1049 gcc_assert (logical_immediate_p (x, mode));
1050 constant_to_array (mode, x, arr);
1051 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1052 val = trunc_int_for_mode (val, SImode);
1053 switch (which_logical_immediate (val))
1054 {
1055 case SPU_ORI:
1056 break;
1057 case SPU_ORHI:
1058 fprintf (file, "h");
1059 break;
1060 case SPU_ORBI:
1061 fprintf (file, "b");
1062 break;
1063 default:
1064 gcc_unreachable();
1065 }
1066 }
1067 else
1068 gcc_unreachable();
1069 return;
1070
1071 case 'J': /* 32 bits, signed */
1072 case 'K': /* 64 bits, signed */
1073 case 'L': /* 128 bits, signed */
1074 if (xcode == CONST_INT
1075 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1076 {
1077 gcc_assert (logical_immediate_p (x, mode)
1078 || iohl_immediate_p (x, mode));
1079 constant_to_array (mode, x, arr);
1080 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1081 val = trunc_int_for_mode (val, SImode);
1082 switch (which_logical_immediate (val))
1083 {
1084 case SPU_ORI:
1085 case SPU_IOHL:
1086 break;
1087 case SPU_ORHI:
1088 val = trunc_int_for_mode (val, HImode);
1089 break;
1090 case SPU_ORBI:
1091 val = trunc_int_for_mode (val, QImode);
1092 break;
1093 default:
1094 gcc_unreachable();
1095 }
1096 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1097 }
1098 else
1099 gcc_unreachable();
1100 return;
1101
1102 case 't': /* 128 bits, signed */
1103 case 'd': /* 64 bits, signed */
1104 case 's': /* 32 bits, signed */
1105 if (CONSTANT_P (x))
1106 {
1107 enum immediate_class c = classify_immediate (x, mode);
1108 switch (c)
1109 {
1110 case IC_IL1:
1111 constant_to_array (mode, x, arr);
1112 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1113 val = trunc_int_for_mode (val, SImode);
1114 switch (which_immediate_load (val))
1115 {
1116 case SPU_IL:
1117 break;
1118 case SPU_ILA:
1119 fprintf (file, "a");
1120 break;
1121 case SPU_ILH:
1122 fprintf (file, "h");
1123 break;
1124 case SPU_ILHU:
1125 fprintf (file, "hu");
1126 break;
1127 default:
1128 gcc_unreachable ();
1129 }
1130 break;
1131 case IC_CPAT:
1132 constant_to_array (mode, x, arr);
1133 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1134 if (info == 1)
1135 fprintf (file, "b");
1136 else if (info == 2)
1137 fprintf (file, "h");
1138 else if (info == 4)
1139 fprintf (file, "w");
1140 else if (info == 8)
1141 fprintf (file, "d");
1142 break;
1143 case IC_IL1s:
1144 if (xcode == CONST_VECTOR)
1145 {
1146 x = CONST_VECTOR_ELT (x, 0);
1147 xcode = GET_CODE (x);
1148 }
1149 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1150 fprintf (file, "a");
1151 else if (xcode == HIGH)
1152 fprintf (file, "hu");
1153 break;
1154 case IC_FSMBI:
1155 case IC_FSMBI2:
1156 case IC_IL2:
1157 case IC_IL2s:
1158 case IC_POOL:
1159 abort ();
1160 }
1161 }
1162 else
1163 gcc_unreachable ();
1164 return;
1165
1166 case 'T': /* 128 bits, signed */
1167 case 'D': /* 64 bits, signed */
1168 case 'S': /* 32 bits, signed */
1169 if (CONSTANT_P (x))
1170 {
1171 enum immediate_class c = classify_immediate (x, mode);
1172 switch (c)
1173 {
1174 case IC_IL1:
1175 constant_to_array (mode, x, arr);
1176 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1177 val = trunc_int_for_mode (val, SImode);
1178 switch (which_immediate_load (val))
1179 {
1180 case SPU_IL:
1181 case SPU_ILA:
1182 break;
1183 case SPU_ILH:
1184 case SPU_ILHU:
1185 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1186 break;
1187 default:
1188 gcc_unreachable ();
1189 }
1190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1191 break;
1192 case IC_FSMBI:
1193 constant_to_array (mode, x, arr);
1194 val = 0;
1195 for (i = 0; i < 16; i++)
1196 {
1197 val <<= 1;
1198 val |= arr[i] & 1;
1199 }
1200 print_operand (file, GEN_INT (val), 0);
1201 break;
1202 case IC_CPAT:
1203 constant_to_array (mode, x, arr);
1204 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1205 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1206 break;
1207 case IC_IL1s:
1208 if (xcode == HIGH)
1209 x = XEXP (x, 0);
1210 if (GET_CODE (x) == CONST_VECTOR)
1211 x = CONST_VECTOR_ELT (x, 0);
1212 output_addr_const (file, x);
1213 if (xcode == HIGH)
1214 fprintf (file, "@h");
1215 break;
1216 case IC_IL2:
1217 case IC_IL2s:
1218 case IC_FSMBI2:
1219 case IC_POOL:
1220 abort ();
1221 }
1222 }
1223 else
1224 gcc_unreachable ();
1225 return;
1226
1227 case 'C':
1228 if (xcode == CONST_INT)
1229 {
1230 /* Only the 4 least significant bits are relevant for generating
1231 control word instructions. */
1232 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1233 return;
1234 }
1235 break;
1236
1237 case 'M': /* print code for c*d */
1238 if (GET_CODE (x) == CONST_INT)
1239 switch (INTVAL (x))
1240 {
1241 case 1:
1242 fprintf (file, "b");
1243 break;
1244 case 2:
1245 fprintf (file, "h");
1246 break;
1247 case 4:
1248 fprintf (file, "w");
1249 break;
1250 case 8:
1251 fprintf (file, "d");
1252 break;
1253 default:
1254 gcc_unreachable();
1255 }
1256 else
1257 gcc_unreachable();
1258 return;
1259
1260 case 'N': /* Negate the operand */
1261 if (xcode == CONST_INT)
1262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1263 else if (xcode == CONST_VECTOR)
1264 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1265 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1266 return;
1267
1268 case 'I': /* enable/disable interrupts */
1269 if (xcode == CONST_INT)
1270 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1271 return;
1272
1273 case 'b': /* branch modifiers */
1274 if (xcode == REG)
1275 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1276 else if (COMPARISON_P (x))
1277 fprintf (file, "%s", xcode == NE ? "n" : "");
1278 return;
1279
1280 case 'i': /* indirect call */
1281 if (xcode == MEM)
1282 {
1283 if (GET_CODE (XEXP (x, 0)) == REG)
1284 /* Used in indirect function calls. */
1285 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1286 else
1287 output_address (XEXP (x, 0));
1288 }
1289 return;
1290
1291 case 'p': /* load/store */
1292 if (xcode == MEM)
1293 {
1294 x = XEXP (x, 0);
1295 xcode = GET_CODE (x);
1296 }
1297 if (xcode == AND)
1298 {
1299 x = XEXP (x, 0);
1300 xcode = GET_CODE (x);
1301 }
1302 if (xcode == REG)
1303 fprintf (file, "d");
1304 else if (xcode == CONST_INT)
1305 fprintf (file, "a");
1306 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1307 fprintf (file, "r");
1308 else if (xcode == PLUS || xcode == LO_SUM)
1309 {
1310 if (GET_CODE (XEXP (x, 1)) == REG)
1311 fprintf (file, "x");
1312 else
1313 fprintf (file, "d");
1314 }
1315 return;
1316
1317 case 'e':
1318 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1319 val &= 0x7;
1320 output_addr_const (file, GEN_INT (val));
1321 return;
1322
1323 case 'f':
1324 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1325 val &= 0x1f;
1326 output_addr_const (file, GEN_INT (val));
1327 return;
1328
1329 case 'g':
1330 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1331 val &= 0x3f;
1332 output_addr_const (file, GEN_INT (val));
1333 return;
1334
1335 case 'h':
1336 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1337 val = (val >> 3) & 0x1f;
1338 output_addr_const (file, GEN_INT (val));
1339 return;
1340
1341 case 'E':
1342 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1343 val = -val;
1344 val &= 0x7;
1345 output_addr_const (file, GEN_INT (val));
1346 return;
1347
1348 case 'F':
1349 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1350 val = -val;
1351 val &= 0x1f;
1352 output_addr_const (file, GEN_INT (val));
1353 return;
1354
1355 case 'G':
1356 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1357 val = -val;
1358 val &= 0x3f;
1359 output_addr_const (file, GEN_INT (val));
1360 return;
1361
1362 case 'H':
1363 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1364 val = -(val & -8ll);
1365 val = (val >> 3) & 0x1f;
1366 output_addr_const (file, GEN_INT (val));
1367 return;
1368
1369 case 0:
1370 if (xcode == REG)
1371 fprintf (file, "%s", reg_names[REGNO (x)]);
1372 else if (xcode == MEM)
1373 output_address (XEXP (x, 0));
1374 else if (xcode == CONST_VECTOR)
1375 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1376 else
1377 output_addr_const (file, x);
1378 return;
1379
1380 /* unused letters
1381 o qr uvw yz
1382 AB OPQR UVWXYZ */
1383 default:
1384 output_operand_lossage ("invalid %%xn code");
1385 }
1386 gcc_unreachable ();
1387 }
1388
1389 extern char call_used_regs[];
1390
1391 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1392 caller saved register. For leaf functions it is more efficient to
1393 use a volatile register because we won't need to save and restore the
1394 pic register. This routine is only valid after register allocation
1395 is completed, so we can pick an unused register. */
1396 static rtx
1397 get_pic_reg (void)
1398 {
1399 rtx pic_reg = pic_offset_table_rtx;
1400 if (!reload_completed && !reload_in_progress)
1401 abort ();
1402 return pic_reg;
1403 }
1404
1405 /* Split constant addresses to handle cases that are too large.
1406 Add in the pic register when in PIC mode.
1407 Split immediates that require more than 1 instruction. */
1408 int
1409 spu_split_immediate (rtx * ops)
1410 {
1411 enum machine_mode mode = GET_MODE (ops[0]);
1412 enum immediate_class c = classify_immediate (ops[1], mode);
1413
1414 switch (c)
1415 {
1416 case IC_IL2:
1417 {
1418 unsigned char arrhi[16];
1419 unsigned char arrlo[16];
1420 rtx to, hi, lo;
1421 int i;
1422 constant_to_array (mode, ops[1], arrhi);
1423 to = no_new_pseudos ? ops[0] : gen_reg_rtx (mode);
1424 for (i = 0; i < 16; i += 4)
1425 {
1426 arrlo[i + 2] = arrhi[i + 2];
1427 arrlo[i + 3] = arrhi[i + 3];
1428 arrlo[i + 0] = arrlo[i + 1] = 0;
1429 arrhi[i + 2] = arrhi[i + 3] = 0;
1430 }
1431 hi = array_to_constant (mode, arrhi);
1432 lo = array_to_constant (mode, arrlo);
1433 emit_move_insn (to, hi);
1434 emit_insn (gen_rtx_SET
1435 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1436 return 1;
1437 }
1438 case IC_FSMBI2:
1439 {
1440 unsigned char arr_fsmbi[16];
1441 unsigned char arr_andbi[16];
1442 rtx to, reg_fsmbi, reg_and;
1443 int i;
1444 enum machine_mode imode = mode;
1445 /* We need to do reals as ints because the constant used in the
1446 * AND might not be a legitimate real constant. */
1447 imode = int_mode_for_mode (mode);
1448 constant_to_array (mode, ops[1], arr_fsmbi);
1449 if (imode != mode)
1450 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1451 else
1452 to = ops[0];
1453 for (i = 0; i < 16; i++)
1454 if (arr_fsmbi[i] != 0)
1455 {
1456 arr_andbi[0] = arr_fsmbi[i];
1457 arr_fsmbi[i] = 0xff;
1458 }
1459 for (i = 1; i < 16; i++)
1460 arr_andbi[i] = arr_andbi[0];
1461 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1462 reg_and = array_to_constant (imode, arr_andbi);
1463 emit_move_insn (to, reg_fsmbi);
1464 emit_insn (gen_rtx_SET
1465 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1466 return 1;
1467 }
1468 case IC_POOL:
1469 if (reload_in_progress || reload_completed)
1470 {
1471 rtx mem = force_const_mem (mode, ops[1]);
1472 if (TARGET_LARGE_MEM)
1473 {
1474 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1475 emit_move_insn (addr, XEXP (mem, 0));
1476 mem = replace_equiv_address (mem, addr);
1477 }
1478 emit_move_insn (ops[0], mem);
1479 return 1;
1480 }
1481 break;
1482 case IC_IL1s:
1483 case IC_IL2s:
1484 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1485 {
1486 if (c == IC_IL2s)
1487 {
1488 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1489 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1490 }
1491 else if (flag_pic)
1492 emit_insn (gen_pic (ops[0], ops[1]));
1493 if (flag_pic)
1494 {
1495 rtx pic_reg = get_pic_reg ();
1496 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1497 current_function_uses_pic_offset_table = 1;
1498 }
1499 return flag_pic || c == IC_IL2s;
1500 }
1501 break;
1502 case IC_IL1:
1503 case IC_FSMBI:
1504 case IC_CPAT:
1505 break;
1506 }
1507 return 0;
1508 }
1509
1510 /* SAVING is TRUE when we are generating the actual load and store
1511 instructions for REGNO. When determining the size of the stack
1512 needed for saving registers we must allocate enough space for the
1513 worst case, because we don't always have the information early enough
1514 to not allocate it. But we can at least eliminate the actual loads
1515 and stores during the prologue/epilogue. */
1516 static int
1517 need_to_save_reg (int regno, int saving)
1518 {
1519 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1520 return 1;
1521 if (flag_pic
1522 && regno == PIC_OFFSET_TABLE_REGNUM
1523 && (!saving || current_function_uses_pic_offset_table)
1524 && (!saving
1525 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1526 return 1;
1527 return 0;
1528 }
1529
1530 /* This function is only correct starting with local register
1531 allocation */
1532 int
1533 spu_saved_regs_size (void)
1534 {
1535 int reg_save_size = 0;
1536 int regno;
1537
1538 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1539 if (need_to_save_reg (regno, 0))
1540 reg_save_size += 0x10;
1541 return reg_save_size;
1542 }
1543
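/* frame_emit_store and frame_emit_load save and restore register REGNO
   to and from the stack slot at ADDR + OFFSET.  V4SImode is used so
   that the whole 16-byte register is transferred.  */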
1544 static rtx
1545 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1546 {
1547 rtx reg = gen_rtx_REG (V4SImode, regno);
1548 rtx mem =
1549 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1550 return emit_insn (gen_movv4si (mem, reg));
1551 }
1552
1553 static rtx
1554 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1555 {
1556 rtx reg = gen_rtx_REG (V4SImode, regno);
1557 rtx mem =
1558 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1559 return emit_insn (gen_movv4si (reg, mem));
1560 }
1561
1562 /* This happens after reload, so we need to expand it. */
1563 static rtx
1564 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1565 {
1566 rtx insn;
1567 if (satisfies_constraint_K (GEN_INT (imm)))
1568 {
1569 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1570 }
1571 else
1572 {
1573 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1574 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1575 if (REGNO (src) == REGNO (scratch))
1576 abort ();
1577 }
1578 return insn;
1579 }
1580
1581 /* Return nonzero if this function is known to have a null epilogue. */
1582
1583 int
1584 direct_return (void)
1585 {
1586 if (reload_completed)
1587 {
1588 if (cfun->static_chain_decl == 0
1589 && (spu_saved_regs_size ()
1590 + get_frame_size ()
1591 + current_function_outgoing_args_size
1592 + current_function_pretend_args_size == 0)
1593 && current_function_is_leaf)
1594 return 1;
1595 }
1596 return 0;
1597 }
1598
1599 /*
1600 The stack frame looks like this:
1601 +-------------+
1602 | incoming |
1603 AP | args |
1604 +-------------+
1605 | $lr save |
1606 +-------------+
1607 prev SP | back chain |
1608 +-------------+
1609 | var args |
1610 | reg save | current_function_pretend_args_size bytes
1611 +-------------+
1612 | ... |
1613 | saved regs | spu_saved_regs_size() bytes
1614 +-------------+
1615 | ... |
1616 FP | vars | get_frame_size() bytes
1617 +-------------+
1618 | ... |
1619 | outgoing |
1620 | args | current_function_outgoing_args_size bytes
1621 +-------------+
1622 | $lr of next |
1623 | frame |
1624 +-------------+
1625 SP | back chain |
1626 +-------------+
1627
1628 */
1629 void
1630 spu_expand_prologue (void)
1631 {
1632 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1633 HOST_WIDE_INT total_size;
1634 HOST_WIDE_INT saved_regs_size;
1635 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1636 rtx scratch_reg_0, scratch_reg_1;
1637 rtx insn, real;
1638
1639 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1640 the "toplevel" insn chain. */
1641 emit_note (NOTE_INSN_DELETED);
1642
1643 if (flag_pic && optimize == 0)
1644 current_function_uses_pic_offset_table = 1;
1645
1646 if (spu_naked_function_p (current_function_decl))
1647 return;
1648
1649 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1650 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1651
1652 saved_regs_size = spu_saved_regs_size ();
1653 total_size = size + saved_regs_size
1654 + current_function_outgoing_args_size
1655 + current_function_pretend_args_size;
1656
1657 if (!current_function_is_leaf
1658 || current_function_calls_alloca || total_size > 0)
1659 total_size += STACK_POINTER_OFFSET;
1660
1661 /* Save this first because code after this might use the link
1662 register as a scratch register. */
1663 if (!current_function_is_leaf)
1664 {
1665 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1666 RTX_FRAME_RELATED_P (insn) = 1;
1667 }
1668
1669 if (total_size > 0)
1670 {
1671 offset = -current_function_pretend_args_size;
1672 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1673 if (need_to_save_reg (regno, 1))
1674 {
1675 offset -= 16;
1676 insn = frame_emit_store (regno, sp_reg, offset);
1677 RTX_FRAME_RELATED_P (insn) = 1;
1678 }
1679 }
1680
1681 if (flag_pic && current_function_uses_pic_offset_table)
1682 {
1683 rtx pic_reg = get_pic_reg ();
1684 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1685 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1686 }
1687
1688 if (total_size > 0)
1689 {
1690 if (flag_stack_check)
1691 {
1692 /* We compare against total_size-1 because
1693 ($sp >= total_size) <=> ($sp > total_size-1) */
1694 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1695 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1696 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1697 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1698 {
1699 emit_move_insn (scratch_v4si, size_v4si);
1700 size_v4si = scratch_v4si;
1701 }
1702 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1703 emit_insn (gen_vec_extractv4si
1704 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1705 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1706 }
1707
1708 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1709 the value of the previous $sp because we save it as the back
1710 chain. */
1711 if (total_size <= 2000)
1712 {
1713 /* In this case we save the back chain first. */
1714 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1715 insn =
1716 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1717 }
1718 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1719 {
1720 insn = emit_move_insn (scratch_reg_0, sp_reg);
1721 insn =
1722 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1723 }
1724 else
1725 {
1726 insn = emit_move_insn (scratch_reg_0, sp_reg);
1727 insn =
1728 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1729 }
1730 RTX_FRAME_RELATED_P (insn) = 1;
1731 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1732 REG_NOTES (insn) =
1733 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1734
1735 if (total_size > 2000)
1736 {
1737 /* Save the back chain ptr */
1738 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1739 }
1740
1741 if (frame_pointer_needed)
1742 {
1743 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1744 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1745 + current_function_outgoing_args_size;
1746 /* Set the new frame_pointer */
1747 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1748 RTX_FRAME_RELATED_P (insn) = 1;
1749 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1750 REG_NOTES (insn) =
1751 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1752 real, REG_NOTES (insn));
1753 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1754 }
1755 }
1756
1757 emit_note (NOTE_INSN_DELETED);
1758 }
1759
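/* Expand the function epilogue: restore the stack pointer and any
   saved registers, reload $lr, and emit the return unless SIBCALL_P
   is true.  */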
1760 void
1761 spu_expand_epilogue (bool sibcall_p)
1762 {
1763 int size = get_frame_size (), offset, regno;
1764 HOST_WIDE_INT saved_regs_size, total_size;
1765 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1766 rtx jump, scratch_reg_0;
1767
1768 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1769 the "toplevel" insn chain. */
1770 emit_note (NOTE_INSN_DELETED);
1771
1772 if (spu_naked_function_p (current_function_decl))
1773 return;
1774
1775 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1776
1777 saved_regs_size = spu_saved_regs_size ();
1778 total_size = size + saved_regs_size
1779 + current_function_outgoing_args_size
1780 + current_function_pretend_args_size;
1781
1782 if (!current_function_is_leaf
1783 || current_function_calls_alloca || total_size > 0)
1784 total_size += STACK_POINTER_OFFSET;
1785
1786 if (total_size > 0)
1787 {
1788 if (current_function_calls_alloca)
1789 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1790 else
1791 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1792
1793
1794 if (saved_regs_size > 0)
1795 {
1796 offset = -current_function_pretend_args_size;
1797 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1798 if (need_to_save_reg (regno, 1))
1799 {
1800 offset -= 0x10;
1801 frame_emit_load (regno, sp_reg, offset);
1802 }
1803 }
1804 }
1805
1806 if (!current_function_is_leaf)
1807 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1808
1809 if (!sibcall_p)
1810 {
1811 emit_insn (gen_rtx_USE
1812 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1813 jump = emit_jump_insn (gen__return ());
1814 emit_barrier_after (jump);
1815 }
1816
1817 emit_note (NOTE_INSN_DELETED);
1818 }
1819
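/* Return an rtx for the return address of the current function when
   COUNT is 0; other counts are not supported.  */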
1820 rtx
1821 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1822 {
1823 if (count != 0)
1824 return 0;
1825 /* This is inefficient because it ends up copying to a save-register
1826 which then gets saved even though $lr has already been saved. But
1827 it does generate better code for leaf functions and we don't need
1828 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1829 used for __builtin_return_address anyway, so maybe we don't care if
1830 it's inefficient. */
1831 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1832 }
1833 \f
1834
1835 /* Given VAL, generate a constant appropriate for MODE.
1836 If MODE is a vector mode, every element will be VAL.
1837 For TImode, VAL will be zero extended to 128 bits. */
1838 rtx
1839 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1840 {
1841 rtx inner;
1842 rtvec v;
1843 int units, i;
1844
1845 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1846 || GET_MODE_CLASS (mode) == MODE_FLOAT
1847 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1848 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1849
1850 if (GET_MODE_CLASS (mode) == MODE_INT)
1851 return immed_double_const (val, 0, mode);
1852
1853 /* val is the bit representation of the float */
1854 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1855 return hwint_to_const_double (mode, val);
1856
1857 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1858 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1859 else
1860 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1861
1862 units = GET_MODE_NUNITS (mode);
1863
1864 v = rtvec_alloc (units);
1865
1866 for (i = 0; i < units; ++i)
1867 RTVEC_ELT (v, i) = inner;
1868
1869 return gen_rtx_CONST_VECTOR (mode, v);
1870 }
1871 \f
1872 /* branch hint stuff */
1873
1874 /* The hardware requires 8 insns between a hint and the branch it
1875 affects. This variable describes how many rtl instructions the
1876 compiler needs to see before inserting a hint. (FIXME: We should
1877 accept less and insert nops to enforce it because hinting is always
1878 profitable for performance, but we do need to be careful of code
1879 size.) */
1880 int spu_hint_dist = (8 * 4);
1881
1882 /* An array of these is used to propagate hints to predecessor blocks. */
1883 struct spu_bb_info
1884 {
1885 rtx prop_jump; /* propagated from another block */
1886 basic_block bb; /* the original block. */
1887 };
1888
1889 /* The special $hbr register is used to prevent the insn scheduler from
1890 moving hbr insns across instructions which invalidate them. It
1891 should only be used in a clobber, and this function searches for
1892 insns which clobber it. */
1893 static bool
1894 insn_clobbers_hbr (rtx insn)
1895 {
1896 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1897 {
1898 rtx parallel = PATTERN (insn);
1899 rtx clobber;
1900 int j;
1901 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1902 {
1903 clobber = XVECEXP (parallel, 0, j);
1904 if (GET_CODE (clobber) == CLOBBER
1905 && GET_CODE (XEXP (clobber, 0)) == REG
1906 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
1907 return 1;
1908 }
1909 }
1910 return 0;
1911 }
1912
1913 static void
1914 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1915 {
1916 rtx branch_label;
1917 rtx hint, insn, prev, next;
1918
1919 if (before == 0 || branch == 0 || target == 0)
1920 return;
1921
1922 if (distance > 600)
1923 return;
1924
1925
1926 branch_label = gen_label_rtx ();
1927 LABEL_NUSES (branch_label)++;
1928 LABEL_PRESERVE_P (branch_label) = 1;
1929 insn = emit_label_before (branch_label, branch);
1930 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1931
1932 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1933 the current insn is pipe0, dual issue with it. */
1934 prev = prev_active_insn (before);
1935 if (prev && get_pipe (prev) == 0)
1936 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1937 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1938 {
1939 next = next_active_insn (before);
1940 hint = emit_insn_after (gen_hbr (branch_label, target), before);
1941 if (next)
1942 PUT_MODE (next, TImode);
1943 }
1944 else
1945 {
1946 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1947 PUT_MODE (hint, TImode);
1948 }
1949 recog_memoized (hint);
1950 }
1951
1952 /* Returns 0 if we don't want a hint for this branch. Otherwise return
1953 the rtx for the branch target. */
1954 static rtx
1955 get_branch_target (rtx branch)
1956 {
1957 if (GET_CODE (branch) == JUMP_INSN)
1958 {
1959 rtx set, src;
1960
1961 /* Return statements */
1962 if (GET_CODE (PATTERN (branch)) == RETURN)
1963 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1964
1965 /* jump table */
1966 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1967 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1968 return 0;
1969
1970 set = single_set (branch);
1971 src = SET_SRC (set);
1972 if (GET_CODE (SET_DEST (set)) != PC)
1973 abort ();
1974
1975 if (GET_CODE (src) == IF_THEN_ELSE)
1976 {
1977 rtx lab = 0;
1978 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1979 if (note)
1980 {
1981 /* If the more probable case is not a fall through, then
1982 try a branch hint. */
1983 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1984 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1985 && GET_CODE (XEXP (src, 1)) != PC)
1986 lab = XEXP (src, 1);
1987 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1988 && GET_CODE (XEXP (src, 2)) != PC)
1989 lab = XEXP (src, 2);
1990 }
1991 if (lab)
1992 {
1993 if (GET_CODE (lab) == RETURN)
1994 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1995 return lab;
1996 }
1997 return 0;
1998 }
1999
2000 return src;
2001 }
2002 else if (GET_CODE (branch) == CALL_INSN)
2003 {
2004 rtx call;
2005 /* All of our call patterns are in a PARALLEL and the CALL is
2006 the first pattern in the PARALLEL. */
2007 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2008 abort ();
2009 call = XVECEXP (PATTERN (branch), 0, 0);
2010 if (GET_CODE (call) == SET)
2011 call = SET_SRC (call);
2012 if (GET_CODE (call) != CALL)
2013 abort ();
2014 return XEXP (XEXP (call, 0), 0);
2015 }
2016 return 0;
2017 }
2018
2019 static void
2020 insert_branch_hints (void)
2021 {
2022 struct spu_bb_info *spu_bb_info;
2023 rtx branch, insn, next;
2024 rtx branch_target = 0;
2025 int branch_addr = 0, insn_addr, head_addr;
2026 basic_block bb;
2027 unsigned int j;
2028
2029 spu_bb_info =
2030 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2031 sizeof (struct spu_bb_info));
2032
2033 /* We need exact insn addresses and lengths. */
2034 shorten_branches (get_insns ());
2035
2036 FOR_EACH_BB_REVERSE (bb)
2037 {
2038 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2039 branch = 0;
2040 if (spu_bb_info[bb->index].prop_jump)
2041 {
2042 branch = spu_bb_info[bb->index].prop_jump;
2043 branch_target = get_branch_target (branch);
2044 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2045 }
2046 /* Search from end of a block to beginning. In this loop, find
2047 jumps which need a branch hint and emit the hint only when:
2048 - it's an indirect branch and we're at the insn which sets
2049 the register
2050 - we're at an insn that will invalidate the hint. e.g., a
2051 call, another hint insn, inline asm that clobbers $hbr, and
2052 some inlined operations (divmodsi4). Don't consider jumps
2053 because they are only at the end of a block and are
2054 considered when we are deciding whether to propagate
2055 - we're getting too far away from the branch. The hbr insns
2056 only have a signed 10-bit offset
2057 We go back as far as possible so the branch will be considered
2058 for propagation when we get to the beginning of the block. */
2059 next = 0;
2060 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2061 {
2062 if (INSN_P (insn))
2063 {
2064 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2065 if (branch && next
2066 && ((GET_CODE (branch_target) == REG
2067 && set_of (branch_target, insn) != NULL_RTX)
2068 || insn_clobbers_hbr (insn)
2069 || branch_addr - insn_addr > 600))
2070 {
2071 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2072 if (insn != BB_END (bb)
2073 && branch_addr - next_addr >= spu_hint_dist)
2074 {
2075 if (dump_file)
2076 fprintf (dump_file,
2077 "hint for %i in block %i before %i\n",
2078 INSN_UID (branch), bb->index, INSN_UID (next));
2079 spu_emit_branch_hint (next, branch, branch_target,
2080 branch_addr - next_addr);
2081 }
2082 branch = 0;
2083 }
2084
2085 /* JUMP_P will only be true at the end of a block. When
2086 branch is already set it means we've previously decided
2087 to propagate a hint for that branch into this block. */
2088 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2089 {
2090 branch = 0;
2091 if ((branch_target = get_branch_target (insn)))
2092 {
2093 branch = insn;
2094 branch_addr = insn_addr;
2095 }
2096 }
2097
2098 /* When a branch hint is emitted it will be inserted
2099 before "next". Make sure next is the beginning of a
2100 cycle to minimize impact on the scheduled insns. */
2101 if (GET_MODE (insn) == TImode)
2102 next = insn;
2103 }
2104 if (insn == BB_HEAD (bb))
2105 break;
2106 }
2107
2108 if (branch)
2109 {
2110 /* If we haven't emitted a hint for this branch yet, it might
2111 be profitable to emit it in one of the predecessor blocks,
2112 especially for loops. */
2113 rtx bbend;
2114 basic_block prev = 0, prop = 0, prev2 = 0;
2115 int loop_exit = 0, simple_loop = 0;
2116 int next_addr = 0;
2117 if (next)
2118 next_addr = INSN_ADDRESSES (INSN_UID (next));
2119
2120 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2121 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2122 prev = EDGE_PRED (bb, j)->src;
2123 else
2124 prev2 = EDGE_PRED (bb, j)->src;
2125
2126 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2127 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2128 loop_exit = 1;
2129 else if (EDGE_SUCC (bb, j)->dest == bb)
2130 simple_loop = 1;
2131
2132 /* If this branch is a loop exit then propagate to previous
2133 fallthru block. This catches the cases when it is a simple
2134 loop or when there is an initial branch into the loop. */
2135 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2136 prop = prev;
2137
2138 /* If there is only one adjacent predecessor, don't propagate
2139 outside this loop. This loop_depth test isn't perfect, but
2140 I'm not sure the loop_father member is valid at this point. */
2141 else if (prev && single_pred_p (bb)
2142 && prev->loop_depth == bb->loop_depth)
2143 prop = prev;
2144
2145 /* If this is the JOIN block of a simple IF-THEN then
2146 propagate the hint to the HEADER block. */
2147 else if (prev && prev2
2148 && EDGE_COUNT (bb->preds) == 2
2149 && EDGE_COUNT (prev->preds) == 1
2150 && EDGE_PRED (prev, 0)->src == prev2
2151 && prev2->loop_depth == bb->loop_depth
2152 && GET_CODE (branch_target) != REG)
2153 prop = prev;
2154
2155 /* Don't propagate when:
2156 - this is a simple loop and the hint would be too far
2157 - this is not a simple loop and there are 16 insns in
2158 this block already
2159 - the predecessor block ends in a branch that will be
2160 hinted
2161 - the predecessor block ends in an insn that invalidates
2162 the hint */
2163 if (prop
2164 && prop->index >= 0
2165 && (bbend = BB_END (prop))
2166 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2167 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2168 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2169 {
2170 if (dump_file)
2171 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2172 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2173 bb->index, prop->index, bb->loop_depth,
2174 INSN_UID (branch), loop_exit, simple_loop,
2175 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2176
2177 spu_bb_info[prop->index].prop_jump = branch;
2178 spu_bb_info[prop->index].bb = bb;
2179 }
2180 else if (next && branch_addr - next_addr >= spu_hint_dist)
2181 {
2182 if (dump_file)
2183 fprintf (dump_file, "hint for %i in block %i before %i\n",
2184 INSN_UID (branch), bb->index, INSN_UID (next));
2185 spu_emit_branch_hint (next, branch, branch_target,
2186 branch_addr - next_addr);
2187 }
2188 branch = 0;
2189 }
2190 }
2191 free (spu_bb_info);
2192 }
2193 \f
2194 /* Emit a nop for INSN such that the two will dual issue. This assumes
2195 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2196 We check for TImode to handle a MULTI1 insn which has dual issued its
2197 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2198 ADDR_VEC insns. */
2199 static void
2200 emit_nop_for_insn (rtx insn)
2201 {
2202 int p;
2203 rtx new_insn;
2204 p = get_pipe (insn);
2205 if (p == 1 && GET_MODE (insn) == TImode)
2206 {
2207 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2208 PUT_MODE (new_insn, TImode);
2209 PUT_MODE (insn, VOIDmode);
2210 }
2211 else
2212 new_insn = emit_insn_after (gen_lnop (), insn);
2213 }
2214
2215 /* Insert nops in basic blocks to meet dual issue alignment
2216 requirements. */
2217 static void
2218 insert_nops (void)
2219 {
2220 rtx insn, next_insn, prev_insn;
2221 int length;
2222 int addr;
2223
2224 /* This sets up INSN_ADDRESSES. */
2225 shorten_branches (get_insns ());
2226
2227 /* Keep track of length added by nops. */
2228 length = 0;
2229
2230 prev_insn = 0;
2231 for (insn = get_insns (); insn; insn = next_insn)
2232 {
2233 next_insn = next_active_insn (insn);
2234 addr = INSN_ADDRESSES (INSN_UID (insn));
2235 if (GET_MODE (insn) == TImode
2236 && next_insn
2237 && GET_MODE (next_insn) != TImode
2238 && ((addr + length) & 7) != 0)
2239 {
2240 /* prev_insn will always be set because the first insn is
2241 always 8-byte aligned. */
2242 emit_nop_for_insn (prev_insn);
2243 length += 4;
2244 }
2245 prev_insn = insn;
2246 }
2247 }
2248
2249 static void
2250 spu_machine_dependent_reorg (void)
2251 {
2252 if (optimize > 0)
2253 {
2254 if (TARGET_BRANCH_HINTS)
2255 insert_branch_hints ();
2256 insert_nops ();
2257 }
2258 }
2259 \f
2260
2261 /* Insn scheduling routines, primarily for dual issue. */
2262 static int
2263 spu_sched_issue_rate (void)
2264 {
2265 return 2;
2266 }
2267
2268 static int
2269 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2270 int verbose ATTRIBUTE_UNUSED, rtx insn,
2271 int can_issue_more)
2272 {
2273 if (GET_CODE (PATTERN (insn)) != USE
2274 && GET_CODE (PATTERN (insn)) != CLOBBER
2275 && get_pipe (insn) != -2)
2276 can_issue_more--;
2277 return can_issue_more;
2278 }
2279
2280 static int
2281 get_pipe (rtx insn)
2282 {
2283 enum attr_type t;
2284 /* Handle inline asm */
2285 if (INSN_CODE (insn) == -1)
2286 return -1;
2287 t = get_attr_type (insn);
2288 switch (t)
2289 {
2290 case TYPE_CONVERT:
2291 return -2;
2292 case TYPE_MULTI0:
2293 return -1;
2294
2295 case TYPE_FX2:
2296 case TYPE_FX3:
2297 case TYPE_SPR:
2298 case TYPE_NOP:
2299 case TYPE_FXB:
2300 case TYPE_FPD:
2301 case TYPE_FP6:
2302 case TYPE_FP7:
2303 case TYPE_IPREFETCH:
2304 return 0;
2305
2306 case TYPE_LNOP:
2307 case TYPE_SHUF:
2308 case TYPE_LOAD:
2309 case TYPE_STORE:
2310 case TYPE_BR:
2311 case TYPE_MULTI1:
2312 case TYPE_HBR:
2313 return 1;
2314 default:
2315 abort ();
2316 }
2317 }
2318
2319 static int
2320 spu_sched_adjust_priority (rtx insn, int pri)
2321 {
2322 int p = get_pipe (insn);
2323 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2324 scheduling. */
2325 if (GET_CODE (PATTERN (insn)) == USE
2326 || GET_CODE (PATTERN (insn)) == CLOBBER
2327 || p == -2)
2328 return pri + 100;
2329 /* Schedule pipe0 insns early for greedier dual issue. */
2330 if (p != 1)
2331 return pri + 50;
2332 return pri;
2333 }
2334
2335 /* INSN is dependent on DEP_INSN. */
2336 static int
2337 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2338 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2339 {
2340 if (GET_CODE (insn) == CALL_INSN)
2341 return cost - 2;
2342 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2343 scheduler makes every insn in a block anti-dependent on the final
2344 jump_insn. We adjust here so higher cost insns will get scheduled
2345 earlier. */
2346 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2347 return insn_cost (dep_insn) - 3;
2348 return cost;
2349 }
2350 \f
2351 /* Create a CONST_DOUBLE from a string. */
2352 struct rtx_def *
2353 spu_float_const (const char *string, enum machine_mode mode)
2354 {
2355 REAL_VALUE_TYPE value;
2356 value = REAL_VALUE_ATOF (string, mode);
2357 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2358 }
2359
2360 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2361 CONST_INT fits constraint 'K', i.e., is small. */
2362 int
2363 legitimate_const (rtx x, int aligned)
2364 {
2365 /* We can never know if the resulting address fits in 18 bits and can be
2366 loaded with ila. Instead we should use the HI and LO relocations to
2367 load a 32-bit address. */
2368 rtx sym, cst;
2369
2370 gcc_assert (GET_CODE (x) == CONST);
2371
2372 if (GET_CODE (XEXP (x, 0)) != PLUS)
2373 return 0;
2374 sym = XEXP (XEXP (x, 0), 0);
2375 cst = XEXP (XEXP (x, 0), 1);
2376 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2377 return 0;
2378 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2379 return 0;
2380 return satisfies_constraint_K (cst);
2381 }
2382
2383 int
2384 spu_constant_address_p (rtx x)
2385 {
2386 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2387 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2388 || GET_CODE (x) == HIGH);
2389 }
2390
2391 static enum spu_immediate
2392 which_immediate_load (HOST_WIDE_INT val)
2393 {
2394 gcc_assert (val == trunc_int_for_mode (val, SImode));
2395
2396 if (val >= -0x8000 && val <= 0x7fff)
2397 return SPU_IL;
2398 if (val >= 0 && val <= 0x3ffff)
2399 return SPU_ILA;
2400 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2401 return SPU_ILH;
2402 if ((val & 0xffff) == 0)
2403 return SPU_ILHU;
2404
2405 return SPU_NONE;
2406 }
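/* Illustrative examples of the mapping above (hypothetical values):
     -5         -> SPU_IL   (fits the signed 16-bit il field)
     0x00012345 -> SPU_ILA  (fits the 18-bit unsigned ila field)
     0x12341234 -> SPU_ILH  (both halfwords identical)
     0x12340000 -> SPU_ILHU (low halfword is zero)  */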
2407
2408 /* Return true when OP can be loaded by one of the il instructions, or
2409 when epilogue generation (flow2) is not complete and OP can be loaded using ilhu and iohl. */
2410 int
2411 immediate_load_p (rtx op, enum machine_mode mode)
2412 {
2413 if (CONSTANT_P (op))
2414 {
2415 enum immediate_class c = classify_immediate (op, mode);
2416 return c == IC_IL1 || c == IC_IL1s
2417 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
2418 }
2419 return 0;
2420 }
2421
2422 /* Return true if the first SIZE bytes of arr form a constant that can be
2423 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2424 represent the size and offset of the instruction to use. */
2425 static int
2426 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
2427 {
2428 int cpat, run, i, start;
2429 cpat = 1;
2430 run = 0;
2431 start = -1;
2432 for (i = 0; i < size && cpat; i++)
2433 if (arr[i] != i+16)
2434 {
2435 if (!run)
2436 {
2437 start = i;
2438 if (arr[i] == 3)
2439 run = 1;
2440 else if (arr[i] == 2 && arr[i+1] == 3)
2441 run = 2;
2442 else if (arr[i] == 0)
2443 {
2444 while (i+run < 16 && arr[i+run] == run)
2445 run++;
2446 if (run != 4 && run != 8)
2447 cpat = 0;
2448 }
2449 else
2450 cpat = 0;
2451 if ((i & (run-1)) != 0)
2452 cpat = 0;
2453 i += run;
2454 }
2455 else
2456 cpat = 0;
2457 }
2458 if (cpat && (run || size < 16))
2459 {
2460 if (run == 0)
2461 run = 1;
2462 if (prun)
2463 *prun = run;
2464 if (pstart)
2465 *pstart = start == -1 ? 16-run : start;
2466 return 1;
2467 }
2468 return 0;
2469 }
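/* Illustrative example (hypothetical pattern): the 16-byte array
   { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19,...,0x1f }
   matches i+16 everywhere except bytes 4..7, which hold the run 0..3,
   so cpat_info returns 1 with *prun == 4 and *pstart == 4; this is the
   control word a cwd with offset 4 would generate.  */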
2470
2471 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2472 it into a register. MODE is only valid when OP is a CONST_INT. */
2473 static enum immediate_class
2474 classify_immediate (rtx op, enum machine_mode mode)
2475 {
2476 HOST_WIDE_INT val;
2477 unsigned char arr[16];
2478 int i, j, repeated, fsmbi, repeat;
2479
2480 gcc_assert (CONSTANT_P (op));
2481
2482 if (GET_MODE (op) != VOIDmode)
2483 mode = GET_MODE (op);
2484
2485 /* A V4SI const_vector with all identical symbols is ok. */
2486 if (!flag_pic
2487 && mode == V4SImode
2488 && GET_CODE (op) == CONST_VECTOR
2489 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2490 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2491 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2492 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2493 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2494 op = CONST_VECTOR_ELT (op, 0);
2495
2496 switch (GET_CODE (op))
2497 {
2498 case SYMBOL_REF:
2499 case LABEL_REF:
2500 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2501
2502 case CONST:
2503 return TARGET_LARGE_MEM
2504 || !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;
2505
2506 case HIGH:
2507 return IC_IL1s;
2508
2509 case CONST_VECTOR:
2510 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2511 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2512 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2513 return IC_POOL;
2514 /* Fall through. */
2515
2516 case CONST_INT:
2517 case CONST_DOUBLE:
2518 constant_to_array (mode, op, arr);
2519
2520 /* Check that each 4-byte slot is identical. */
2521 repeated = 1;
2522 for (i = 4; i < 16; i += 4)
2523 for (j = 0; j < 4; j++)
2524 if (arr[j] != arr[i + j])
2525 repeated = 0;
2526
2527 if (repeated)
2528 {
2529 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2530 val = trunc_int_for_mode (val, SImode);
2531
2532 if (which_immediate_load (val) != SPU_NONE)
2533 return IC_IL1;
2534 }
2535
2536 /* Any mode of 2 bytes or smaller can be loaded with an il
2537 instruction. */
2538 gcc_assert (GET_MODE_SIZE (mode) > 2);
2539
2540 fsmbi = 1;
2541 repeat = 0;
2542 for (i = 0; i < 16 && fsmbi; i++)
2543 if (arr[i] != 0 && repeat == 0)
2544 repeat = arr[i];
2545 else if (arr[i] != 0 && arr[i] != repeat)
2546 fsmbi = 0;
2547 if (fsmbi)
2548 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
2549
2550 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2551 return IC_CPAT;
2552
2553 if (repeated)
2554 return IC_IL2;
2555
2556 return IC_POOL;
2557 default:
2558 break;
2559 }
2560 gcc_unreachable ();
2561 }
2562
2563 static enum spu_immediate
2564 which_logical_immediate (HOST_WIDE_INT val)
2565 {
2566 gcc_assert (val == trunc_int_for_mode (val, SImode));
2567
2568 if (val >= -0x200 && val <= 0x1ff)
2569 return SPU_ORI;
2570 if (val >= 0 && val <= 0xffff)
2571 return SPU_IOHL;
2572 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2573 {
2574 val = trunc_int_for_mode (val, HImode);
2575 if (val >= -0x200 && val <= 0x1ff)
2576 return SPU_ORHI;
2577 if ((val & 0xff) == ((val >> 8) & 0xff))
2578 {
2579 val = trunc_int_for_mode (val, QImode);
2580 if (val >= -0x200 && val <= 0x1ff)
2581 return SPU_ORBI;
2582 }
2583 }
2584 return SPU_NONE;
2585 }
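/* Illustrative examples of the checks above (hypothetical values):
     0x00000123 -> SPU_ORI  (10-bit signed immediate)
     0x00001234 -> SPU_IOHL
     0xfffafffa -> SPU_ORHI (repeated halfword, value -6)
     0x80808080 -> SPU_ORBI (repeated byte, value -128)  */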
2586
2587 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2588 CONST_DOUBLEs. */
2589 static int
2590 const_vector_immediate_p (rtx x)
2591 {
2592 int i;
2593 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2594 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2595 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2596 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2597 return 0;
2598 return 1;
2599 }
2600
2601 int
2602 logical_immediate_p (rtx op, enum machine_mode mode)
2603 {
2604 HOST_WIDE_INT val;
2605 unsigned char arr[16];
2606 int i, j;
2607
2608 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2609 || GET_CODE (op) == CONST_VECTOR);
2610
2611 if (GET_CODE (op) == CONST_VECTOR
2612 && !const_vector_immediate_p (op))
2613 return 0;
2614
2615 if (GET_MODE (op) != VOIDmode)
2616 mode = GET_MODE (op);
2617
2618 constant_to_array (mode, op, arr);
2619
2620 /* Check that bytes are repeated. */
2621 for (i = 4; i < 16; i += 4)
2622 for (j = 0; j < 4; j++)
2623 if (arr[j] != arr[i + j])
2624 return 0;
2625
2626 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2627 val = trunc_int_for_mode (val, SImode);
2628
2629 i = which_logical_immediate (val);
2630 return i != SPU_NONE && i != SPU_IOHL;
2631 }
2632
2633 int
2634 iohl_immediate_p (rtx op, enum machine_mode mode)
2635 {
2636 HOST_WIDE_INT val;
2637 unsigned char arr[16];
2638 int i, j;
2639
2640 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2641 || GET_CODE (op) == CONST_VECTOR);
2642
2643 if (GET_CODE (op) == CONST_VECTOR
2644 && !const_vector_immediate_p (op))
2645 return 0;
2646
2647 if (GET_MODE (op) != VOIDmode)
2648 mode = GET_MODE (op);
2649
2650 constant_to_array (mode, op, arr);
2651
2652 /* Check that bytes are repeated. */
2653 for (i = 4; i < 16; i += 4)
2654 for (j = 0; j < 4; j++)
2655 if (arr[j] != arr[i + j])
2656 return 0;
2657
2658 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2659 val = trunc_int_for_mode (val, SImode);
2660
2661 return val >= 0 && val <= 0xffff;
2662 }
2663
2664 int
2665 arith_immediate_p (rtx op, enum machine_mode mode,
2666 HOST_WIDE_INT low, HOST_WIDE_INT high)
2667 {
2668 HOST_WIDE_INT val;
2669 unsigned char arr[16];
2670 int bytes, i, j;
2671
2672 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2673 || GET_CODE (op) == CONST_VECTOR);
2674
2675 if (GET_CODE (op) == CONST_VECTOR
2676 && !const_vector_immediate_p (op))
2677 return 0;
2678
2679 if (GET_MODE (op) != VOIDmode)
2680 mode = GET_MODE (op);
2681
2682 constant_to_array (mode, op, arr);
2683
2684 if (VECTOR_MODE_P (mode))
2685 mode = GET_MODE_INNER (mode);
2686
2687 bytes = GET_MODE_SIZE (mode);
2688 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2689
2690 /* Check that bytes are repeated. */
2691 for (i = bytes; i < 16; i += bytes)
2692 for (j = 0; j < bytes; j++)
2693 if (arr[j] != arr[i + j])
2694 return 0;
2695
2696 val = arr[0];
2697 for (j = 1; j < bytes; j++)
2698 val = (val << 8) | arr[j];
2699
2700 val = trunc_int_for_mode (val, mode);
2701
2702 return val >= low && val <= high;
2703 }
2704
2705 /* We accept:
2706 - any 32-bit constant (SImode, SFmode)
2707 - any constant that can be generated with fsmbi (any mode)
2708 - a 64-bit constant where the high and low bits are identical
2709 (DImode, DFmode)
2710 - a 128-bit constant where the four 32-bit words match. */
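/* For illustration: a V4SImode vector whose four elements are the same
   SYMBOL_REF is accepted by the check below, while a CONST_VECTOR mixing
   different symbols is rejected because it is not an immediate the SPU
   can materialize directly.  */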
2711 int
2712 spu_legitimate_constant_p (rtx x)
2713 {
2714 if (GET_CODE (x) == HIGH)
2715 x = XEXP (x, 0);
2716 /* V4SI with all identical symbols is valid. */
2717 if (!flag_pic
2718 && GET_MODE (x) == V4SImode
2719 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2720 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2721 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
2722 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2723 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2724 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2725
2726 if (GET_CODE (x) == CONST_VECTOR
2727 && !const_vector_immediate_p (x))
2728 return 0;
2729 return 1;
2730 }
2731
2732 /* Valid addresses are:
2733 - symbol_ref, label_ref, const
2734 - reg
2735 - reg + const, where either reg or const is 16 byte aligned
2736 - reg + reg, alignment doesn't matter
2737 The alignment matters in the reg+const case because lqd and stqd
2738 ignore the 4 least significant bits of the const. (TODO: It might be
2739 preferable to allow any alignment and fix it up when splitting.) */
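/* Illustrative note: because lqd/stqd drop the low 4 bits, a reg+const
   access where neither part is known 16-byte aligned gives the compiler
   no way to tell where the value lands in the fetched quadword, so no
   fixup rotate/shuffle could be emitted; with one part aligned,
   spu_split_load/spu_split_store can compensate.  */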
2740 int
2741 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2742 rtx x, int reg_ok_strict)
2743 {
2744 if (mode == TImode && GET_CODE (x) == AND
2745 && GET_CODE (XEXP (x, 1)) == CONST_INT
2746 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2747 x = XEXP (x, 0);
2748 switch (GET_CODE (x))
2749 {
2750 case SYMBOL_REF:
2751 case LABEL_REF:
2752 return !TARGET_LARGE_MEM;
2753
2754 case CONST:
2755 return !TARGET_LARGE_MEM && legitimate_const (x, 0);
2756
2757 case CONST_INT:
2758 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2759
2760 case SUBREG:
2761 x = XEXP (x, 0);
2762 gcc_assert (GET_CODE (x) == REG);
2763
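/* Fall through. */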
2764 case REG:
2765 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2766
2767 case PLUS:
2768 case LO_SUM:
2769 {
2770 rtx op0 = XEXP (x, 0);
2771 rtx op1 = XEXP (x, 1);
2772 if (GET_CODE (op0) == SUBREG)
2773 op0 = XEXP (op0, 0);
2774 if (GET_CODE (op1) == SUBREG)
2775 op1 = XEXP (op1, 0);
2776 /* We can't just accept any aligned register because CSE can
2777 change it to a register that is not marked aligned and then
2778 recog will fail. So we only accept frame registers because
2779 they will only be changed to other frame registers. */
2780 if (GET_CODE (op0) == REG
2781 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2782 && GET_CODE (op1) == CONST_INT
2783 && INTVAL (op1) >= -0x2000
2784 && INTVAL (op1) <= 0x1fff
2785 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2786 return 1;
2787 if (GET_CODE (op0) == REG
2788 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2789 && GET_CODE (op1) == REG
2790 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2791 return 1;
2792 }
2793 break;
2794
2795 default:
2796 break;
2797 }
2798 return 0;
2799 }
2800
2801 /* When the address is reg + const_int, force the const_int into a
2802 register. */
2803 rtx
2804 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2805 enum machine_mode mode)
2806 {
2807 rtx op0, op1;
2808 /* Make sure both operands are registers. */
2809 if (GET_CODE (x) == PLUS)
2810 {
2811 op0 = XEXP (x, 0);
2812 op1 = XEXP (x, 1);
2813 if (ALIGNED_SYMBOL_REF_P (op0))
2814 {
2815 op0 = force_reg (Pmode, op0);
2816 mark_reg_pointer (op0, 128);
2817 }
2818 else if (GET_CODE (op0) != REG)
2819 op0 = force_reg (Pmode, op0);
2820 if (ALIGNED_SYMBOL_REF_P (op1))
2821 {
2822 op1 = force_reg (Pmode, op1);
2823 mark_reg_pointer (op1, 128);
2824 }
2825 else if (GET_CODE (op1) != REG)
2826 op1 = force_reg (Pmode, op1);
2827 x = gen_rtx_PLUS (Pmode, op0, op1);
2828 if (spu_legitimate_address (mode, x, 0))
2829 return x;
2830 }
2831 return NULL_RTX;
2832 }
2833
2834 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2835 struct attribute_spec.handler. */
2836 static tree
2837 spu_handle_fndecl_attribute (tree * node,
2838 tree name,
2839 tree args ATTRIBUTE_UNUSED,
2840 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2841 {
2842 if (TREE_CODE (*node) != FUNCTION_DECL)
2843 {
2844 warning (0, "`%s' attribute only applies to functions",
2845 IDENTIFIER_POINTER (name));
2846 *no_add_attrs = true;
2847 }
2848
2849 return NULL_TREE;
2850 }
2851
2852 /* Handle the "vector" attribute. */
2853 static tree
2854 spu_handle_vector_attribute (tree * node, tree name,
2855 tree args ATTRIBUTE_UNUSED,
2856 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2857 {
2858 tree type = *node, result = NULL_TREE;
2859 enum machine_mode mode;
2860 int unsigned_p;
2861
2862 while (POINTER_TYPE_P (type)
2863 || TREE_CODE (type) == FUNCTION_TYPE
2864 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2865 type = TREE_TYPE (type);
2866
2867 mode = TYPE_MODE (type);
2868
2869 unsigned_p = TYPE_UNSIGNED (type);
2870 switch (mode)
2871 {
2872 case DImode:
2873 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2874 break;
2875 case SImode:
2876 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2877 break;
2878 case HImode:
2879 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2880 break;
2881 case QImode:
2882 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2883 break;
2884 case SFmode:
2885 result = V4SF_type_node;
2886 break;
2887 case DFmode:
2888 result = V2DF_type_node;
2889 break;
2890 default:
2891 break;
2892 }
2893
2894 /* Propagate qualifiers attached to the element type
2895 onto the vector type. */
2896 if (result && result != type && TYPE_QUALS (type))
2897 result = build_qualified_type (result, TYPE_QUALS (type));
2898
2899 *no_add_attrs = true; /* No need to hang on to the attribute. */
2900
2901 if (!result)
2902 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2903 else
2904 *node = reconstruct_complex_type (*node, result);
2905
2906 return NULL_TREE;
2907 }
2908
2909 /* Return nonzero if FUNC is a naked function. */
2910 static int
2911 spu_naked_function_p (tree func)
2912 {
2913 tree a;
2914
2915 if (TREE_CODE (func) != FUNCTION_DECL)
2916 abort ();
2917
2918 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2919 return a != NULL_TREE;
2920 }
2921
2922 int
2923 spu_initial_elimination_offset (int from, int to)
2924 {
2925 int saved_regs_size = spu_saved_regs_size ();
2926 int sp_offset = 0;
2927 if (!current_function_is_leaf || current_function_outgoing_args_size
2928 || get_frame_size () || saved_regs_size)
2929 sp_offset = STACK_POINTER_OFFSET;
2930 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2931 return (sp_offset + current_function_outgoing_args_size);
2932 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2933 return 0;
2934 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2935 return sp_offset + current_function_outgoing_args_size
2936 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2937 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2938 return get_frame_size () + saved_regs_size + sp_offset;
2939 return 0;
2940 }
2941
2942 rtx
2943 spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2944 {
2945 enum machine_mode mode = TYPE_MODE (type);
2946 int byte_size = ((mode == BLKmode)
2947 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2948
2949 /* Make sure small structs are left justified in a register. */
2950 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2951 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2952 {
2953 enum machine_mode smode;
2954 rtvec v;
2955 int i;
2956 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2957 int n = byte_size / UNITS_PER_WORD;
2958 v = rtvec_alloc (nregs);
2959 for (i = 0; i < n; i++)
2960 {
2961 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2962 gen_rtx_REG (TImode,
2963 FIRST_RETURN_REGNUM
2964 + i),
2965 GEN_INT (UNITS_PER_WORD * i));
2966 byte_size -= UNITS_PER_WORD;
2967 }
2968
2969 if (n < nregs)
2970 {
2971 if (byte_size < 4)
2972 byte_size = 4;
2973 smode =
2974 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2975 RTVEC_ELT (v, n) =
2976 gen_rtx_EXPR_LIST (VOIDmode,
2977 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2978 GEN_INT (UNITS_PER_WORD * n));
2979 }
2980 return gen_rtx_PARALLEL (mode, v);
2981 }
2982 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2983 }
2984
2985 rtx
2986 spu_function_arg (CUMULATIVE_ARGS cum,
2987 enum machine_mode mode,
2988 tree type, int named ATTRIBUTE_UNUSED)
2989 {
2990 int byte_size;
2991
2992 if (cum >= MAX_REGISTER_ARGS)
2993 return 0;
2994
2995 byte_size = ((mode == BLKmode)
2996 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2997
2998 /* The ABI does not allow parameters to be passed partially in
2999 reg and partially in stack. */
3000 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3001 return 0;
3002
3003 /* Make sure small structs are left justified in a register. */
3004 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3005 && byte_size < UNITS_PER_WORD && byte_size > 0)
3006 {
3007 enum machine_mode smode;
3008 rtx gr_reg;
3009 if (byte_size < 4)
3010 byte_size = 4;
3011 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3012 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3013 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3014 const0_rtx);
3015 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3016 }
3017 else
3018 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3019 }
3020
3021 /* Variable sized types are passed by reference. */
3022 static bool
3023 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3024 enum machine_mode mode ATTRIBUTE_UNUSED,
3025 tree type, bool named ATTRIBUTE_UNUSED)
3026 {
3027 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3028 }
3029 \f
3030
3031 /* Var args. */
3032
3033 /* Create and return the va_list datatype.
3034
3035 On SPU, va_list is an array type equivalent to
3036
3037 typedef struct __va_list_tag
3038 {
3039 void *__args __attribute__((__aligned(16)));
3040 void *__skip __attribute__((__aligned(16)));
3041
3042 } va_list[1];
3043
3044 where __args points to the arg that will be returned by the next
3045 va_arg(), and __skip points to the previous stack frame such that
3046 when __args == __skip we should advance __args by 32 bytes. */
3047 static tree
3048 spu_build_builtin_va_list (void)
3049 {
3050 tree f_args, f_skip, record, type_decl;
3051 bool owp;
3052
3053 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3054
3055 type_decl =
3056 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3057
3058 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3059 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3060
3061 DECL_FIELD_CONTEXT (f_args) = record;
3062 DECL_ALIGN (f_args) = 128;
3063 DECL_USER_ALIGN (f_args) = 1;
3064
3065 DECL_FIELD_CONTEXT (f_skip) = record;
3066 DECL_ALIGN (f_skip) = 128;
3067 DECL_USER_ALIGN (f_skip) = 1;
3068
3069 TREE_CHAIN (record) = type_decl;
3070 TYPE_NAME (record) = type_decl;
3071 TYPE_FIELDS (record) = f_args;
3072 TREE_CHAIN (f_args) = f_skip;
3073
3074 /* We know this is being padded and we want it to be. It is an internal
3075 type so hide the warnings from the user. */
3076 owp = warn_padded;
3077 warn_padded = false;
3078
3079 layout_type (record);
3080
3081 warn_padded = owp;
3082
3083 /* The correct type is an array type of one element. */
3084 return build_array_type (record, build_index_type (size_zero_node));
3085 }
3086
3087 /* Implement va_start by filling the va_list structure VALIST.
3088 NEXTARG points to the first anonymous stack argument.
3089
3090 The following global variables are used to initialize
3091 the va_list structure:
3092
3093 current_function_args_info;
3094 the CUMULATIVE_ARGS for this function
3095
3096 current_function_arg_offset_rtx:
3097 holds the offset of the first anonymous stack argument
3098 (relative to the virtual arg pointer). */
3099
3100 void
3101 spu_va_start (tree valist, rtx nextarg)
3102 {
3103 tree f_args, f_skip;
3104 tree args, skip, t;
3105
3106 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3107 f_skip = TREE_CHAIN (f_args);
3108
3109 valist = build_va_arg_indirect_ref (valist);
3110 args =
3111 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3112 skip =
3113 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3114
3115 /* Find the __args area. */
3116 t = make_tree (TREE_TYPE (args), nextarg);
3117 if (current_function_pretend_args_size > 0)
3118 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3119 size_int (-STACK_POINTER_OFFSET));
3120 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
3121 TREE_SIDE_EFFECTS (t) = 1;
3122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3123
3124 /* Find the __skip area. */
3125 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3126 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3127 size_int (current_function_pretend_args_size
3128 - STACK_POINTER_OFFSET));
3129 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3130 TREE_SIDE_EFFECTS (t) = 1;
3131 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3132 }
3133
3134 /* Gimplify va_arg by updating the va_list structure
3135 VALIST as required to retrieve an argument of type
3136 TYPE, and returning that argument.
3137
3138 ret = va_arg(VALIST, TYPE);
3139
3140 generates code equivalent to:
3141
3142 paddedsize = (sizeof(TYPE) + 15) & -16;
3143 if (VALIST.__args + paddedsize > VALIST.__skip
3144 && VALIST.__args <= VALIST.__skip)
3145 addr = VALIST.__skip + 32;
3146 else
3147 addr = VALIST.__args;
3148 VALIST.__args = addr + paddedsize;
3149 ret = *(TYPE *)addr;
3150 */
3151 static tree
3152 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3153 tree * post_p ATTRIBUTE_UNUSED)
3154 {
3155 tree f_args, f_skip;
3156 tree args, skip;
3157 HOST_WIDE_INT size, rsize;
3158 tree paddedsize, addr, tmp;
3159 bool pass_by_reference_p;
3160
3161 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3162 f_skip = TREE_CHAIN (f_args);
3163
3164 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3165 args =
3166 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3167 skip =
3168 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3169
3170 addr = create_tmp_var (ptr_type_node, "va_arg");
3171 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3172
3173 /* If an object is dynamically sized, a pointer to it is passed
3174 instead of the object itself. */
3175 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3176 false);
3177 if (pass_by_reference_p)
3178 type = build_pointer_type (type);
3179 size = int_size_in_bytes (type);
3180 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3181
3182 /* build conditional expression to calculate addr. The expression
3183 will be gimplified later. */
3184 paddedsize = size_int (rsize);
3185 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
3186 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3187 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3188 build2 (LE_EXPR, boolean_type_node, args, skip));
3189
3190 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3191 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
3192 size_int (32)), args);
3193
3194 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3195 gimplify_and_add (tmp, pre_p);
3196
3197 /* update VALIST.__args */
3198 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3199 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3200 gimplify_and_add (tmp, pre_p);
3201
3202 addr = fold_convert (build_pointer_type (type), addr);
3203
3204 if (pass_by_reference_p)
3205 addr = build_va_arg_indirect_ref (addr);
3206
3207 return build_va_arg_indirect_ref (addr);
3208 }
3209
3210 /* Save parameter registers starting with the register that corresponds
3211 to the first unnamed parameter. If the first unnamed parameter is
3212 in the stack then save no registers. Set pretend_args_size to the
3213 amount of space needed to save the registers. */
3214 void
3215 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3216 tree type, int *pretend_size, int no_rtl)
3217 {
3218 if (!no_rtl)
3219 {
3220 rtx tmp;
3221 int regno;
3222 int offset;
3223 int ncum = *cum;
3224
3225 /* cum currently points to the last named argument; we want to
3226 start at the next argument. */
3227 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3228
3229 offset = -STACK_POINTER_OFFSET;
3230 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3231 {
3232 tmp = gen_frame_mem (V4SImode,
3233 plus_constant (virtual_incoming_args_rtx,
3234 offset));
3235 emit_move_insn (tmp,
3236 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3237 offset += 16;
3238 }
3239 *pretend_size = offset + STACK_POINTER_OFFSET;
3240 }
3241 }
3242 \f
3243 void
3244 spu_conditional_register_usage (void)
3245 {
3246 if (flag_pic)
3247 {
3248 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3249 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3250 }
3251 }
3252
3253 /* This is called to decide when we can simplify a load instruction. We
3254 must only return true for registers which we know will always be
3255 aligned, taking into account that CSE might replace this reg with
3256 another one that has not been marked aligned.
3257 So this is really only true for frame, stack and virtual registers,
3258 which we know are always aligned and should not be adversely affected
3259 by CSE. */
3260 static int
3261 regno_aligned_for_load (int regno)
3262 {
3263 return regno == FRAME_POINTER_REGNUM
3264 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
3265 || regno == STACK_POINTER_REGNUM
3266 || (regno >= FIRST_VIRTUAL_REGISTER
3267 && regno <= LAST_VIRTUAL_REGISTER);
3268 }
3269
3270 /* Return TRUE when mem is known to be 16-byte aligned. */
3271 int
3272 aligned_mem_p (rtx mem)
3273 {
3274 if (MEM_ALIGN (mem) >= 128)
3275 return 1;
3276 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3277 return 1;
3278 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3279 {
3280 rtx p0 = XEXP (XEXP (mem, 0), 0);
3281 rtx p1 = XEXP (XEXP (mem, 0), 1);
3282 if (regno_aligned_for_load (REGNO (p0)))
3283 {
3284 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3285 return 1;
3286 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3287 return 1;
3288 }
3289 }
3290 else if (GET_CODE (XEXP (mem, 0)) == REG)
3291 {
3292 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3293 return 1;
3294 }
3295 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3296 return 1;
3297 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3298 {
3299 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3300 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3301 if (GET_CODE (p0) == SYMBOL_REF
3302 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3303 return 1;
3304 }
3305 return 0;
3306 }
3307
3308 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3309 into its SYMBOL_REF_FLAGS. */
3310 static void
3311 spu_encode_section_info (tree decl, rtx rtl, int first)
3312 {
3313 default_encode_section_info (decl, rtl, first);
3314
3315 /* If a variable has a forced alignment to < 16 bytes, mark it with
3316 SYMBOL_FLAG_ALIGN1. */
3317 if (TREE_CODE (decl) == VAR_DECL
3318 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3319 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3320 }
3321
3322 /* Return TRUE if we are certain the mem refers to a complete object
3323 which is both 16-byte aligned and padded to a 16-byte boundary. This
3324 would make it safe to store with a single instruction.
3325 We guarantee the alignment and padding for static objects by aligning
3326 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3327 FIXME: We currently cannot guarantee this for objects on the stack
3328 because assign_parm_setup_stack calls assign_stack_local with the
3329 alignment of the parameter mode and in that case the alignment never
3330 gets adjusted by LOCAL_ALIGNMENT. */
3331 static int
3332 store_with_one_insn_p (rtx mem)
3333 {
3334 rtx addr = XEXP (mem, 0);
3335 if (GET_MODE (mem) == BLKmode)
3336 return 0;
3337 /* Only static objects. */
3338 if (GET_CODE (addr) == SYMBOL_REF)
3339 {
3340 /* We use the associated declaration to make sure the access is
3341 referring to the whole object.
3342 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3343 if it is necessary. Will there be cases where one exists, and
3344 the other does not? Will there be cases where both exist, but
3345 have different types? */
3346 tree decl = MEM_EXPR (mem);
3347 if (decl
3348 && TREE_CODE (decl) == VAR_DECL
3349 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3350 return 1;
3351 decl = SYMBOL_REF_DECL (addr);
3352 if (decl
3353 && TREE_CODE (decl) == VAR_DECL
3354 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3355 return 1;
3356 }
3357 return 0;
3358 }
3359
3360 int
3361 spu_expand_mov (rtx * ops, enum machine_mode mode)
3362 {
3363 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3364 abort ();
3365
3366 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3367 {
3368 rtx from = SUBREG_REG (ops[1]);
3369 enum machine_mode imode = GET_MODE (from);
3370
3371 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3372 && GET_MODE_CLASS (imode) == MODE_INT
3373 && subreg_lowpart_p (ops[1]));
3374
3375 if (GET_MODE_SIZE (imode) < 4)
3376 {
3377 from = gen_rtx_SUBREG (SImode, from, 0);
3378 imode = SImode;
3379 }
3380
3381 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3382 {
3383 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3384 emit_insn (GEN_FCN (icode) (ops[0], from));
3385 }
3386 else
3387 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3388 return 1;
3389 }
3390
3391 /* At least one of the operands needs to be a register. */
3392 if ((reload_in_progress | reload_completed) == 0
3393 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3394 {
3395 rtx temp = force_reg (mode, ops[1]);
3396 emit_move_insn (ops[0], temp);
3397 return 1;
3398 }
3399 if (reload_in_progress || reload_completed)
3400 {
3401 if (CONSTANT_P (ops[1]))
3402 return spu_split_immediate (ops);
3403 return 0;
3404 }
3405 else
3406 {
3407 if (GET_CODE (ops[0]) == MEM)
3408 {
3409 if (!spu_valid_move (ops))
3410 {
3411 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3412 gen_reg_rtx (TImode)));
3413 return 1;
3414 }
3415 }
3416 else if (GET_CODE (ops[1]) == MEM)
3417 {
3418 if (!spu_valid_move (ops))
3419 {
3420 emit_insn (gen_load
3421 (ops[0], ops[1], gen_reg_rtx (TImode),
3422 gen_reg_rtx (SImode)));
3423 return 1;
3424 }
3425 }
3426 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3427 extend them. */
3428 if (GET_CODE (ops[1]) == CONST_INT)
3429 {
3430 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3431 if (val != INTVAL (ops[1]))
3432 {
3433 emit_move_insn (ops[0], GEN_INT (val));
3434 return 1;
3435 }
3436 }
3437 }
3438 return 0;
3439 }
3440
3441 static int
3442 reg_align (rtx reg)
3443 {
3444 /* For now, only frame registers are known to be aligned at all times.
3445 We can't trust REGNO_POINTER_ALIGN because optimization will move
3446 registers around, potentially changing an "aligned" register in an
3447 address to an unaligned register, which would result in an invalid
3448 address. */
3449 int regno = REGNO (reg);
3450 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3451 }
3452
3453 void
3454 spu_split_load (rtx * ops)
3455 {
3456 enum machine_mode mode = GET_MODE (ops[0]);
3457 rtx addr, load, rot, mem, p0, p1;
3458 int rot_amt;
3459
3460 addr = XEXP (ops[1], 0);
3461
3462 rot = 0;
3463 rot_amt = 0;
3464 if (GET_CODE (addr) == PLUS)
3465 {
3466 /* 8 cases:
3467 aligned reg + aligned reg => lqx
3468 aligned reg + unaligned reg => lqx, rotqby
3469 aligned reg + aligned const => lqd
3470 aligned reg + unaligned const => lqd, rotqbyi
3471 unaligned reg + aligned reg => lqx, rotqby
3472 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3473 unaligned reg + aligned const => lqd, rotqby
3474 unaligned reg + unaligned const -> not allowed by legitimate address
3475 */
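/* Illustrative example: an SImode load from "aligned reg + 4" takes the
   lqd/rotate path below -- the quadword at (reg + 0) is loaded and then
   rotated left by 4 bytes (32 bits) so the requested word ends up in the
   preferred slot before spu_convert.  */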
3476 p0 = XEXP (addr, 0);
3477 p1 = XEXP (addr, 1);
3478 if (reg_align (p0) < 128)
3479 {
3480 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3481 {
3482 emit_insn (gen_addsi3 (ops[3], p0, p1));
3483 rot = ops[3];
3484 }
3485 else
3486 rot = p0;
3487 }
3488 else
3489 {
3490 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3491 {
3492 rot_amt = INTVAL (p1) & 15;
3493 p1 = GEN_INT (INTVAL (p1) & -16);
3494 addr = gen_rtx_PLUS (SImode, p0, p1);
3495 }
3496 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3497 rot = p1;
3498 }
3499 }
3500 else if (GET_CODE (addr) == REG)
3501 {
3502 if (reg_align (addr) < 128)
3503 rot = addr;
3504 }
3505 else if (GET_CODE (addr) == CONST)
3506 {
3507 if (GET_CODE (XEXP (addr, 0)) == PLUS
3508 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3509 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3510 {
3511 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3512 if (rot_amt & -16)
3513 addr = gen_rtx_CONST (Pmode,
3514 gen_rtx_PLUS (Pmode,
3515 XEXP (XEXP (addr, 0), 0),
3516 GEN_INT (rot_amt & -16)));
3517 else
3518 addr = XEXP (XEXP (addr, 0), 0);
3519 }
3520 else
3521 rot = addr;
3522 }
3523 else if (GET_CODE (addr) == CONST_INT)
3524 {
3525 rot_amt = INTVAL (addr);
3526 addr = GEN_INT (rot_amt & -16);
3527 }
3528 else if (!ALIGNED_SYMBOL_REF_P (addr))
3529 rot = addr;
3530
3531 if (GET_MODE_SIZE (mode) < 4)
3532 rot_amt += GET_MODE_SIZE (mode) - 4;
3533
3534 rot_amt &= 15;
3535
3536 if (rot && rot_amt)
3537 {
3538 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3539 rot = ops[3];
3540 rot_amt = 0;
3541 }
3542
3543 load = ops[2];
3544
3545 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3546 mem = change_address (ops[1], TImode, addr);
3547
3548 emit_insn (gen_movti (load, mem));
3549
3550 if (rot)
3551 emit_insn (gen_rotqby_ti (load, load, rot));
3552 else if (rot_amt)
3553 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3554
3555 if (reload_completed)
3556 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3557 else
3558 emit_insn (gen_spu_convert (ops[0], load));
3559 }
3560
3561 void
3562 spu_split_store (rtx * ops)
3563 {
3564 enum machine_mode mode = GET_MODE (ops[0]);
3565 rtx pat = ops[2];
3566 rtx reg = ops[3];
3567 rtx addr, p0, p1, p1_lo, smem;
3568 int aform;
3569 int scalar;
3570
3571 addr = XEXP (ops[0], 0);
3572
3573 if (GET_CODE (addr) == PLUS)
3574 {
3575 /* 8 cases:
3576 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3577 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3578 aligned reg + aligned const => lqd, c?d, shuf, stqx
3579 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3580 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3581 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3582 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3583 unaligned reg + unaligned const -> not allowed by legitimate address
3584 */
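/* Illustrative example: a non-scalar SImode store to "aligned reg + 4"
   follows the sequence below -- load the old quadword, build an insertion
   control (cwd-style pattern) for offset 4, shufb to merge the new word,
   and a final quadword store writes the result back.  */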
3585 aform = 0;
3586 p0 = XEXP (addr, 0);
3587 p1 = p1_lo = XEXP (addr, 1);
3588 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3589 {
3590 p1_lo = GEN_INT (INTVAL (p1) & 15);
3591 p1 = GEN_INT (INTVAL (p1) & -16);
3592 addr = gen_rtx_PLUS (SImode, p0, p1);
3593 }
3594 }
3595 else if (GET_CODE (addr) == REG)
3596 {
3597 aform = 0;
3598 p0 = addr;
3599 p1 = p1_lo = const0_rtx;
3600 }
3601 else
3602 {
3603 aform = 1;
3604 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3605 p1 = 0; /* aform doesn't use p1 */
3606 p1_lo = addr;
3607 if (ALIGNED_SYMBOL_REF_P (addr))
3608 p1_lo = const0_rtx;
3609 else if (GET_CODE (addr) == CONST)
3610 {
3611 if (GET_CODE (XEXP (addr, 0)) == PLUS
3612 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3613 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3614 {
3615 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3616 if ((v & -16) != 0)
3617 addr = gen_rtx_CONST (Pmode,
3618 gen_rtx_PLUS (Pmode,
3619 XEXP (XEXP (addr, 0), 0),
3620 GEN_INT (v & -16)));
3621 else
3622 addr = XEXP (XEXP (addr, 0), 0);
3623 p1_lo = GEN_INT (v & 15);
3624 }
3625 }
3626 else if (GET_CODE (addr) == CONST_INT)
3627 {
3628 p1_lo = GEN_INT (INTVAL (addr) & 15);
3629 addr = GEN_INT (INTVAL (addr) & -16);
3630 }
3631 }
3632
3633 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3634
3635 scalar = store_with_one_insn_p (ops[0]);
3636 if (!scalar)
3637 {
3638 /* We could copy the flags from the ops[0] MEM to lmem here.
3639 We don't, because we want this load to be optimized away if
3640 possible, and copying the flags will prevent that in certain
3641 cases, e.g. consider the volatile flag. */
3642
3643 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3644 set_mem_alias_set (lmem, 0);
3645 emit_insn (gen_movti (reg, lmem));
3646
3647 if (!p0 || reg_align (p0) >= 128)
3648 p0 = stack_pointer_rtx;
3649 if (!p1_lo)
3650 p1_lo = const0_rtx;
3651
3652 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3653 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3654 }
3655 else if (reload_completed)
3656 {
3657 if (GET_CODE (ops[1]) == REG)
3658 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3659 else if (GET_CODE (ops[1]) == SUBREG)
3660 emit_move_insn (reg,
3661 gen_rtx_REG (GET_MODE (reg),
3662 REGNO (SUBREG_REG (ops[1]))));
3663 else
3664 abort ();
3665 }
3666 else
3667 {
3668 if (GET_CODE (ops[1]) == REG)
3669 emit_insn (gen_spu_convert (reg, ops[1]));
3670 else if (GET_CODE (ops[1]) == SUBREG)
3671 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3672 else
3673 abort ();
3674 }
3675
3676 if (GET_MODE_SIZE (mode) < 4 && scalar)
3677 emit_insn (gen_shlqby_ti
3678 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3679
3680 smem = change_address (ops[0], TImode, addr);
3681 /* We can't use the previous alias set because the memory has changed
3682 size and can potentially overlap objects of other types. */
3683 set_mem_alias_set (smem, 0);
3684
3685 emit_insn (gen_movti (smem, reg));
3686 }
3687
3688 /* Return TRUE if X is MEM which is a struct member reference
3689 and the member can safely be loaded and stored with a single
3690 instruction because it is padded. */
3691 static int
3692 mem_is_padded_component_ref (rtx x)
3693 {
3694 tree t = MEM_EXPR (x);
3695 tree r;
3696 if (!t || TREE_CODE (t) != COMPONENT_REF)
3697 return 0;
3698 t = TREE_OPERAND (t, 1);
3699 if (!t || TREE_CODE (t) != FIELD_DECL
3700 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3701 return 0;
3702 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3703 r = DECL_FIELD_CONTEXT (t);
3704 if (!r || TREE_CODE (r) != RECORD_TYPE)
3705 return 0;
3706 /* Make sure they are the same mode */
3707 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3708 return 0;
3709 /* If there are no following fields then the field alignment assures
3710 the structure is padded to the alignment which means this field is
3711 padded too. */
3712 if (TREE_CHAIN (t) == 0)
3713 return 1;
3714 /* If the following field is also aligned then this field will be
3715 padded. */
3716 t = TREE_CHAIN (t);
3717 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3718 return 1;
3719 return 0;
3720 }
3721
3722 /* Parse the -mfixed-range= option string. */
3723 static void
3724 fix_range (const char *const_str)
3725 {
3726 int i, first, last;
3727 char *str, *dash, *comma;
3728
3729 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3730 REG2 are either register names or register numbers. The effect
3731 of this option is to mark the registers in the range from REG1 to
3732 REG2 as ``fixed'' so they won't be used by the compiler. */
3733
3734 i = strlen (const_str);
3735 str = (char *) alloca (i + 1);
3736 memcpy (str, const_str, i + 1);
3737
3738 while (1)
3739 {
3740 dash = strchr (str, '-');
3741 if (!dash)
3742 {
3743 warning (0, "value of -mfixed-range must have form REG1-REG2");
3744 return;
3745 }
3746 *dash = '\0';
3747 comma = strchr (dash + 1, ',');
3748 if (comma)
3749 *comma = '\0';
3750
3751 first = decode_reg_name (str);
3752 if (first < 0)
3753 {
3754 warning (0, "unknown register name: %s", str);
3755 return;
3756 }
3757
3758 last = decode_reg_name (dash + 1);
3759 if (last < 0)
3760 {
3761 warning (0, "unknown register name: %s", dash + 1);
3762 return;
3763 }
3764
3765 *dash = '-';
3766
3767 if (first > last)
3768 {
3769 warning (0, "%s-%s is an empty range", str, dash + 1);
3770 return;
3771 }
3772
3773 for (i = first; i <= last; ++i)
3774 fixed_regs[i] = call_used_regs[i] = 1;
3775
3776 if (!comma)
3777 break;
3778
3779 *comma = ',';
3780 str = comma + 1;
3781 }
3782 }
3783
3784 int
3785 spu_valid_move (rtx * ops)
3786 {
3787 enum machine_mode mode = GET_MODE (ops[0]);
3788 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3789 return 0;
3790
3791 /* init_expr_once tries to recog against load and store insns to set
3792 the direct_load[] and direct_store[] arrays. We always want to
3793 consider those loads and stores valid. init_expr_once is called in
3794 the context of a dummy function which does not have a decl. */
3795 if (cfun->decl == 0)
3796 return 1;
3797
3798 /* Don't allow loads/stores which would require more than 1 insn.
3799 During and after reload we assume loads and stores only take 1
3800 insn. */
3801 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3802 {
3803 if (GET_CODE (ops[0]) == MEM
3804 && (GET_MODE_SIZE (mode) < 4
3805 || !(store_with_one_insn_p (ops[0])
3806 || mem_is_padded_component_ref (ops[0]))))
3807 return 0;
3808 if (GET_CODE (ops[1]) == MEM
3809 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3810 return 0;
3811 }
3812 return 1;
3813 }
3814
3815 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3816 can be generated using the fsmbi instruction. */
3817 int
3818 fsmbi_const_p (rtx x)
3819 {
3820 if (CONSTANT_P (x))
3821 {
3822 /* We can always choose TImode for CONST_INT because the high bits
3823 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3824 enum immediate_class c = classify_immediate (x, TImode);
3825 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
3826 }
3827 return 0;
3828 }
3829
3830 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3831 can be generated using the cbd, chd, cwd or cdd instruction. */
3832 int
3833 cpat_const_p (rtx x, enum machine_mode mode)
3834 {
3835 if (CONSTANT_P (x))
3836 {
3837 enum immediate_class c = classify_immediate (x, mode);
3838 return c == IC_CPAT;
3839 }
3840 return 0;
3841 }
3842
3843 rtx
3844 gen_cpat_const (rtx * ops)
3845 {
3846 unsigned char dst[16];
3847 int i, offset, shift, isize;
3848 if (GET_CODE (ops[3]) != CONST_INT
3849 || GET_CODE (ops[2]) != CONST_INT
3850 || (GET_CODE (ops[1]) != CONST_INT
3851 && GET_CODE (ops[1]) != REG))
3852 return 0;
3853 if (GET_CODE (ops[1]) == REG
3854 && (!REG_POINTER (ops[1])
3855 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3856 return 0;
3857
3858 for (i = 0; i < 16; i++)
3859 dst[i] = i + 16;
3860 isize = INTVAL (ops[3]);
3861 if (isize == 1)
3862 shift = 3;
3863 else if (isize == 2)
3864 shift = 2;
3865 else
3866 shift = 0;
3867 offset = (INTVAL (ops[2]) +
3868 (GET_CODE (ops[1]) ==
3869 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3870 for (i = 0; i < isize; i++)
3871 dst[offset + i] = i + shift;
3872 return array_to_constant (TImode, dst);
3873 }
3874
3875 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3876 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3877 than 16 bytes, the value is repeated across the rest of the array. */
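/* For illustration (hypothetical value): (const_int 0x1234) in HImode
   yields { 0x12, 0x34, 0x12, 0x34, ... } -- the two significant bytes
   splatted across all 16 array positions.  */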
3878 void
3879 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3880 {
3881 HOST_WIDE_INT val;
3882 int i, j, first;
3883
3884 memset (arr, 0, 16);
3885 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3886 if (GET_CODE (x) == CONST_INT
3887 || (GET_CODE (x) == CONST_DOUBLE
3888 && (mode == SFmode || mode == DFmode)))
3889 {
3890 gcc_assert (mode != VOIDmode && mode != BLKmode);
3891
3892 if (GET_CODE (x) == CONST_DOUBLE)
3893 val = const_double_to_hwint (x);
3894 else
3895 val = INTVAL (x);
3896 first = GET_MODE_SIZE (mode) - 1;
3897 for (i = first; i >= 0; i--)
3898 {
3899 arr[i] = val & 0xff;
3900 val >>= 8;
3901 }
3902 /* Splat the constant across the whole array. */
3903 for (j = 0, i = first + 1; i < 16; i++)
3904 {
3905 arr[i] = arr[j];
3906 j = (j == first) ? 0 : j + 1;
3907 }
3908 }
3909 else if (GET_CODE (x) == CONST_DOUBLE)
3910 {
3911 val = CONST_DOUBLE_LOW (x);
3912 for (i = 15; i >= 8; i--)
3913 {
3914 arr[i] = val & 0xff;
3915 val >>= 8;
3916 }
3917 val = CONST_DOUBLE_HIGH (x);
3918 for (i = 7; i >= 0; i--)
3919 {
3920 arr[i] = val & 0xff;
3921 val >>= 8;
3922 }
3923 }
3924 else if (GET_CODE (x) == CONST_VECTOR)
3925 {
3926 int units;
3927 rtx elt;
3928 mode = GET_MODE_INNER (mode);
3929 units = CONST_VECTOR_NUNITS (x);
3930 for (i = 0; i < units; i++)
3931 {
3932 elt = CONST_VECTOR_ELT (x, i);
3933 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3934 {
3935 if (GET_CODE (elt) == CONST_DOUBLE)
3936 val = const_double_to_hwint (elt);
3937 else
3938 val = INTVAL (elt);
3939 first = GET_MODE_SIZE (mode) - 1;
3940 if (first + i * GET_MODE_SIZE (mode) > 16)
3941 abort ();
3942 for (j = first; j >= 0; j--)
3943 {
3944 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3945 val >>= 8;
3946 }
3947 }
3948 }
3949 }
3950 else
3951 gcc_unreachable ();
3952 }
3953
3954 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3955 smaller than 16 bytes, use the bytes that would represent that value
3956 in a register, e.g., for QImode return the value of arr[3]. */
3957 rtx
3958 array_to_constant (enum machine_mode mode, unsigned char arr[16])
3959 {
3960 enum machine_mode inner_mode;
3961 rtvec v;
3962 int units, size, i, j, k;
3963 HOST_WIDE_INT val;
3964
3965 if (GET_MODE_CLASS (mode) == MODE_INT
3966 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3967 {
3968 j = GET_MODE_SIZE (mode);
3969 i = j < 4 ? 4 - j : 0;
3970 for (val = 0; i < j; i++)
3971 val = (val << 8) | arr[i];
3972 val = trunc_int_for_mode (val, mode);
3973 return GEN_INT (val);
3974 }
3975
3976 if (mode == TImode)
3977 {
3978 HOST_WIDE_INT high;
3979 for (i = high = 0; i < 8; i++)
3980 high = (high << 8) | arr[i];
3981 for (i = 8, val = 0; i < 16; i++)
3982 val = (val << 8) | arr[i];
3983 return immed_double_const (val, high, TImode);
3984 }
3985 if (mode == SFmode)
3986 {
3987 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3988 val = trunc_int_for_mode (val, SImode);
3989 return hwint_to_const_double (SFmode, val);
3990 }
3991 if (mode == DFmode)
3992 {
3993 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3994 val <<= 32;
3995 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3996 return hwint_to_const_double (DFmode, val);
3997 }
3998
3999 if (!VECTOR_MODE_P (mode))
4000 abort ();
4001
4002 units = GET_MODE_NUNITS (mode);
4003 size = GET_MODE_UNIT_SIZE (mode);
4004 inner_mode = GET_MODE_INNER (mode);
4005 v = rtvec_alloc (units);
4006
4007 for (k = i = 0; i < units; ++i)
4008 {
4009 val = 0;
4010 for (j = 0; j < size; j++, k++)
4011 val = (val << 8) | arr[k];
4012
4013 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4014 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4015 else
4016 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4017 }
4018 if (k > 16)
4019 abort ();
4020
4021 return gen_rtx_CONST_VECTOR (mode, v);
4022 }
4023
4024 static void
4025 reloc_diagnostic (rtx x)
4026 {
4027 tree loc_decl, decl = 0;
4028 const char *msg;
4029 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4030 return;
4031
4032 if (GET_CODE (x) == SYMBOL_REF)
4033 decl = SYMBOL_REF_DECL (x);
4034 else if (GET_CODE (x) == CONST
4035 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4036 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4037
4038 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4039 if (decl && !DECL_P (decl))
4040 decl = 0;
4041
4042 /* We use last_assemble_variable_decl to get line information. It's
4043 not always going to be right and might not even be close, but will
4044 be right for the more common cases. */
4045 if (!last_assemble_variable_decl || in_section == ctors_section)
4046 loc_decl = decl;
4047 else
4048 loc_decl = last_assemble_variable_decl;
4049
4050 /* The decl could be a string constant. */
4051 if (decl && DECL_P (decl))
4052 msg = "%Jcreating run-time relocation for %qD";
4053 else
4054 msg = "creating run-time relocation";
4055
4056 if (TARGET_WARN_RELOC)
4057 warning (0, msg, loc_decl, decl);
4058 else
4059 error (msg, loc_decl, decl);
4060 }
4061
4062 /* Hook into assemble_integer so we can generate an error for run-time
4063 relocations. The SPU ABI disallows them. */
4064 static bool
4065 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4066 {
4067 /* By default run-time relocations aren't supported, but we allow them
4068 in case users support them in their own run-time loader.  And we provide
4069 a warning for those users who don't. */
4070 if ((GET_CODE (x) == SYMBOL_REF)
4071 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4072 reloc_diagnostic (x);
4073
4074 return default_assemble_integer (x, size, aligned_p);
4075 }
4076
4077 static void
4078 spu_asm_globalize_label (FILE * file, const char *name)
4079 {
4080 fputs ("\t.global\t", file);
4081 assemble_name (file, name);
4082 fputs ("\n", file);
4083 }
4084
4085 static bool
4086 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4087 {
4088 enum machine_mode mode = GET_MODE (x);
4089 int cost = COSTS_N_INSNS (2);
4090
4091 /* Folding to a CONST_VECTOR will use extra space but there might
4092 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4093 only if it allows us to fold away multiple insns. Changing the cost
4094 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4095 because this cost will only be compared against a single insn.
4096 if (code == CONST_VECTOR)
4097 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4098 */
4099
4100 /* Use defaults for float operations. Not accurate but good enough. */
4101 if (mode == DFmode)
4102 {
4103 *total = COSTS_N_INSNS (13);
4104 return true;
4105 }
4106 if (mode == SFmode)
4107 {
4108 *total = COSTS_N_INSNS (6);
4109 return true;
4110 }
4111 switch (code)
4112 {
4113 case CONST_INT:
4114 if (satisfies_constraint_K (x))
4115 *total = 0;
4116 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4117 *total = COSTS_N_INSNS (1);
4118 else
4119 *total = COSTS_N_INSNS (3);
4120 return true;
4121
4122 case CONST:
4123 *total = COSTS_N_INSNS (3);
4124 return true;
4125
4126 case LABEL_REF:
4127 case SYMBOL_REF:
4128 *total = COSTS_N_INSNS (0);
4129 return true;
4130
4131 case CONST_DOUBLE:
4132 *total = COSTS_N_INSNS (5);
4133 return true;
4134
4135 case FLOAT_EXTEND:
4136 case FLOAT_TRUNCATE:
4137 case FLOAT:
4138 case UNSIGNED_FLOAT:
4139 case FIX:
4140 case UNSIGNED_FIX:
4141 *total = COSTS_N_INSNS (7);
4142 return true;
4143
4144 case PLUS:
4145 if (mode == TImode)
4146 {
4147 *total = COSTS_N_INSNS (9);
4148 return true;
4149 }
4150 break;
4151
4152 case MULT:
4153 cost =
4154 GET_CODE (XEXP (x, 0)) ==
4155 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4156 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4157 {
4158 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4159 {
4160 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4161 cost = COSTS_N_INSNS (14);
4162 if ((val & 0xffff) == 0)
4163 cost = COSTS_N_INSNS (9);
4164 else if (val > 0 && val < 0x10000)
4165 cost = COSTS_N_INSNS (11);
4166 }
4167 }
4168 *total = cost;
4169 return true;
4170 case DIV:
4171 case UDIV:
4172 case MOD:
4173 case UMOD:
4174 *total = COSTS_N_INSNS (20);
4175 return true;
4176 case ROTATE:
4177 case ROTATERT:
4178 case ASHIFT:
4179 case ASHIFTRT:
4180 case LSHIFTRT:
4181 *total = COSTS_N_INSNS (4);
4182 return true;
4183 case UNSPEC:
4184 if (XINT (x, 1) == UNSPEC_CONVERT)
4185 *total = COSTS_N_INSNS (0);
4186 else
4187 *total = COSTS_N_INSNS (4);
4188 return true;
4189 }
4190 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4191 if (GET_MODE_CLASS (mode) == MODE_INT
4192 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4193 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4194 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4195 *total = cost;
4196 return true;
4197 }
4198
4199 enum machine_mode
4200 spu_eh_return_filter_mode (void)
4201 {
4202 /* We would like this to be SImode, but sjlj exceptions seem to work
4203 only with word_mode. */
4204 return TImode;
4205 }
4206
4207 /* Decide whether we can make a sibling call to a function. DECL is the
4208 declaration of the function being targeted by the call and EXP is the
4209 CALL_EXPR representing the call. */
4210 static bool
4211 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4212 {
4213 return decl && !TARGET_LARGE_MEM;
4214 }
4215
4216 /* We need to correctly update the back chain pointer and the Available
4217 Stack Size (which is in the second slot of the sp register). */
4218 void
4219 spu_allocate_stack (rtx op0, rtx op1)
4220 {
4221 HOST_WIDE_INT v;
4222 rtx chain = gen_reg_rtx (V4SImode);
4223 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4224 rtx sp = gen_reg_rtx (V4SImode);
4225 rtx splatted = gen_reg_rtx (V4SImode);
4226 rtx pat = gen_reg_rtx (TImode);
4227
4228 /* copy the back chain so we can save it back again. */
4229 emit_move_insn (chain, stack_bot);
4230
4231 op1 = force_reg (SImode, op1);
4232
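  /* A shuffle pattern of 0x00010203 in every word copies bytes 0-3 of op1
     (the requested size, in its preferred slot) into all four slots, so the
     subtraction below adjusts both the stack pointer in slot 0 and the
     Available Stack Size in slot 1.  */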
4233 v = 0x1020300010203ll;
4234 emit_move_insn (pat, immed_double_const (v, v, TImode));
4235 emit_insn (gen_shufb (splatted, op1, op1, pat));
4236
4237 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4238 emit_insn (gen_subv4si3 (sp, sp, splatted));
4239
4240 if (flag_stack_check)
4241 {
4242 rtx avail = gen_reg_rtx (SImode);
4243 rtx result = gen_reg_rtx (SImode);
4244 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4245 emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
4246 emit_insn (gen_spu_heq (result, GEN_INT (0)));
4247 }
4248
4249 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4250
4251 emit_move_insn (stack_bot, chain);
4252
4253 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4254 }
4255
4256 void
4257 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4258 {
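  /* Shuffle pattern that replicates the preferred-slot word of its source
     into all four slots (see the similar pattern in spu_allocate_stack).  */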
4259 static unsigned char arr[16] =
4260 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4261 rtx temp = gen_reg_rtx (SImode);
4262 rtx temp2 = gen_reg_rtx (SImode);
4263 rtx temp3 = gen_reg_rtx (V4SImode);
4264 rtx temp4 = gen_reg_rtx (V4SImode);
4265 rtx pat = gen_reg_rtx (TImode);
4266 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4267
4268 /* Restore the backchain from the first word, sp from the second. */
4269 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4270 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4271
4272 emit_move_insn (pat, array_to_constant (TImode, arr));
4273
4274 /* Compute Available Stack Size for sp */
4275 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4276 emit_insn (gen_shufb (temp3, temp, temp, pat));
4277
4278 /* Compute Available Stack Size for back chain */
4279 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4280 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4281 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4282
4283 emit_insn (gen_addv4si3 (sp, sp, temp3));
4284 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4285 }
4286
4287 static void
4288 spu_init_libfuncs (void)
4289 {
4290 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4291 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4292 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4293 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4294 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4295 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4296 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4297 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4298 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4299 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4300 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4301
4302 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4303 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4304 }
4305
4306 /* Make a subreg, stripping any existing subreg. We could possibly just
4307 call simplify_subreg, but in this case we know what we want. */
4308 rtx
4309 spu_gen_subreg (enum machine_mode mode, rtx x)
4310 {
4311 if (GET_CODE (x) == SUBREG)
4312 x = SUBREG_REG (x);
4313 if (GET_MODE (x) == mode)
4314 return x;
4315 return gen_rtx_SUBREG (mode, x, 0);
4316 }
4317
4318 static bool
4319 spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
4320 {
4321 return (TYPE_MODE (type) == BLKmode
4322 && ((type) == 0
4323 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4324 || int_size_in_bytes (type) >
4325 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4326 }
4327 \f
4328 /* Create the built-in types and functions */
4329
4330 struct spu_builtin_description spu_builtins[] = {
4331 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4332 {fcode, icode, name, type, params, NULL_TREE},
4333 #include "spu-builtins.def"
4334 #undef DEF_BUILTIN
4335 };
4336
4337 static void
4338 spu_init_builtins (void)
4339 {
4340 struct spu_builtin_description *d;
4341 unsigned int i;
4342
4343 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4344 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4345 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4346 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4347 V4SF_type_node = build_vector_type (float_type_node, 4);
4348 V2DF_type_node = build_vector_type (double_type_node, 2);
4349
4350 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4351 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4352 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4353 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4354
4355 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4356
4357 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4358 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4359 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4360 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4361 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4362 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4363 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4364 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4365 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4366 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4367 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4368 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4369
4370 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4371 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4372 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4373 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4374 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4375 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4376 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4377 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4378
4379 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4380 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4381
4382 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4383
4384 spu_builtin_types[SPU_BTI_PTR] =
4385 build_pointer_type (build_qualified_type
4386 (void_type_node,
4387 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4388
4389 /* For each builtin we build a new prototype. The tree code will make
4390 sure nodes are shared. */
4391 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4392 {
4393 tree p;
4394 char name[64]; /* add_builtin_function will make a copy. */
4395 int parm;
4396
4397 if (d->name == 0)
4398 continue;
4399
4400 /* find last parm */
4401 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4402 {
4403 }
4404
4405 p = void_list_node;
4406 while (parm > 1)
4407 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4408
4409 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4410
4411 sprintf (name, "__builtin_%s", d->name);
4412 d->fndecl =
4413 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4414 NULL, NULL_TREE);
4415 if (d->fcode == SPU_MASK_FOR_LOAD)
4416 TREE_READONLY (d->fndecl) = 1;
4417 }
4418 }
4419
4420 void
4421 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4422 {
4423 static unsigned char arr[16] =
4424 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4425
4426 rtx temp = gen_reg_rtx (Pmode);
4427 rtx temp2 = gen_reg_rtx (V4SImode);
4428 rtx temp3 = gen_reg_rtx (V4SImode);
4429 rtx pat = gen_reg_rtx (TImode);
4430 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4431
4432 emit_move_insn (pat, array_to_constant (TImode, arr));
4433
4434 /* Restore the sp. */
4435 emit_move_insn (temp, op1);
4436 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4437
4438 /* Compute available stack size for sp. */
4439 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4440 emit_insn (gen_shufb (temp3, temp, temp, pat));
4441
4442 emit_insn (gen_addv4si3 (sp, sp, temp3));
4443 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4444 }
4445
4446 int
4447 spu_safe_dma (HOST_WIDE_INT channel)
4448 {
4449 return (channel >= 21 && channel <= 27);
4450 }
4451
4452 void
4453 spu_builtin_splats (rtx ops[])
4454 {
4455 enum machine_mode mode = GET_MODE (ops[0]);
4456 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4457 {
4458 unsigned char arr[16];
4459 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4460 emit_move_insn (ops[0], array_to_constant (mode, arr));
4461 }
4462 else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4463 {
4464 rtvec v = rtvec_alloc (4);
4465 RTVEC_ELT (v, 0) = ops[1];
4466 RTVEC_ELT (v, 1) = ops[1];
4467 RTVEC_ELT (v, 2) = ops[1];
4468 RTVEC_ELT (v, 3) = ops[1];
4469 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4470 }
4471 else
4472 {
4473 rtx reg = gen_reg_rtx (TImode);
4474 rtx shuf;
4475 if (GET_CODE (ops[1]) != REG
4476 && GET_CODE (ops[1]) != SUBREG)
4477 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
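      /* The shuffle patterns below replicate the scalar from its preferred
         slot into every element: bytes 0-7 for doublewords, bytes 0-3 for
         words, bytes 2-3 for halfwords, and byte 3 for bytes.  */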
4478 switch (mode)
4479 {
4480 case V2DImode:
4481 case V2DFmode:
4482 shuf =
4483 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4484 TImode);
4485 break;
4486 case V4SImode:
4487 case V4SFmode:
4488 shuf =
4489 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4490 TImode);
4491 break;
4492 case V8HImode:
4493 shuf =
4494 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4495 TImode);
4496 break;
4497 case V16QImode:
4498 shuf =
4499 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4500 TImode);
4501 break;
4502 default:
4503 abort ();
4504 }
4505 emit_move_insn (reg, shuf);
4506 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4507 }
4508 }
4509
4510 void
4511 spu_builtin_extract (rtx ops[])
4512 {
4513 enum machine_mode mode;
4514 rtx rot, from, tmp;
4515
4516 mode = GET_MODE (ops[1]);
4517
4518 if (GET_CODE (ops[2]) == CONST_INT)
4519 {
4520 switch (mode)
4521 {
4522 case V16QImode:
4523 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4524 break;
4525 case V8HImode:
4526 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4527 break;
4528 case V4SFmode:
4529 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4530 break;
4531 case V4SImode:
4532 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4533 break;
4534 case V2DImode:
4535 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4536 break;
4537 case V2DFmode:
4538 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4539 break;
4540 default:
4541 abort ();
4542 }
4543 return;
4544 }
4545
4546 from = spu_gen_subreg (TImode, ops[1]);
4547 rot = gen_reg_rtx (TImode);
4548 tmp = gen_reg_rtx (SImode);
4549
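  /* Compute the left-rotate amount, in bytes (taken modulo 16), that brings
     element ops[2] into the scalar's preferred slot: byte 3 for bytes,
     bytes 2-3 for halfwords, bytes 0-3 for words, bytes 0-7 for
     doublewords.  */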
4550 switch (mode)
4551 {
4552 case V16QImode:
4553 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4554 break;
4555 case V8HImode:
4556 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4557 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4558 break;
4559 case V4SFmode:
4560 case V4SImode:
4561 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4562 break;
4563 case V2DImode:
4564 case V2DFmode:
4565 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4566 break;
4567 default:
4568 abort ();
4569 }
4570 emit_insn (gen_rotqby_ti (rot, from, tmp));
4571
4572 emit_insn (gen_spu_convert (ops[0], rot));
4573 }
4574
4575 void
4576 spu_builtin_insert (rtx ops[])
4577 {
4578 enum machine_mode mode = GET_MODE (ops[0]);
4579 enum machine_mode imode = GET_MODE_INNER (mode);
4580 rtx mask = gen_reg_rtx (TImode);
4581 rtx offset;
4582
4583 if (GET_CODE (ops[3]) == CONST_INT)
4584 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4585 else
4586 {
4587 offset = gen_reg_rtx (SImode);
4588 emit_insn (gen_mulsi3
4589 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4590 }
4591 emit_insn (gen_cpat
4592 (mask, stack_pointer_rtx, offset,
4593 GEN_INT (GET_MODE_SIZE (imode))));
4594 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4595 }
4596
4597 void
4598 spu_builtin_promote (rtx ops[])
4599 {
4600 enum machine_mode mode, imode;
4601 rtx rot, from, offset;
4602 HOST_WIDE_INT pos;
4603
4604 mode = GET_MODE (ops[0]);
4605 imode = GET_MODE_INNER (mode);
4606
4607 from = gen_reg_rtx (TImode);
4608 rot = spu_gen_subreg (TImode, ops[0]);
4609
4610 emit_insn (gen_spu_convert (from, ops[1]));
4611
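  /* Compute a byte rotate that is the inverse of the one used in
     spu_builtin_extract: it moves the scalar from its preferred slot out
     to the byte position of element ops[2] in the result vector.  */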
4612 if (GET_CODE (ops[2]) == CONST_INT)
4613 {
4614 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4615 if (GET_MODE_SIZE (imode) < 4)
4616 pos += 4 - GET_MODE_SIZE (imode);
4617 offset = GEN_INT (pos & 15);
4618 }
4619 else
4620 {
4621 offset = gen_reg_rtx (SImode);
4622 switch (mode)
4623 {
4624 case V16QImode:
4625 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4626 break;
4627 case V8HImode:
4628 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4629 emit_insn (gen_addsi3 (offset, offset, offset));
4630 break;
4631 case V4SFmode:
4632 case V4SImode:
4633 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4634 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4635 break;
4636 case V2DImode:
4637 case V2DFmode:
4638 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4639 break;
4640 default:
4641 abort ();
4642 }
4643 }
4644 emit_insn (gen_rotqby_ti (rot, from, offset));
4645 }
4646
4647 void
4648 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4649 {
4650 rtx shuf = gen_reg_rtx (V4SImode);
4651 rtx insn = gen_reg_rtx (V4SImode);
4652 rtx shufc;
4653 rtx insnc;
4654 rtx mem;
4655
4656 fnaddr = force_reg (SImode, fnaddr);
4657 cxt = force_reg (SImode, cxt);
4658
4659 if (TARGET_LARGE_MEM)
4660 {
4661 rtx rotl = gen_reg_rtx (V4SImode);
4662 rtx mask = gen_reg_rtx (V4SImode);
4663 rtx bi = gen_reg_rtx (SImode);
4664 unsigned char shufa[16] = {
4665 2, 3, 0, 1, 18, 19, 16, 17,
4666 0, 1, 2, 3, 16, 17, 18, 19
4667 };
4668 unsigned char insna[16] = {
4669 0x41, 0, 0, 79,
4670 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4671 0x60, 0x80, 0, 79,
4672 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4673 };
4674
4675 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4676 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4677
4678 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4679 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4680 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4681 emit_insn (gen_selb (insn, insnc, rotl, mask));
4682
4683 mem = memory_address (Pmode, tramp);
4684 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4685
4686 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4687 mem = memory_address (Pmode, plus_constant (tramp, 16));
4688 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4689 }
4690 else
4691 {
4692 rtx scxt = gen_reg_rtx (SImode);
4693 rtx sfnaddr = gen_reg_rtx (SImode);
4694 unsigned char insna[16] = {
4695 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4696 0x30, 0, 0, 0,
4697 0, 0, 0, 0,
4698 0, 0, 0, 0
4699 };
4700
4701 shufc = gen_reg_rtx (TImode);
4702 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4703
4704 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4705 fits in 18 bits and its last 4 bits are zeros.  This will be true if
4706 the stack pointer is initialized to 0x3fff0 at program start;
4707 otherwise the ila instruction will be garbage. */
4708
4709 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4710 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4711 emit_insn (gen_cpat
4712 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4713 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4714 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4715
4716 mem = memory_address (Pmode, tramp);
4717 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4718
4719 }
4720 emit_insn (gen_sync ());
4721 }
4722
4723 void
4724 spu_expand_sign_extend (rtx ops[])
4725 {
4726 unsigned char arr[16];
4727 rtx pat = gen_reg_rtx (TImode);
4728 rtx sign, c;
4729 int i, last;
4730 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
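  /* Build a shufb pattern: selector bytes 0x00-0x0f pick bytes of ops[1]
     and 0x10-0x1f pick bytes of the sign register, so the result holds
     the source value in its low-order bytes with sign bytes above it.  */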
4731 if (GET_MODE (ops[1]) == QImode)
4732 {
4733 sign = gen_reg_rtx (HImode);
4734 emit_insn (gen_extendqihi2 (sign, ops[1]));
4735 for (i = 0; i < 16; i++)
4736 arr[i] = 0x12;
4737 arr[last] = 0x13;
4738 }
4739 else
4740 {
4741 for (i = 0; i < 16; i++)
4742 arr[i] = 0x10;
4743 switch (GET_MODE (ops[1]))
4744 {
4745 case HImode:
4746 sign = gen_reg_rtx (SImode);
4747 emit_insn (gen_extendhisi2 (sign, ops[1]));
4748 arr[last] = 0x03;
4749 arr[last - 1] = 0x02;
4750 break;
4751 case SImode:
4752 sign = gen_reg_rtx (SImode);
4753 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4754 for (i = 0; i < 4; i++)
4755 arr[last - i] = 3 - i;
4756 break;
4757 case DImode:
4758 sign = gen_reg_rtx (SImode);
4759 c = gen_reg_rtx (SImode);
4760 emit_insn (gen_spu_convert (c, ops[1]));
4761 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4762 for (i = 0; i < 8; i++)
4763 arr[last - i] = 7 - i;
4764 break;
4765 default:
4766 abort ();
4767 }
4768 }
4769 emit_move_insn (pat, array_to_constant (TImode, arr));
4770 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4771 }
4772
4773 /* Expand vector initialization.  If there are any constant parts,
4774 load the constant parts first, then load any non-constant parts. */
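/* For example, initializing a V4SImode vector with { x, 1, 2, 3 }, where x
   is in a register, first loads the constant vector { 1, 1, 2, 3 } and then
   inserts x into element 0.  */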
4775 void
4776 spu_expand_vector_init (rtx target, rtx vals)
4777 {
4778 enum machine_mode mode = GET_MODE (target);
4779 int n_elts = GET_MODE_NUNITS (mode);
4780 int n_var = 0;
4781 bool all_same = true;
4782 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
4783 int i;
4784
4785 first = XVECEXP (vals, 0, 0);
4786 for (i = 0; i < n_elts; ++i)
4787 {
4788 x = XVECEXP (vals, 0, i);
4789 if (!CONSTANT_P (x))
4790 ++n_var;
4791 else
4792 {
4793 if (first_constant == NULL_RTX)
4794 first_constant = x;
4795 }
4796 if (i > 0 && !rtx_equal_p (x, first))
4797 all_same = false;
4798 }
4799
4800 /* if all elements are the same, use splats to repeat elements */
4801 if (all_same)
4802 {
4803 if (!CONSTANT_P (first)
4804 && !register_operand (first, GET_MODE (x)))
4805 first = force_reg (GET_MODE (first), first);
4806 emit_insn (gen_spu_splats (target, first));
4807 return;
4808 }
4809
4810 /* load constant parts */
4811 if (n_var != n_elts)
4812 {
4813 if (n_var == 0)
4814 {
4815 emit_move_insn (target,
4816 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4817 }
4818 else
4819 {
4820 rtx constant_parts_rtx = copy_rtx (vals);
4821
4822 gcc_assert (first_constant != NULL_RTX);
4823 /* fill empty slots with the first constant; this increases
4824 our chance of using splats in the recursive call below. */
4825 for (i = 0; i < n_elts; ++i)
4826 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4827 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4828
4829 spu_expand_vector_init (target, constant_parts_rtx);
4830 }
4831 }
4832
4833 /* load variable parts */
4834 if (n_var != 0)
4835 {
4836 rtx insert_operands[4];
4837
4838 insert_operands[0] = target;
4839 insert_operands[2] = target;
4840 for (i = 0; i < n_elts; ++i)
4841 {
4842 x = XVECEXP (vals, 0, i);
4843 if (!CONSTANT_P (x))
4844 {
4845 if (!register_operand (x, GET_MODE (x)))
4846 x = force_reg (GET_MODE (x), x);
4847 insert_operands[1] = x;
4848 insert_operands[3] = GEN_INT (i);
4849 spu_builtin_insert (insert_operands);
4850 }
4851 }
4852 }
4853 }
4854
4855 static rtx
4856 spu_force_reg (enum machine_mode mode, rtx op)
4857 {
4858 rtx x, r;
4859 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
4860 {
4861 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
4862 || GET_MODE (op) == BLKmode)
4863 return force_reg (mode, convert_to_mode (mode, op, 0));
4864 abort ();
4865 }
4866
4867 r = force_reg (GET_MODE (op), op);
4868 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
4869 {
4870 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
4871 if (x)
4872 return x;
4873 }
4874
4875 x = gen_reg_rtx (mode);
4876 emit_insn (gen_spu_convert (x, r));
4877 return x;
4878 }
4879
4880 static void
4881 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
4882 {
4883 HOST_WIDE_INT v = 0;
4884 int lsbits;
4885 /* Check the range of immediate operands. */
4886 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
4887 {
4888 int range = p - SPU_BTI_7;
4889
4890 if (!CONSTANT_P (op))
4891 error ("%s expects an integer literal in the range [%d, %d].",
4892 d->name,
4893 spu_builtin_range[range].low, spu_builtin_range[range].high);
4894
4895 if (GET_CODE (op) == CONST
4896 && (GET_CODE (XEXP (op, 0)) == PLUS
4897 || GET_CODE (XEXP (op, 0)) == MINUS))
4898 {
4899 v = INTVAL (XEXP (XEXP (op, 0), 1));
4900 op = XEXP (XEXP (op, 0), 0);
4901 }
4902 else if (GET_CODE (op) == CONST_INT)
4903 v = INTVAL (op);
4904 else if (GET_CODE (op) == CONST_VECTOR
4905 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
4906 v = INTVAL (CONST_VECTOR_ELT (op, 0));
4907
4908 /* The default for v is 0 which is valid in every range. */
4909 if (v < spu_builtin_range[range].low
4910 || v > spu_builtin_range[range].high)
4911 error ("%s expects an integer literal in the range [%d, %d]. ("
4912 HOST_WIDE_INT_PRINT_DEC ")",
4913 d->name,
4914 spu_builtin_range[range].low, spu_builtin_range[range].high,
4915 v);
4916
4917 switch (p)
4918 {
4919 case SPU_BTI_S10_4:
4920 lsbits = 4;
4921 break;
4922 case SPU_BTI_U16_2:
4923 /* This is only used in lqa and stqa.  Even though the insns
4924 encode 16 bits of the address (all but the 2 least
4925 significant), only 14 bits are used because the address is
4926 masked to be 16-byte aligned. */
4927 lsbits = 4;
4928 break;
4929 case SPU_BTI_S16_2:
4930 /* This is used for lqr and stqr. */
4931 lsbits = 2;
4932 break;
4933 default:
4934 lsbits = 0;
4935 }
4936
4937 if (GET_CODE (op) == LABEL_REF
4938 || (GET_CODE (op) == SYMBOL_REF
4939 && SYMBOL_REF_FUNCTION_P (op))
4940 || (v & ((1 << lsbits) - 1)) != 0)
4941 warning (0, "%d least significant bits of %s are ignored.", lsbits,
4942 d->name);
4943 }
4944 }
4945
4946
4947 static void
4948 expand_builtin_args (struct spu_builtin_description *d, tree exp,
4949 rtx target, rtx ops[])
4950 {
4951 enum insn_code icode = d->icode;
4952 int i = 0, a;
4953
4954 /* Expand the arguments into rtl. */
4955
4956 if (d->parm[0] != SPU_BTI_VOID)
4957 ops[i++] = target;
4958
4959 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
4960 {
4961 tree arg = CALL_EXPR_ARG (exp, a);
4962 if (arg == 0)
4963 abort ();
4964 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
4965 }
4966 }
4967
4968 static rtx
4969 spu_expand_builtin_1 (struct spu_builtin_description *d,
4970 tree exp, rtx target)
4971 {
4972 rtx pat;
4973 rtx ops[8];
4974 enum insn_code icode = d->icode;
4975 enum machine_mode mode, tmode;
4976 int i, p;
4977 tree return_type;
4978
4979 /* Set up ops[] with values from arglist. */
4980 expand_builtin_args (d, exp, target, ops);
4981
4982 /* Handle the target operand which must be operand 0. */
4983 i = 0;
4984 if (d->parm[0] != SPU_BTI_VOID)
4985 {
4986
4987 /* We prefer the mode specified for the match_operand; otherwise we
4988 use the mode from the builtin function prototype. */
4989 tmode = insn_data[d->icode].operand[0].mode;
4990 if (tmode == VOIDmode)
4991 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
4992
4993 /* Try to use target because not using it can lead to extra copies,
4994 and when all of the registers are in use, extra copies lead to
4995 extra spills. */
4996 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
4997 ops[0] = target;
4998 else
4999 target = ops[0] = gen_reg_rtx (tmode);
5000
5001 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5002 abort ();
5003
5004 i++;
5005 }
5006
5007 if (d->fcode == SPU_MASK_FOR_LOAD)
5008 {
5009 enum machine_mode mode = insn_data[icode].operand[1].mode;
5010 tree arg;
5011 rtx addr, op, pat;
5012
5013 /* get addr */
5014 arg = CALL_EXPR_ARG (exp, 0);
5015 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5016 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5017 addr = memory_address (mode, op);
5018
5019 /* negate addr */
5020 op = gen_reg_rtx (GET_MODE (addr));
5021 emit_insn (gen_rtx_SET (VOIDmode, op,
5022 gen_rtx_NEG (GET_MODE (addr), addr)));
5023 op = gen_rtx_MEM (mode, op);
5024
5025 pat = GEN_FCN (icode) (target, op);
5026 if (!pat)
5027 return 0;
5028 emit_insn (pat);
5029 return target;
5030 }
5031
5032 /* Ignore align_hint, but still expand its args in case they have
5033 side effects. */
5034 if (icode == CODE_FOR_spu_align_hint)
5035 return 0;
5036
5037 /* Handle the rest of the operands. */
5038 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5039 {
5040 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5041 mode = insn_data[d->icode].operand[i].mode;
5042 else
5043 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5044
5045 /* mode can be VOIDmode here for labels */
5046
5047 /* For specific intrinsics with an immediate operand, e.g.,
5048 si_ai(), we sometimes need to convert the scalar argument to a
5049 vector argument by splatting the scalar. */
5050 if (VECTOR_MODE_P (mode)
5051 && (GET_CODE (ops[i]) == CONST_INT
5052 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
5053 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
5054 {
5055 if (GET_CODE (ops[i]) == CONST_INT)
5056 ops[i] = spu_const (mode, INTVAL (ops[i]));
5057 else
5058 {
5059 rtx reg = gen_reg_rtx (mode);
5060 enum machine_mode imode = GET_MODE_INNER (mode);
5061 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5062 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5063 if (imode != GET_MODE (ops[i]))
5064 ops[i] = convert_to_mode (imode, ops[i],
5065 TYPE_UNSIGNED (spu_builtin_types
5066 [d->parm[i]]));
5067 emit_insn (gen_spu_splats (reg, ops[i]));
5068 ops[i] = reg;
5069 }
5070 }
5071
5072 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5073
5074 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5075 ops[i] = spu_force_reg (mode, ops[i]);
5076 }
5077
5078 switch (insn_data[icode].n_operands)
5079 {
5080 case 0:
5081 pat = GEN_FCN (icode) (0);
5082 break;
5083 case 1:
5084 pat = GEN_FCN (icode) (ops[0]);
5085 break;
5086 case 2:
5087 pat = GEN_FCN (icode) (ops[0], ops[1]);
5088 break;
5089 case 3:
5090 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5091 break;
5092 case 4:
5093 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5094 break;
5095 case 5:
5096 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5097 break;
5098 case 6:
5099 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5100 break;
5101 default:
5102 abort ();
5103 }
5104
5105 if (!pat)
5106 abort ();
5107
5108 if (d->type == B_CALL || d->type == B_BISLED)
5109 emit_call_insn (pat);
5110 else if (d->type == B_JUMP)
5111 {
5112 emit_jump_insn (pat);
5113 emit_barrier ();
5114 }
5115 else
5116 emit_insn (pat);
5117
5118 return_type = spu_builtin_types[d->parm[0]];
5119 if (d->parm[0] != SPU_BTI_VOID
5120 && GET_MODE (target) != TYPE_MODE (return_type))
5121 {
5122 /* target is the return value.  It should always have the mode of
5123 the builtin function prototype. */
5124 target = spu_force_reg (TYPE_MODE (return_type), target);
5125 }
5126
5127 return target;
5128 }
5129
5130 rtx
5131 spu_expand_builtin (tree exp,
5132 rtx target,
5133 rtx subtarget ATTRIBUTE_UNUSED,
5134 enum machine_mode mode ATTRIBUTE_UNUSED,
5135 int ignore ATTRIBUTE_UNUSED)
5136 {
5137 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5138 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
5139 struct spu_builtin_description *d;
5140
5141 if (fcode < NUM_SPU_BUILTINS)
5142 {
5143 d = &spu_builtins[fcode];
5144
5145 return spu_expand_builtin_1 (d, exp, target);
5146 }
5147 abort ();
5148 }
5149
5150 /* Implement targetm.vectorize.builtin_mul_widen_even. */
5151 static tree
5152 spu_builtin_mul_widen_even (tree type)
5153 {
5154 switch (TYPE_MODE (type))
5155 {
5156 case V8HImode:
5157 if (TYPE_UNSIGNED (type))
5158 return spu_builtins[SPU_MULE_0].fndecl;
5159 else
5160 return spu_builtins[SPU_MULE_1].fndecl;
5161 break;
5162 default:
5163 return NULL_TREE;
5164 }
5165 }
5166
5167 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
5168 static tree
5169 spu_builtin_mul_widen_odd (tree type)
5170 {
5171 switch (TYPE_MODE (type))
5172 {
5173 case V8HImode:
5174 if (TYPE_UNSIGNED (type))
5175 return spu_builtins[SPU_MULO_1].fndecl;
5176 else
5177 return spu_builtins[SPU_MULO_0].fndecl;
5178 break;
5179 default:
5180 return NULL_TREE;
5181 }
5182 }
5183
5184 /* Implement targetm.vectorize.builtin_mask_for_load. */
5185 static tree
5186 spu_builtin_mask_for_load (void)
5187 {
5188 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5189 gcc_assert (d);
5190 return d->fndecl;
5191 }
5192
5193 void
5194 spu_init_expanders (void)
5195 {
5196 /* The hard frame pointer register is only 128-bit aligned when
5197 * frame_pointer_needed is true.  We don't know that until we're
5198 * expanding the prologue. */
5199 if (cfun)
5200 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
5201 }