/* gcc/config/spu/spu.c */
/* Copyright (C) 2006-2014 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "output.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "params.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "tm-constrs.h"
#include "ddg.h"
#include "sbitmap.h"
#include "timevar.h"
#include "df.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "builtins.h"

/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type.  (Implemented with V16QI_type_node) */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_VOID,
  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];

struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},        /* SPU_BTI_7     */
  {-0x40ll, 0x3fll},        /* SPU_BTI_S7    */
  {0ll, 0x7fll},            /* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},      /* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},    /* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},          /* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},    /* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},    /* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},  /* SPU_BTI_S16_2 */
  {0ll, 0xffffll},          /* SPU_BTI_U16   */
  {0ll, 0x3ffffll},         /* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},         /* SPU_BTI_U18   */
};
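
/* Illustrative sketch (not compiled): how the bounds above are consulted
   when range-checking a builtin's immediate argument.  The helper below
   is hypothetical; it only mirrors the table lookup.  For example,
   SPU_BTI_S10 accepts -512..511.  */
#if 0
static int
example_immediate_fits_p (HOST_WIDE_INT v, enum spu_builtin_type_index bti)
{
  struct spu_builtin_range r = spu_builtin_range[bti - SPU_BTI_7];
  return v >= r.low && v <= r.high;
}
#endif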

\f
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs.  */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);

/* Which instruction set architecture to use.  */
int spu_arch;
/* Which cpu are we tuning for.  */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);

enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,    /* constant pool */
  IC_IL1,     /* one il* instruction */
  IC_IL2,     /* both ilhu and iohl instructions */
  IC_IL1s,    /* one il* instruction */
  IC_IL2s,    /* both ilhu and iohl instructions */
  IC_FSMBI,   /* the fsmbi instruction */
  IC_CPAT,    /* one of the c*d instructions */
  IC_FSMBI2   /* fsmbi plus 1 other instruction */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
                                                enum machine_mode mode);

/* Pointer mode for __ea references.  */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)

\f
/* Define the structure for the machine field in struct function.  */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses.  */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'.  */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Set up function hooks.  */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
                           global_options.x_param_values,
                           global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue.  */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
        spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
        spu_arch = PROCESSOR_CELLEDP;
      else
        error ("bad value (%s) for -march= switch", spu_arch_string);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
        spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
        spu_tune = PROCESSOR_CELLEDP;
      else
        error ("bad value (%s) for -mtune= switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
         the default to -mno-safe-hints on celledp -- only the original
         Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
        target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
\f
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case TImode:
    case DFmode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}

/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
        || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
        || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}

/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
          > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
        *start -=
          GET_MODE_BITSIZE (GET_MODE (op)) -
          GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, ensure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
        *start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
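
/* Illustrative sketch (not compiled, assumed values): the offset
   rebasing above for a QImode operand.  Extracting bits 2..5 of a
   QImode value becomes bits 26..29 once the operand is widened and
   viewed as SImode.  */
#if 0
  HOST_WIDE_INT start = 2;   /* bit offset within the QImode reg */
  int op_size = 8;           /* QImode is smaller than SI ...     */
  start += 32 - op_size;     /* ... so start is rebased to 26     */
#endif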

void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (MEM_P (src))
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address.  */

      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
        {
          /* Alignment is sufficient for 1 load.  */
          s0 = gen_reg_rtx (TImode);
          r0 = spu_expand_load (s0, 0, src, start / 8);
          start &= 7;
          if (r0)
            emit_insn (gen_rotqby_ti (s0, s0, r0));
        }
      else
        {
          /* Need 2 loads.  */
          s0 = gen_reg_rtx (TImode);
          s1 = gen_reg_rtx (TImode);
          r0 = spu_expand_load (s0, s1, src, start / 8);
          start &= 7;

          gcc_assert (start + width <= 128);
          if (r0)
            {
              rtx r1 = gen_reg_rtx (SImode);
              mask = gen_reg_rtx (TImode);
              emit_move_insn (mask, GEN_INT (-1));
              emit_insn (gen_rotqby_ti (s0, s0, r0));
              emit_insn (gen_rotqby_ti (s1, s1, r0));
              if (GET_CODE (r0) == CONST_INT)
                r1 = GEN_INT (INTVAL (r0) & 15);
              else
                emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
              emit_insn (gen_shlqby_ti (mask, mask, r1));
              emit_insn (gen_selb (s0, s1, s0, mask));
            }
        }

    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
        emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
        emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start.  */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}
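
/* Illustrative sketch (not compiled, assumed values): the
   one-load-or-two test above.  With MEM_ALIGN (src) == 128,
   align_mask is -128; bits 96..127 stay within one quadword, while
   bits 120..135 straddle the boundary and need the two-load path.  */
#if 0
  HOST_WIDE_INT align_mask = -128;
  HOST_WIDE_INT start = 120, width = 16;
  int one_load = (start & align_mask) == ((start + width - 1) & align_mask);
  /* 120 & -128 == 0 but 135 & -128 == 128, so one_load == 0.  */
#endif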

void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;


  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
        (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG.  */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
        {
        case SImode:
          emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
          break;
        case DImode:
          emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
          break;
        case TImode:
          emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
          break;
        default:
          abort ();
        }
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
        maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
        maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
        unsigned char arr[16];
        int i = start / 8;
        memset (arr, 0, sizeof (arr));
        arr[i] = 0xff >> (start & 7);
        for (i++; i <= (start + width - 1) / 8; i++)
          arr[i] = 0xff;
        arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
        emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
        {
          rtx shl = gen_reg_rtx (SImode);
          rtx mask1 = gen_reg_rtx (TImode);
          rtx dst1 = gen_reg_rtx (TImode);
          rtx mem1;
          addr1 = plus_constant (Pmode, addr, 16);
          addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
          emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
          emit_insn (gen_shlqby_ti (mask1, mask, shl));
          mem1 = change_address (ops[0], TImode, addr1);
          set_mem_alias_set (mem1, 0);
          emit_move_insn (dst1, mem1);
          emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
          emit_move_insn (mem1, dst1);
        }
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
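
/* Illustrative sketch (standalone, assumed values): the TImode byte
   mask built above, worked for start = 12, width = 8.  The mask covers
   bits 12..19, i.e. the low nibble of byte 1 and the high nibble of
   byte 2.  */
#if 0
  unsigned char arr[16];
  int start = 12, width = 8, i = start / 8;
  memset (arr, 0, sizeof (arr));
  arr[i] = 0xff >> (start & 7);                           /* arr[1] = 0x0f */
  for (i++; i <= (start + width - 1) / 8; i++)
    arr[i] = 0xff;                                        /* arr[2] = 0xff */
  arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));  /* arr[2] = 0xf0 */
#endif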


int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
        {
          dst = adjust_address (ops[0], V16QImode, offset);
          src = adjust_address (ops[1], V16QImode, offset);
          emit_move_insn (dst, src);
        }
      if (offset < bytes)
        {
          rtx mask;
          unsigned char arr[16] = { 0 };
          for (i = 0; i < bytes - offset; i++)
            arr[i] = 0xff;
          dst = adjust_address (ops[0], V16QImode, offset);
          src = adjust_address (ops[1], V16QImode, offset);
          mask = gen_reg_rtx (V16QImode);
          sreg = gen_reg_rtx (V16QImode);
          dreg = gen_reg_rtx (V16QImode);
          target = gen_reg_rtx (V16QImode);
          emit_move_insn (mask, array_to_constant (V16QImode, arr));
          emit_move_insn (dreg, dst);
          emit_move_insn (sreg, src);
          emit_insn (gen_selb (target, dreg, sreg, mask));
          emit_move_insn (dst, target);
        }
      return 1;
    }
  return 0;
}
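
/* Illustrative sketch (assumed values): the selb mask built above for a
   21-byte copy.  After one full 16-byte move, offset == 16 and the
   final partial quadword merges only bytes 16..20 of the source.  */
#if 0
  unsigned char arr[16] = { 0 };
  int bytes = 21, offset = 16, i;
  for (i = 0; i < bytes - offset; i++)   /* bytes 0..4 of the last qword */
    arr[i] = 0xff;
#endif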

enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};

/* Generate a compare for CODE, and either branch on the result or set a
   register to it, as selected by IS_SET.  GCC can figure this out too
   if we don't provide all variations of compares, but since GCC always
   wants to use WORD_MODE, we can generate better code in most cases by
   doing it ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
        switch (code)
          {
          case GE:
            op1 = GEN_INT (val);
            code = GT;
            break;
          case LT:
            op1 = GEN_INT (val);
            code = LE;
            break;
          case GEU:
            op1 = GEN_INT (val);
            code = GTU;
            break;
          case LTU:
            op1 = GEN_INT (val);
            code = LEU;
            break;
          default:
            break;
          }
    }

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible.  */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
    {
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
        switch (code)
          {
          case LE:
            op1 = GEN_INT (val);
            code = LT;
            break;
          case LEU:
            op1 = GEN_INT (val);
            code = LTU;
            break;
          default:
            break;
          }
    }

  comp_mode = SImode;
  op_mode = GET_MODE (op0);

  switch (code)
    {
    case GE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
        {
          reverse_compare = 0;
          reverse_test = 0;
          eq_test = 1;
          eq_code = SPU_EQ;
        }
      else
        {
          reverse_compare = 1;
          reverse_test = 1;
        }
      break;
    case LE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
        {
          reverse_compare = 1;
          reverse_test = 0;
          eq_test = 1;
          eq_code = SPU_EQ;
        }
      else
        {
          reverse_compare = 0;
          reverse_test = 1;
        }
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      break;
    case TImode:
      index = 4;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
      index = 7;
      comp_mode = op_mode;
      break;
    case V8HImode:
      index = 8;
      comp_mode = op_mode;
      break;
    case V4SImode:
      index = 9;
      comp_mode = op_mode;
      break;
    case V4SFmode:
      index = 10;
      comp_mode = V4SImode;
      break;
    case V2DFmode:
      index = 11;
      comp_mode = V2DImode;
      break;
    case V2DImode:
    default:
      abort ();
    }

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
          || GET_MODE (op0) == HImode
          || GET_MODE (op0) == QImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching.  */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
        {
          rtx t = op1;
          op1 = op0;
          op0 = t;
        }

      if (spu_comp_icode[index][scode] == 0)
        abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
          (op0, op_mode))
        op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
          (op1, op_mode))
        op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
                                                         op0, op1);
      if (comp_rtx == 0)
        abort ();
      emit_insn (comp_rtx);

      if (eq_test)
        {
          eq_result = gen_reg_rtx (comp_mode);
          eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
                                                             op0, op1);
          if (eq_rtx == 0)
            abort ();
          emit_insn (eq_rtx);
          ior_code = optab_handler (ior_optab, comp_mode);
          gcc_assert (ior_code != CODE_FOR_nothing);
          emit_insn (GEN_FCN (ior_code)
                     (compare_result, compare_result, eq_result));
        }
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result.  */
      if (comp_mode == QImode)
        {
          rtx old_res = compare_result;
          compare_result = gen_reg_rtx (HImode);
          comp_mode = HImode;
          emit_insn (gen_extendqihi2 (compare_result, old_res));
        }

      if (reverse_test)
        bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
        bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
                                   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
                                                         loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result.  */
      if (target_size > compare_size)
        {
          select_mask = gen_reg_rtx (mode);
          emit_insn (gen_extend_compare (select_mask, compare_result));
        }
      else if (target_size < compare_size)
        select_mask =
          gen_rtx_SUBREG (mode, compare_result,
                          (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
        select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
        select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
          || GET_MODE (target) != GET_MODE (op_f))
        abort ();

      if (reverse_test)
        emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
        emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
        emit_insn (gen_rtx_SET (VOIDmode, compare_result,
                                gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
        emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
               && GET_MODE (compare_result) == QImode)
        emit_insn (gen_extend_compare (target, compare_result));
      else
        emit_move_insn (target, compare_result);
    }
}
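
/* Illustrative sketch (not compiled): the GE -> GT rewrite at the top
   of spu_emit_branch_or_set on a concrete comparison.  SPU has cgt but
   no cge pattern, so (x >= 10) becomes (x > 9) before an icode is
   selected.  */
#if 0
  rtx op1 = GEN_INT (10);
  enum rtx_code code = GE;
  /* INTVAL (op1) - 1 == 9 still fits the mode of op0, so:  */
  op1 = GEN_INT (9);
  code = GT;
#endif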

HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
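
/* Illustrative sketch (assumed value): the DFmode packing in
   const_double_to_hwint, worked for the IEEE double 1.0.  The two
   32-bit target words l[0] (high) and l[1] (low) are concatenated,
   high word first.  */
#if 0
  long l[2] = { 0x3ff00000, 0x00000000 };    /* IEEE double 1.0 */
  HOST_WIDE_INT val = l[0];
  val = (val << 32) | (l[1] & 0xffffffff);   /* 0x3ff0000000000000 */
#endif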

void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
        {
          fprintf (file, "%s,%s", reg_names[REGNO (reg)],
                   reg_names[REGNO (offset)]);
        }
      else if (GET_CODE (offset) == CONST_INT)
        {
          fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
                   INTVAL (offset), reg_names[REGNO (reg)]);
        }
      else
        abort ();
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;

    default:
      debug_rtx (addr);
      abort ();
    }
}

void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':   /* 128 bits, signed */
      case 'm':   /* 128 bits, signed */
      case 'T':   /* 128 bits, signed */
      case 't':   /* 128 bits, signed */
        mode = TImode;
        break;
      case 'K':   /* 64 bits, signed */
      case 'k':   /* 64 bits, signed */
      case 'D':   /* 64 bits, signed */
      case 'd':   /* 64 bits, signed */
        mode = DImode;
        break;
      case 'J':   /* 32 bits, signed */
      case 'j':   /* 32 bits, signed */
      case 's':   /* 32 bits, signed */
      case 'S':   /* 32 bits, signed */
        mode = SImode;
        break;
      }
  switch (code)
    {

    case 'j':   /* 32 bits, signed */
    case 'k':   /* 64 bits, signed */
    case 'm':   /* 128 bits, signed */
      if (xcode == CONST_INT
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
        {
          gcc_assert (logical_immediate_p (x, mode));
          constant_to_array (mode, x, arr);
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);
          switch (which_logical_immediate (val))
            {
            case SPU_ORI:
              break;
            case SPU_ORHI:
              fprintf (file, "h");
              break;
            case SPU_ORBI:
              fprintf (file, "b");
              break;
            default:
              gcc_unreachable ();
            }
        }
      else
        gcc_unreachable ();
      return;

    case 'J':   /* 32 bits, signed */
    case 'K':   /* 64 bits, signed */
    case 'L':   /* 128 bits, signed */
      if (xcode == CONST_INT
          || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
        {
          gcc_assert (logical_immediate_p (x, mode)
                      || iohl_immediate_p (x, mode));
          constant_to_array (mode, x, arr);
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);
          switch (which_logical_immediate (val))
            {
            case SPU_ORI:
            case SPU_IOHL:
              break;
            case SPU_ORHI:
              val = trunc_int_for_mode (val, HImode);
              break;
            case SPU_ORBI:
              val = trunc_int_for_mode (val, QImode);
              break;
            default:
              gcc_unreachable ();
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        gcc_unreachable ();
      return;

    case 't':   /* 128 bits, signed */
    case 'd':   /* 64 bits, signed */
    case 's':   /* 32 bits, signed */
      if (CONSTANT_P (x))
        {
          enum immediate_class c = classify_immediate (x, mode);
          switch (c)
            {
            case IC_IL1:
              constant_to_array (mode, x, arr);
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
              val = trunc_int_for_mode (val, SImode);
              switch (which_immediate_load (val))
                {
                case SPU_IL:
                  break;
                case SPU_ILA:
                  fprintf (file, "a");
                  break;
                case SPU_ILH:
                  fprintf (file, "h");
                  break;
                case SPU_ILHU:
                  fprintf (file, "hu");
                  break;
                default:
                  gcc_unreachable ();
                }
              break;
            case IC_CPAT:
              constant_to_array (mode, x, arr);
              cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
              if (info == 1)
                fprintf (file, "b");
              else if (info == 2)
                fprintf (file, "h");
              else if (info == 4)
                fprintf (file, "w");
              else if (info == 8)
                fprintf (file, "d");
              break;
            case IC_IL1s:
              if (xcode == CONST_VECTOR)
                {
                  x = CONST_VECTOR_ELT (x, 0);
                  xcode = GET_CODE (x);
                }
              if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
                fprintf (file, "a");
              else if (xcode == HIGH)
                fprintf (file, "hu");
              break;
            case IC_FSMBI:
            case IC_FSMBI2:
            case IC_IL2:
            case IC_IL2s:
            case IC_POOL:
              abort ();
            }
        }
      else
        gcc_unreachable ();
      return;

    case 'T':   /* 128 bits, signed */
    case 'D':   /* 64 bits, signed */
    case 'S':   /* 32 bits, signed */
      if (CONSTANT_P (x))
        {
          enum immediate_class c = classify_immediate (x, mode);
          switch (c)
            {
            case IC_IL1:
              constant_to_array (mode, x, arr);
              val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
              val = trunc_int_for_mode (val, SImode);
              switch (which_immediate_load (val))
                {
                case SPU_IL:
                case SPU_ILA:
                  break;
                case SPU_ILH:
                case SPU_ILHU:
                  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
                  break;
                default:
                  gcc_unreachable ();
                }
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
              break;
            case IC_FSMBI:
              constant_to_array (mode, x, arr);
              val = 0;
              for (i = 0; i < 16; i++)
                {
                  val <<= 1;
                  val |= arr[i] & 1;
                }
              print_operand (file, GEN_INT (val), 0);
              break;
            case IC_CPAT:
              constant_to_array (mode, x, arr);
              cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
              fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
              break;
            case IC_IL1s:
              if (xcode == HIGH)
                x = XEXP (x, 0);
              if (GET_CODE (x) == CONST_VECTOR)
                x = CONST_VECTOR_ELT (x, 0);
              output_addr_const (file, x);
              if (xcode == HIGH)
                fprintf (file, "@h");
              break;
            case IC_IL2:
            case IC_IL2s:
            case IC_FSMBI2:
            case IC_POOL:
              abort ();
            }
        }
      else
        gcc_unreachable ();
      return;

    case 'C':
      if (xcode == CONST_INT)
        {
          /* Only the 4 least significant bits are relevant for
             generating control word instructions.  */
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
          return;
        }
      break;

    case 'M':   /* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
        switch (INTVAL (x))
          {
          case 1:
            fprintf (file, "b");
            break;
          case 2:
            fprintf (file, "h");
            break;
          case 4:
            fprintf (file, "w");
            break;
          case 8:
            fprintf (file, "d");
            break;
          default:
            gcc_unreachable ();
          }
      else
        gcc_unreachable ();
      return;

    case 'N':   /* Negate the operand */
      if (xcode == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':   /* enable/disable interrupts */
      if (xcode == CONST_INT)
        fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':   /* branch modifiers */
      if (xcode == REG)
        fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
        fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':   /* indirect call */
      if (xcode == MEM)
        {
          if (GET_CODE (XEXP (x, 0)) == REG)
            /* Used in indirect function calls.  */
            fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
          else
            output_address (XEXP (x, 0));
        }
      return;

    case 'p':   /* load/store */
      if (xcode == MEM)
        {
          x = XEXP (x, 0);
          xcode = GET_CODE (x);
        }
      if (xcode == AND)
        {
          x = XEXP (x, 0);
          xcode = GET_CODE (x);
        }
      if (xcode == REG)
        fprintf (file, "d");
      else if (xcode == CONST_INT)
        fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
        fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
        {
          if (GET_CODE (XEXP (x, 1)) == REG)
            fprintf (file, "x");
          else
            fprintf (file, "d");
        }
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'v':
    case 'w':
      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

    case 0:
      if (xcode == REG)
        fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
        output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
        print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
        output_addr_const (file, x);
      return;

      /* unused letters
          o qr  u   yz
         AB  OPQR  UVWXYZ */
    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}
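
/* Illustrative sketch (standalone, assumed values): the IC_FSMBI
   encoding in print_operand.  fsmbi expands each of its 16 immediate
   bits to one result byte, so the printed value collects bit 0 of
   every byte of the constant, MSB first.  */
#if 0
  unsigned char arr[16] = { 0xff, 0xff, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0xff };
  HOST_WIDE_INT val = 0;
  int i;
  for (i = 0; i < 16; i++)
    {
      val <<= 1;
      val |= arr[i] & 1;   /* val ends up 0xc001 */
    }
#endif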

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  if (!reload_completed && !reload_in_progress)
    abort ();

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx.  */
  if (!cfun->machine->pic_reg)
    {
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
        cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
      else
        cfun->machine->pic_reg = pic_offset_table_rtx;
    }

  return cfun->machine->pic_reg;
}

/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
        unsigned char arrhi[16];
        unsigned char arrlo[16];
        rtx to, temp, hi, lo;
        int i;
        enum machine_mode imode = mode;
        /* We need to do reals as ints because the constant used in the
           IOR might not be a legitimate real constant.  */
        imode = int_mode_for_mode (mode);
        constant_to_array (mode, ops[1], arrhi);
        if (imode != mode)
          to = simplify_gen_subreg (imode, ops[0], mode, 0);
        else
          to = ops[0];
        temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
        for (i = 0; i < 16; i += 4)
          {
            arrlo[i + 2] = arrhi[i + 2];
            arrlo[i + 3] = arrhi[i + 3];
            arrlo[i + 0] = arrlo[i + 1] = 0;
            arrhi[i + 2] = arrhi[i + 3] = 0;
          }
        hi = array_to_constant (imode, arrhi);
        lo = array_to_constant (imode, arrlo);
        emit_move_insn (temp, hi);
        emit_insn (gen_rtx_SET
                   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
        return 1;
      }
    case IC_FSMBI2:
      {
        unsigned char arr_fsmbi[16];
        unsigned char arr_andbi[16];
        rtx to, reg_fsmbi, reg_and;
        int i;
        enum machine_mode imode = mode;
        /* We need to do reals as ints because the constant used in the
           AND might not be a legitimate real constant.  */
        imode = int_mode_for_mode (mode);
        constant_to_array (mode, ops[1], arr_fsmbi);
        if (imode != mode)
          to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
        else
          to = ops[0];
        for (i = 0; i < 16; i++)
          if (arr_fsmbi[i] != 0)
            {
              arr_andbi[0] = arr_fsmbi[i];
              arr_fsmbi[i] = 0xff;
            }
        for (i = 1; i < 16; i++)
          arr_andbi[i] = arr_andbi[0];
        reg_fsmbi = array_to_constant (imode, arr_fsmbi);
        reg_and = array_to_constant (imode, arr_andbi);
        emit_move_insn (to, reg_fsmbi);
        emit_insn (gen_rtx_SET
                   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
        return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
        {
          rtx mem = force_const_mem (mode, ops[1]);
          if (TARGET_LARGE_MEM)
            {
              rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
              emit_move_insn (addr, XEXP (mem, 0));
              mem = replace_equiv_address (mem, addr);
            }
          emit_move_insn (ops[0], mem);
          return 1;
        }
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
        {
          if (c == IC_IL2s)
            {
              emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
              emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
            }
          else if (flag_pic)
            emit_insn (gen_pic (ops[0], ops[1]));
          if (flag_pic)
            {
              rtx pic_reg = get_pic_reg ();
              emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
            }
          return flag_pic || c == IC_IL2s;
        }
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
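
/* Illustrative sketch (assumed value): the IC_IL2 byte split above for
   one 32-bit word 0x12345678.  arrhi keeps the high halfword for the
   ilhu-style load, arrlo keeps the low halfword for the iohl-style
   IOR.  */
#if 0
  unsigned char arrhi[4] = { 0x12, 0x34, 0x56, 0x78 };
  unsigned char arrlo[4];
  arrlo[2] = arrhi[2];  arrlo[3] = arrhi[3];   /* 0x56, 0x78 */
  arrlo[0] = arrlo[1] = 0;                     /* lo = 0x00005678 */
  arrhi[2] = arrhi[3] = 0;                     /* hi = 0x12340000 */
#endif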

/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}

static rtx_insn *
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx_insn *
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}

/* This happens after reload, so we need to expand it.  */
static rtx_insn *
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx_insn *insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
        abort ();
    }
  return insn;
}

/* Return nonzero if this function is known to have a null epilogue.  */

int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
          && (spu_saved_regs_size ()
              + get_frame_size ()
              + crtl->outgoing_args_size
              + crtl->args.pretend_args_size == 0)
          && crtl->is_leaf)
        return 1;
    }
  return 0;
}

/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  var args   |
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |    ...      |
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    ...      |
         |   vars      | get_frame_size() bytes
  HFP -> +-------------+
         |    ...      |
         |  outgoing   |
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | $lr of next |
         |   frame     |
         +-------------+
         | back chain  |
   SP -> +-------------+

*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx_insn *insn;
  rtx real;

  if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
    cfun->machine->pic_reg = pic_offset_table_rtx;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!crtl->is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register.  */
  if (!crtl->is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
        if (need_to_save_reg (regno, 1))
          {
            offset -= 16;
            insn = frame_emit_store (regno, sp_reg, offset);
            RTX_FRAME_RELATED_P (insn) = 1;
          }
    }

  if (flag_pic && cfun->machine->pic_reg)
    {
      rtx pic_reg = cfun->machine->pic_reg;
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
        {
          /* We compare against total_size-1 because
             ($sp >= total_size) <=> ($sp > total_size-1) */
          rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
          rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
          rtx size_v4si = spu_const (V4SImode, total_size - 1);
          if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
            {
              emit_move_insn (scratch_v4si, size_v4si);
              size_v4si = scratch_v4si;
            }
          emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
          emit_insn (gen_vec_extractv4si
                     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
          emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
        }

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain.  */
      if (total_size <= 2000)
        {
          /* In this case we save the back chain first.  */
          insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
          insn =
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
        }
      else
        {
          insn = emit_move_insn (scratch_reg_0, sp_reg);
          insn =
            frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
        }
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
        {
          /* Save the back chain ptr */
          insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
        }

      if (frame_pointer_needed)
        {
          rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
          HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
            + crtl->outgoing_args_size;
          /* Set the new frame_pointer */
          insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
          RTX_FRAME_RELATED_P (insn) = 1;
          real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
          REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
        }
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = total_size;
}

void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!crtl->is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
        frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
        frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);


      if (saved_regs_size > 0)
        {
          offset = -crtl->args.pretend_args_size;
          for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
            if (need_to_save_reg (regno, 1))
              {
                offset -= 0x10;
                frame_emit_load (regno, sp_reg, offset);
              }
        }
    }

  if (!crtl->is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      emit_jump_insn (gen__return ());
    }
}

rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient.  */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
\f

/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits.  */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
              || GET_MODE_CLASS (mode) == MODE_FLOAT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}

/* Create a MODE vector constant from 4 ints.  */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
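
/* Illustrative sketch: spu_const_from_ints lays the four ints out
   big-endian, one per vector word, so the call below yields the byte
   array 00 00 00 01  00 00 00 02  00 00 00 03  00 00 00 04.  */
#if 0
  rtx cv = spu_const_from_ints (V4SImode, 1, 2, 3, 4);
#endif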
\f
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks.  */
struct spu_bb_info
{
  rtx_insn *prop_jump;   /* propagated from another block */
  int bb_index;          /* the original block.  */
};
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (CALL_P (INSN) \
   || INSN_CODE (INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE (INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3 ("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary.  */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2 ("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)

/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
static void
emit_nop_for_insn (rtx_insn *insn)
{
  int p;
  rtx_insn *new_insn;

  /* We need to handle JUMP_TABLE_DATA separately.  */
  if (JUMP_TABLE_DATA_P (insn))
    {
      new_insn = emit_insn_after (gen_lnop (), insn);
      recog_memoized (new_insn);
      INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
      return;
    }

  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
}

/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop.  */
static void
pad_bb (void)
{
  rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES.  */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops.  */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
          || INSN_CODE (insn) == CODE_FOR_hbr)
        {
          if (hbr_insn)
            {
              int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
              int a1 = INSN_ADDRESSES (INSN_UID (insn));
              if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
                  || (a1 - a0 == 4))
                {
                  prev_insn = emit_insn_before (gen_lnop (), insn);
                  PUT_MODE (prev_insn, GET_MODE (insn));
                  PUT_MODE (insn, TImode);
                  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
                  length += 4;
                }
            }
          hbr_insn = insn;
        }
      if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
        {
          if (GET_MODE (insn) == TImode)
            PUT_MODE (next_insn, TImode);
          insn = next_insn;
          next_insn = next_active_insn (insn);
        }
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
        {
          if (((addr + length) & 7) != 0)
            {
              emit_nop_for_insn (prev_insn);
              length += 4;
            }
        }
      else if (GET_MODE (insn) == TImode
               && ((next_insn && GET_MODE (next_insn) != TImode)
                   || get_attr_type (insn) == TYPE_MULTI0)
               && ((addr + length) & 7) != 0)
        {
          /* prev_insn will always be set because the first insn is
             always 8-byte aligned.  */
          emit_nop_for_insn (prev_insn);
          length += 4;
        }
      prev_insn = insn;
    }
}
2100
2101 \f
2102 /* Routines for branch hints. */
2103
2104 static void
2105 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2106 int distance, sbitmap blocks)
2107 {
2108 rtx branch_label = 0;
2109 rtx_insn *hint;
2110 rtx_insn *insn;
2111 rtx_jump_table_data *table;
2112
2113 if (before == 0 || branch == 0 || target == 0)
2114 return;
2115
2116 /* While scheduling we require hints to be no further than 600 bytes
2117 away, so we need to enforce that here too.  */
2118 if (distance > 600)
2119 return;
2120
2121 /* If BEFORE is a basic block note, emit the hint after the note.  */
2122 if (NOTE_INSN_BASIC_BLOCK_P (before))
2123 before = NEXT_INSN (before);
2124
2125 branch_label = gen_label_rtx ();
2126 LABEL_NUSES (branch_label)++;
2127 LABEL_PRESERVE_P (branch_label) = 1;
2128 insn = emit_label_before (branch_label, branch);
2129 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2130 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2131
2132 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2133 recog_memoized (hint);
2134 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2135 HINTED_P (branch) = 1;
2136
2137 if (GET_CODE (target) == LABEL_REF)
2138 HINTED_P (XEXP (target, 0)) = 1;
2139 else if (tablejump_p (branch, 0, &table))
2140 {
2141 rtvec vec;
2142 int j;
2143 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2144 vec = XVEC (PATTERN (table), 0);
2145 else
2146 vec = XVEC (PATTERN (table), 1);
2147 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2148 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2149 }
2150
2151 if (distance >= 588)
2152 {
2153 /* Make sure the hint isn't scheduled any earlier than this point,
2154 which could make it too far for the branch offset to fit.  */
2155 insn = emit_insn_before (gen_blockage (), hint);
2156 recog_memoized (insn);
2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2158 }
2159 else if (distance <= 8 * 4)
2160 {
2161 /* To guarantee at least 8 insns between the hint and branch we
2162 insert nops. */
2163 int d;
2164 for (d = distance; d < 8 * 4; d += 4)
2165 {
2166 insn =
2167 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2168 recog_memoized (insn);
2169 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2170 }
2171
2172 /* Make sure any nops inserted aren't scheduled before the hint. */
2173 insn = emit_insn_after (gen_blockage (), hint);
2174 recog_memoized (insn);
2175 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2176
2177 /* Make sure any nops inserted aren't scheduled after the call. */
2178 if (CALL_P (branch) && distance < 8 * 4)
2179 {
2180 insn = emit_insn_before (gen_blockage (), branch);
2181 recog_memoized (insn);
2182 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2183 }
2184 }
2185 }
2186
2187 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2188 the rtx for the branch target. */
2189 static rtx
2190 get_branch_target (rtx_insn *branch)
2191 {
2192 if (JUMP_P (branch))
2193 {
2194 rtx set, src;
2195
2196 /* Return statements */
2197 if (GET_CODE (PATTERN (branch)) == RETURN)
2198 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2199
2200 /* ASM GOTOs. */
2201 if (extract_asm_operands (PATTERN (branch)) != NULL)
2202 return NULL;
2203
2204 set = single_set (branch);
2205 src = SET_SRC (set);
2206 if (GET_CODE (SET_DEST (set)) != PC)
2207 abort ();
2208
2209 if (GET_CODE (src) == IF_THEN_ELSE)
2210 {
2211 rtx lab = 0;
2212 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2213 if (note)
2214 {
2215 /* If the more probable case is not a fall through, then try a branch
2216 hint.  REG_BR_PROB_BASE is 10000, so the thresholds below are 60% and 40%.  */
2217 int prob = XINT (note, 0);
2218 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2219 && GET_CODE (XEXP (src, 1)) != PC)
2220 lab = XEXP (src, 1);
2221 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2222 && GET_CODE (XEXP (src, 2)) != PC)
2223 lab = XEXP (src, 2);
2224 }
2225 if (lab)
2226 {
2227 if (GET_CODE (lab) == RETURN)
2228 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2229 return lab;
2230 }
2231 return 0;
2232 }
2233
2234 return src;
2235 }
2236 else if (CALL_P (branch))
2237 {
2238 rtx call;
2239 /* All of our call patterns are in a PARALLEL and the CALL is
2240 the first pattern in the PARALLEL. */
2241 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2242 abort ();
2243 call = XVECEXP (PATTERN (branch), 0, 0);
2244 if (GET_CODE (call) == SET)
2245 call = SET_SRC (call);
2246 if (GET_CODE (call) != CALL)
2247 abort ();
2248 return XEXP (XEXP (call, 0), 0);
2249 }
2250 return 0;
2251 }
2252
2253 /* The special $hbr register is used to prevent the insn scheduler from
2254 moving hbr insns across instructions which invalidate them. It
2255 should only be used in a clobber, and this function searches for
2256 insns which clobber it. */
2257 static bool
2258 insn_clobbers_hbr (rtx_insn *insn)
2259 {
2260 if (INSN_P (insn)
2261 && GET_CODE (PATTERN (insn)) == PARALLEL)
2262 {
2263 rtx parallel = PATTERN (insn);
2264 rtx clobber;
2265 int j;
2266 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2267 {
2268 clobber = XVECEXP (parallel, 0, j);
2269 if (GET_CODE (clobber) == CLOBBER
2270 && GET_CODE (XEXP (clobber, 0)) == REG
2271 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2272 return 1;
2273 }
2274 }
2275 return 0;
2276 }
2277
2278 /* Search up to 32 insns starting at FIRST:
2279 - at any kind of hinted branch, just return
2280 - at any unconditional branch in the first 15 insns, just return
2281 - at a call or indirect branch, after the first 15 insns, force it to
2282 an even address and return
2283 - at any unconditional branch, after the first 15 insns, force it to
2284 an even address.
2285 At the end of the search, insert an hbrp within 4 insns of FIRST,
2286 and an hbrp within 16 instructions of FIRST.
2287 */
2288 static void
2289 insert_hbrp_for_ilb_runout (rtx_insn *first)
2290 {
2291 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2292 int addr = 0, length, first_addr = -1;
2293 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2294 int insert_lnop_after = 0;
2295 for (insn = first; insn; insn = NEXT_INSN (insn))
2296 if (INSN_P (insn))
2297 {
2298 if (first_addr == -1)
2299 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2300 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2301 length = get_attr_length (insn);
2302
2303 if (before_4 == 0 && addr + length >= 4 * 4)
2304 before_4 = insn;
2305 /* We test for 14 instructions because the first hbrp will add
2306 up to 2 instructions. */
2307 if (before_16 == 0 && addr + length >= 14 * 4)
2308 before_16 = insn;
2309
2310 if (INSN_CODE (insn) == CODE_FOR_hbr)
2311 {
2312 /* Make sure an hbrp is at least 2 cycles away from a hint.
2313 Insert an lnop after the hbrp when necessary. */
2314 if (before_4 == 0 && addr > 0)
2315 {
2316 before_4 = insn;
2317 insert_lnop_after |= 1;
2318 }
2319 else if (before_4 && addr <= 4 * 4)
2320 insert_lnop_after |= 1;
2321 if (before_16 == 0 && addr > 10 * 4)
2322 {
2323 before_16 = insn;
2324 insert_lnop_after |= 2;
2325 }
2326 else if (before_16 && addr <= 14 * 4)
2327 insert_lnop_after |= 2;
2328 }
2329
2330 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2331 {
2332 if (addr < hbrp_addr0)
2333 hbrp_addr0 = addr;
2334 else if (addr < hbrp_addr1)
2335 hbrp_addr1 = addr;
2336 }
2337
2338 if (CALL_P (insn) || JUMP_P (insn))
2339 {
2340 if (HINTED_P (insn))
2341 return;
2342
2343 /* Any branch after the first 15 insns should be on an even
2344 address to avoid a special case branch. There might be
2345 some nops and/or hbrps inserted, so we test after 10
2346 insns. */
2347 if (addr > 10 * 4)
2348 SCHED_ON_EVEN_P (insn) = 1;
2349 }
2350
2351 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2352 return;
2353
2354
2355 if (addr + length >= 32 * 4)
2356 {
2357 gcc_assert (before_4 && before_16);
2358 if (hbrp_addr0 > 4 * 4)
2359 {
2360 insn =
2361 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2362 recog_memoized (insn);
2363 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2364 INSN_ADDRESSES_NEW (insn,
2365 INSN_ADDRESSES (INSN_UID (before_4)));
2366 PUT_MODE (insn, GET_MODE (before_4));
2367 PUT_MODE (before_4, TImode);
2368 if (insert_lnop_after & 1)
2369 {
2370 insn = emit_insn_before (gen_lnop (), before_4);
2371 recog_memoized (insn);
2372 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID (before_4)));
2375 PUT_MODE (insn, TImode);
2376 }
2377 }
2378 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2379 && hbrp_addr1 > 16 * 4)
2380 {
2381 insn =
2382 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2383 recog_memoized (insn);
2384 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2385 INSN_ADDRESSES_NEW (insn,
2386 INSN_ADDRESSES (INSN_UID (before_16)));
2387 PUT_MODE (insn, GET_MODE (before_16));
2388 PUT_MODE (before_16, TImode);
2389 if (insert_lnop_after & 2)
2390 {
2391 insn = emit_insn_before (gen_lnop (), before_16);
2392 recog_memoized (insn);
2393 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2394 INSN_ADDRESSES_NEW (insn,
2395 INSN_ADDRESSES (INSN_UID
2396 (before_16)));
2397 PUT_MODE (insn, TImode);
2398 }
2399 }
2400 return;
2401 }
2402 }
2403 else if (BARRIER_P (insn))
2404 return;
2405
2406 }
2407
2408 /* The SPU might hang when it executes 48 inline instructions after a
2409 hinted branch jumps to its hinted target. The beginning of a
2410 function and the return from a call might have been hinted, and
2411 must be handled as well. To prevent a hang we insert 2 hbrps. The
2412 first should be within 6 insns of the branch target. The second
2413 should be within 22 insns of the branch target. When determining
2414 if hbrps are necessary, we look for only 32 inline instructions,
2415 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2416 when inserting new hbrps, we insert them within 4 and 16 insns of
2417 the target. */
2418 static void
2419 insert_hbrp (void)
2420 {
2421 rtx_insn *insn;
2422 if (TARGET_SAFE_HINTS)
2423 {
2424 shorten_branches (get_insns ());
2425 /* Insert an hbrp at the beginning of the function.  */
2426 insn = next_active_insn (get_insns ());
2427 if (insn)
2428 insert_hbrp_for_ilb_runout (insn);
2429 /* Insert hbrp after hinted targets. */
2430 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2431 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2432 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2433 }
2434 }
2435
2436 static int in_spu_reorg;
2437
2438 static void
2439 spu_var_tracking (void)
2440 {
2441 if (flag_var_tracking)
2442 {
2443 df_analyze ();
2444 timevar_push (TV_VAR_TRACKING);
2445 variable_tracking_main ();
2446 timevar_pop (TV_VAR_TRACKING);
2447 df_finish_pass (false);
2448 }
2449 }
2450
2451 /* Insert branch hints. There are no branch optimizations after this
2452 pass, so it's safe to set our branch hints now. */
2453 static void
2454 spu_machine_dependent_reorg (void)
2455 {
2456 sbitmap blocks;
2457 basic_block bb;
2458 rtx_insn *branch, *insn;
2459 rtx branch_target = 0;
2460 int branch_addr = 0, insn_addr, required_dist = 0;
2461 int i;
2462 unsigned int j;
2463
2464 if (!TARGET_BRANCH_HINTS || optimize == 0)
2465 {
2466 /* We still do it for unoptimized code because an external
2467 function might have hinted a call or return. */
2468 compute_bb_for_insn ();
2469 insert_hbrp ();
2470 pad_bb ();
2471 spu_var_tracking ();
2472 free_bb_for_insn ();
2473 return;
2474 }
2475
2476 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2477 bitmap_clear (blocks);
2478
2479 in_spu_reorg = 1;
2480 compute_bb_for_insn ();
2481
2482 /* (Re-)discover loops so that bb->loop_father can be used
2483 in the analysis below. */
2484 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2485
2486 compact_blocks ();
2487
2488 spu_bb_info =
2489 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2490 sizeof (struct spu_bb_info));
2491
2492 /* We need exact insn addresses and lengths. */
2493 shorten_branches (get_insns ());
2494
2495 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2496 {
2497 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2498 branch = 0;
2499 if (spu_bb_info[i].prop_jump)
2500 {
2501 branch = spu_bb_info[i].prop_jump;
2502 branch_target = get_branch_target (branch);
2503 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2504 required_dist = spu_hint_dist;
2505 }
2506 /* Search from end of a block to beginning. In this loop, find
2507 jumps which need a branch hint and emit the hint only when:
2508 - it's an indirect branch and we're at the insn which sets
2509 the register
2510 - we're at an insn that will invalidate the hint. e.g., a
2511 call, another hint insn, inline asm that clobbers $hbr, and
2512 some inlined operations (divmodsi4). Don't consider jumps
2513 because they are only at the end of a block and are
2514 considered when we are deciding whether to propagate
2515 - we're getting too far away from the branch. The hbr insns
2516 only have a signed 10 bit offset
2517 We go back as far as possible so the branch will be considered
2518 for propagation when we get to the beginning of the block. */
2519 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2520 {
2521 if (INSN_P (insn))
2522 {
2523 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2524 if (branch
2525 && ((GET_CODE (branch_target) == REG
2526 && set_of (branch_target, insn) != NULL_RTX)
2527 || insn_clobbers_hbr (insn)
2528 || branch_addr - insn_addr > 600))
2529 {
2530 rtx_insn *next = NEXT_INSN (insn);
2531 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2532 if (insn != BB_END (bb)
2533 && branch_addr - next_addr >= required_dist)
2534 {
2535 if (dump_file)
2536 fprintf (dump_file,
2537 "hint for %i in block %i before %i\n",
2538 INSN_UID (branch), bb->index,
2539 INSN_UID (next));
2540 spu_emit_branch_hint (next, branch, branch_target,
2541 branch_addr - next_addr, blocks);
2542 }
2543 branch = 0;
2544 }
2545
2546 /* JUMP_P will only be true at the end of a block. When
2547 branch is already set it means we've previously decided
2548 to propagate a hint for that branch into this block. */
2549 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2550 {
2551 branch = 0;
2552 if ((branch_target = get_branch_target (insn)))
2553 {
2554 branch = insn;
2555 branch_addr = insn_addr;
2556 required_dist = spu_hint_dist;
2557 }
2558 }
2559 }
2560 if (insn == BB_HEAD (bb))
2561 break;
2562 }
2563
2564 if (branch)
2565 {
2566 /* If we haven't emitted a hint for this branch yet, it might
2567 be profitable to emit it in one of the predecessor blocks,
2568 especially for loops. */
2569 rtx_insn *bbend;
2570 basic_block prev = 0, prop = 0, prev2 = 0;
2571 int loop_exit = 0, simple_loop = 0;
2572 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2573
2574 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2575 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2576 prev = EDGE_PRED (bb, j)->src;
2577 else
2578 prev2 = EDGE_PRED (bb, j)->src;
2579
2580 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2581 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2582 loop_exit = 1;
2583 else if (EDGE_SUCC (bb, j)->dest == bb)
2584 simple_loop = 1;
2585
2586 /* If this branch is a loop exit then propagate to previous
2587 fallthru block. This catches the cases when it is a simple
2588 loop or when there is an initial branch into the loop. */
2589 if (prev && (loop_exit || simple_loop)
2590 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2591 prop = prev;
2592
2593 /* If there is only one adjacent predecessor, don't propagate
2594 outside this loop. */
2595 else if (prev && single_pred_p (bb)
2596 && prev->loop_father == bb->loop_father)
2597 prop = prev;
2598
2599 /* If this is the JOIN block of a simple IF-THEN then
2600 propagate the hint to the HEADER block. */
2601 else if (prev && prev2
2602 && EDGE_COUNT (bb->preds) == 2
2603 && EDGE_COUNT (prev->preds) == 1
2604 && EDGE_PRED (prev, 0)->src == prev2
2605 && prev2->loop_father == bb->loop_father
2606 && GET_CODE (branch_target) != REG)
2607 prop = prev;
2608
2609 /* Don't propagate when:
2610 - this is a simple loop and the hint would be too far
2611 - this is not a simple loop and there are 16 insns in
2612 this block already
2613 - the predecessor block ends in a branch that will be
2614 hinted
2615 - the predecessor block ends in an insn that invalidates
2616 the hint */
2617 if (prop
2618 && prop->index >= 0
2619 && (bbend = BB_END (prop))
2620 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2621 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2622 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2623 {
2624 if (dump_file)
2625 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2626 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2627 bb->index, prop->index, bb_loop_depth (bb),
2628 INSN_UID (branch), loop_exit, simple_loop,
2629 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2630
2631 spu_bb_info[prop->index].prop_jump = branch;
2632 spu_bb_info[prop->index].bb_index = i;
2633 }
2634 else if (branch_addr - next_addr >= required_dist)
2635 {
2636 if (dump_file)
2637 fprintf (dump_file, "hint for %i in block %i before %i\n",
2638 INSN_UID (branch), bb->index,
2639 INSN_UID (NEXT_INSN (insn)));
2640 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2641 branch_addr - next_addr, blocks);
2642 }
2643 branch = 0;
2644 }
2645 }
2646 free (spu_bb_info);
2647
2648 if (!bitmap_empty_p (blocks))
2649 find_many_sub_basic_blocks (blocks);
2650
2651 /* We have to schedule to make sure alignment is ok. */
2652 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2653
2654 /* The hints need to be scheduled, so run the scheduler again.  */
2655 schedule_insns ();
2656 df_finish_pass (true);
2657
2658 insert_hbrp ();
2659
2660 pad_bb ();
2661
2662 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2663 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2664 {
2665 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2666 between its branch label and the branch.  We don't move the
2667 label because GCC expects it at the beginning of the block. */
2668 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2669 rtx label_ref = XVECEXP (unspec, 0, 0);
2670 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2671 rtx_insn *branch;
2672 int offset = 0;
2673 for (branch = NEXT_INSN (label);
2674 !JUMP_P (branch) && !CALL_P (branch);
2675 branch = NEXT_INSN (branch))
2676 if (NONJUMP_INSN_P (branch))
2677 offset += get_attr_length (branch);
2678 if (offset > 0)
2679 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2680 }
2681
2682 spu_var_tracking ();
2683
2684 loop_optimizer_finalize ();
2685
2686 free_bb_for_insn ();
2687
2688 in_spu_reorg = 0;
2689 }
2690 \f
2691
2692 /* Insn scheduling routines, primarily for dual issue. */
2693 static int
2694 spu_sched_issue_rate (void)
2695 {
2696 return 2;
2697 }
2698
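/* Return 1 when INSN reads from or writes to memory and will
   therefore occupy the load/store unit for a cycle.  */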
2699 static int
2700 uses_ls_unit (rtx_insn *insn)
2701 {
2702 rtx set = single_set (insn);
2703 if (set != 0
2704 && (GET_CODE (SET_DEST (set)) == MEM
2705 || GET_CODE (SET_SRC (set)) == MEM))
2706 return 1;
2707 return 0;
2708 }
2709
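/* Return the pipeline INSN issues to: 0 for the even pipe (fixed
   point and floating point ops), 1 for the odd pipe (loads, stores,
   branches, shuffles), -1 for MULTI0 or inline asm, and -2 for
   TYPE_CONVERT.  */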
2710 static int
2711 get_pipe (rtx_insn *insn)
2712 {
2713 enum attr_type t;
2714 /* Handle inline asm */
2715 if (INSN_CODE (insn) == -1)
2716 return -1;
2717 t = get_attr_type (insn);
2718 switch (t)
2719 {
2720 case TYPE_CONVERT:
2721 return -2;
2722 case TYPE_MULTI0:
2723 return -1;
2724
2725 case TYPE_FX2:
2726 case TYPE_FX3:
2727 case TYPE_SPR:
2728 case TYPE_NOP:
2729 case TYPE_FXB:
2730 case TYPE_FPD:
2731 case TYPE_FP6:
2732 case TYPE_FP7:
2733 return 0;
2734
2735 case TYPE_LNOP:
2736 case TYPE_SHUF:
2737 case TYPE_LOAD:
2738 case TYPE_STORE:
2739 case TYPE_BR:
2740 case TYPE_MULTI1:
2741 case TYPE_HBR:
2742 case TYPE_IPREFETCH:
2743 return 1;
2744 default:
2745 abort ();
2746 }
2747 }
2748
2749
2750 /* haifa-sched.c has a static variable that keeps track of the current
2751 cycle. It is passed to spu_sched_reorder, and we record it here for
2752 use by spu_sched_variable_issue. It won't be accurate if the
2753 scheduler updates its clock_var between the two calls. */
2754 static int clock_var;
2755
2756 /* This is used to keep track of insn alignment. Set to 0 at the
2757 beginning of each block and increased by the "length" attr of each
2758 insn scheduled. */
2759 static int spu_sched_length;
2760
2761 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2762 ready list appropriately in spu_sched_reorder(). */
2763 static int pipe0_clock;
2764 static int pipe1_clock;
2765
2766 static int prev_clock_var;
2767
2768 static int prev_priority;
2769
2770 /* The SPU needs to load the next ilb (instruction line buffer) sometime during the execution of
2771 the previous ilb. There is a potential conflict if every cycle has a
2772 load or store. To avoid the conflict we make sure the load/store
2773 unit is free for at least one cycle during the execution of insns in
2774 the previous ilb. */
2775 static int spu_ls_first;
2776 static int prev_ls_clock;
2777
2778 static void
2779 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2780 int max_ready ATTRIBUTE_UNUSED)
2781 {
2782 spu_sched_length = 0;
2783 }
2784
2785 static void
2786 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2787 int max_ready ATTRIBUTE_UNUSED)
2788 {
2789 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2790 {
2791 /* When any block might be at least 8-byte aligned, assume they
2792 will all be at least 8-byte aligned to make sure dual issue
2793 works out correctly. */
2794 spu_sched_length = 0;
2795 }
2796 spu_ls_first = INT_MAX;
2797 clock_var = -1;
2798 prev_ls_clock = -1;
2799 pipe0_clock = -1;
2800 pipe1_clock = -1;
2801 prev_clock_var = -1;
2802 prev_priority = -1;
2803 }
2804
2805 static int
2806 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2807 int verbose ATTRIBUTE_UNUSED,
2808 rtx_insn *insn, int more)
2809 {
2810 int len;
2811 int p;
2812 if (GET_CODE (PATTERN (insn)) == USE
2813 || GET_CODE (PATTERN (insn)) == CLOBBER
2814 || (len = get_attr_length (insn)) == 0)
2815 return more;
2816
2817 spu_sched_length += len;
2818
2819 /* Reset on inline asm */
2820 if (INSN_CODE (insn) == -1)
2821 {
2822 spu_ls_first = INT_MAX;
2823 pipe0_clock = -1;
2824 pipe1_clock = -1;
2825 return 0;
2826 }
2827 p = get_pipe (insn);
2828 if (p == 0)
2829 pipe0_clock = clock_var;
2830 else
2831 pipe1_clock = clock_var;
2832
2833 if (in_spu_reorg)
2834 {
2835 if (clock_var - prev_ls_clock > 1
2836 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2837 spu_ls_first = INT_MAX;
2838 if (uses_ls_unit (insn))
2839 {
2840 if (spu_ls_first == INT_MAX)
2841 spu_ls_first = spu_sched_length;
2842 prev_ls_clock = clock_var;
2843 }
2844
2845 /* The scheduler hasn't inserted the nop, but we will later on.
2846 Include those nops in spu_sched_length. */
2847 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2848 spu_sched_length += 4;
2849 prev_clock_var = clock_var;
2850
2851 /* MORE is -1 when called from spu_sched_reorder for new insns
2852 that don't have INSN_PRIORITY.  */
2853 if (more >= 0)
2854 prev_priority = INSN_PRIORITY (insn);
2855 }
2856
2857 /* Always try issuing more insns. spu_sched_reorder will decide
2858 when the cycle should be advanced. */
2859 return 1;
2860 }
2861
2862 /* This function is called for both TARGET_SCHED_REORDER and
2863 TARGET_SCHED_REORDER2. */
2864 static int
2865 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2866 rtx_insn **ready, int *nreadyp, int clock)
2867 {
2868 int i, nready = *nreadyp;
2869 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2870 rtx_insn *insn;
2871
2872 clock_var = clock;
2873
2874 if (nready <= 0 || pipe1_clock >= clock)
2875 return 0;
2876
2877 /* Find any rtl insns that don't generate assembly insns and schedule
2878 them first. */
2879 for (i = nready - 1; i >= 0; i--)
2880 {
2881 insn = ready[i];
2882 if (INSN_CODE (insn) == -1
2883 || INSN_CODE (insn) == CODE_FOR_blockage
2884 || (INSN_P (insn) && get_attr_length (insn) == 0))
2885 {
2886 ready[i] = ready[nready - 1];
2887 ready[nready - 1] = insn;
2888 return 1;
2889 }
2890 }
2891
2892 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2893 for (i = 0; i < nready; i++)
2894 if (INSN_CODE (ready[i]) != -1)
2895 {
2896 insn = ready[i];
2897 switch (get_attr_type (insn))
2898 {
2899 default:
2900 case TYPE_MULTI0:
2901 case TYPE_CONVERT:
2902 case TYPE_FX2:
2903 case TYPE_FX3:
2904 case TYPE_SPR:
2905 case TYPE_NOP:
2906 case TYPE_FXB:
2907 case TYPE_FPD:
2908 case TYPE_FP6:
2909 case TYPE_FP7:
2910 pipe_0 = i;
2911 break;
2912 case TYPE_LOAD:
2913 case TYPE_STORE:
2914 pipe_ls = i;
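/* Fall through: loads and stores also count as pipe 1 insns.  */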
2915 case TYPE_LNOP:
2916 case TYPE_SHUF:
2917 case TYPE_BR:
2918 case TYPE_MULTI1:
2919 case TYPE_HBR:
2920 pipe_1 = i;
2921 break;
2922 case TYPE_IPREFETCH:
2923 pipe_hbrp = i;
2924 break;
2925 }
2926 }
2927
2928 /* In the first scheduling phase, schedule loads and stores together
2929 to increase the chance they will get merged during postreload CSE. */
2930 if (!reload_completed && pipe_ls >= 0)
2931 {
2932 insn = ready[pipe_ls];
2933 ready[pipe_ls] = ready[nready - 1];
2934 ready[nready - 1] = insn;
2935 return 1;
2936 }
2937
2938 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2939 if (pipe_hbrp >= 0)
2940 pipe_1 = pipe_hbrp;
2941
2942 /* When we have loads/stores in every cycle of the last 15 insns and
2943 we are about to schedule another load/store, emit an hbrp insn
2944 instead. */
2945 if (in_spu_reorg
2946 && spu_sched_length - spu_ls_first >= 4 * 15
2947 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2948 {
2949 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2950 recog_memoized (insn);
2951 if (pipe0_clock < clock)
2952 PUT_MODE (insn, TImode);
2953 spu_sched_variable_issue (file, verbose, insn, -1);
2954 return 0;
2955 }
2956
2957 /* In general, we want to emit nops to increase dual issue, but dual
2958 issue isn't faster when one of the insns could be scheduled later
2959 without affecting the critical path.  We look at INSN_PRIORITY to
2960 make a good guess, but it isn't perfect so -mdual-nops=n can be
2961 used to tune it.  */
2962 if (in_spu_reorg && spu_dual_nops < 10)
2963 {
2964 /* When we are at an even address and we are not issuing nops to
2965 improve scheduling then we need to advance the cycle. */
2966 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2967 && (spu_dual_nops == 0
2968 || (pipe_1 != -1
2969 && prev_priority >
2970 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2971 return 0;
2972
2973 /* When at an odd address, schedule the highest priority insn
2974 without considering pipeline. */
2975 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2976 && (spu_dual_nops == 0
2977 || (prev_priority >
2978 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2979 return 1;
2980 }
2981
2982
2983 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2984 pipe0 insn in the ready list, schedule it. */
2985 if (pipe0_clock < clock && pipe_0 >= 0)
2986 schedule_i = pipe_0;
2987
2988 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2989 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2990 else
2991 schedule_i = pipe_1;
2992
2993 if (schedule_i > -1)
2994 {
2995 insn = ready[schedule_i];
2996 ready[schedule_i] = ready[nready - 1];
2997 ready[nready - 1] = insn;
2998 return 1;
2999 }
3000 return 0;
3001 }
3002
3003 /* INSN is dependent on DEP_INSN. */
3004 static int
3005 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3006 {
3007 rtx set;
3008
3009 /* The blockage pattern is used to prevent instructions from being
3010 moved across it and has no cost. */
3011 if (INSN_CODE (insn) == CODE_FOR_blockage
3012 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3013 return 0;
3014
3015 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3016 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3017 return 0;
3018
3019 /* Make sure hbrps are spread out. */
3020 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3021 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3022 return 8;
3023
3024 /* Make sure hints and hbrps are 2 cycles apart. */
3025 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3026 || INSN_CODE (insn) == CODE_FOR_hbr)
3027 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3028 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3029 return 2;
3030
3031 /* An hbrp has no real dependency on other insns. */
3032 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3033 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3034 return 0;
3035
3036 /* Assuming that it is unlikely an argument register will be used in
3037 the first cycle of the called function, we reduce the cost for
3038 slightly better scheduling of dep_insn. When not hinted, the
3039 mispredicted branch would hide the cost as well. */
3040 if (CALL_P (insn))
3041 {
3042 rtx target = get_branch_target (insn);
3043 if (GET_CODE (target) != REG || !set_of (target, insn))
3044 return cost - 2;
3045 return cost;
3046 }
3047
3048 /* And when returning from a function, let's assume the return values
3049 are completed sooner too. */
3050 if (CALL_P (dep_insn))
3051 return cost - 2;
3052
3053 /* Make sure an instruction that loads from the back chain is scheduled
3054 away from the return instruction so a hint is more likely to get
3055 issued. */
3056 if (INSN_CODE (insn) == CODE_FOR__return
3057 && (set = single_set (dep_insn))
3058 && GET_CODE (SET_DEST (set)) == REG
3059 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3060 return 20;
3061
3062 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3063 scheduler makes every insn in a block anti-dependent on the final
3064 jump_insn. We adjust here so higher cost insns will get scheduled
3065 earlier. */
3066 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3067 return insn_cost (dep_insn) - 3;
3068
3069 return cost;
3070 }
3071 \f
3072 /* Create a CONST_DOUBLE from a string. */
3073 rtx
3074 spu_float_const (const char *string, enum machine_mode mode)
3075 {
3076 REAL_VALUE_TYPE value;
3077 value = REAL_VALUE_ATOF (string, mode);
3078 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3079 }
3080
3081 int
3082 spu_constant_address_p (rtx x)
3083 {
3084 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3085 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3086 || GET_CODE (x) == HIGH);
3087 }
3088
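/* Return which one-instruction il-family load can materialize VAL, or
   SPU_NONE.  For example, 0x1234 loads with il (signed 16-bit
   immediate), 0x00050005 with ilh (both halfwords equal), and
   0x12340000 with ilhu (low halfword zero).  */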
3089 static enum spu_immediate
3090 which_immediate_load (HOST_WIDE_INT val)
3091 {
3092 gcc_assert (val == trunc_int_for_mode (val, SImode));
3093
3094 if (val >= -0x8000 && val <= 0x7fff)
3095 return SPU_IL;
3096 if (val >= 0 && val <= 0x3ffff)
3097 return SPU_ILA;
3098 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3099 return SPU_ILH;
3100 if ((val & 0xffff) == 0)
3101 return SPU_ILHU;
3102
3103 return SPU_NONE;
3104 }
3105
3106 /* Return true when OP can be loaded by one of the il instructions, or
3107 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3108 int
3109 immediate_load_p (rtx op, enum machine_mode mode)
3110 {
3111 if (CONSTANT_P (op))
3112 {
3113 enum immediate_class c = classify_immediate (op, mode);
3114 return c == IC_IL1 || c == IC_IL1s
3115 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3116 }
3117 return 0;
3118 }
3119
3120 /* Return true if the first SIZE bytes of ARR form a constant that can
3121 be generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and
3122 PSTART receive the size and offset of the instruction to use.  */
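/* For example, the pattern for inserting a word at byte offset 4 is
   { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19, ... }:
   bytes outside the run hold the identity shuffle value i+16 and the
   run 0,1,2,3 marks the insertion slot; that is the shape the checks
   below detect.  */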
3123 static int
3124 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
3125 {
3126 int cpat, run, i, start;
3127 cpat = 1;
3128 run = 0;
3129 start = -1;
3130 for (i = 0; i < size && cpat; i++)
3131 if (arr[i] != i+16)
3132 {
3133 if (!run)
3134 {
3135 start = i;
3136 if (arr[i] == 3)
3137 run = 1;
3138 else if (arr[i] == 2 && i + 1 < 16 && arr[i+1] == 3)
3139 run = 2;
3140 else if (arr[i] == 0)
3141 {
3142 while (i + run < 16 && arr[i+run] == run)
3143 run++;
3144 if (run != 4 && run != 8)
3145 cpat = 0;
3146 }
3147 else
3148 cpat = 0;
3149 if ((i & (run-1)) != 0)
3150 cpat = 0;
3151 i += run;
3152 }
3153 else
3154 cpat = 0;
3155 }
3156 if (cpat && (run || size < 16))
3157 {
3158 if (run == 0)
3159 run = 1;
3160 if (prun)
3161 *prun = run;
3162 if (pstart)
3163 *pstart = start == -1 ? 16-run : start;
3164 return 1;
3165 }
3166 return 0;
3167 }
3168
3169 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3170 it into a register. MODE is only valid when OP is a CONST_INT. */
3171 static enum immediate_class
3172 classify_immediate (rtx op, enum machine_mode mode)
3173 {
3174 HOST_WIDE_INT val;
3175 unsigned char arr[16];
3176 int i, j, repeated, fsmbi, repeat;
3177
3178 gcc_assert (CONSTANT_P (op));
3179
3180 if (GET_MODE (op) != VOIDmode)
3181 mode = GET_MODE (op);
3182
3183 /* A V4SI const_vector with all identical symbols is ok. */
3184 if (!flag_pic
3185 && mode == V4SImode
3186 && GET_CODE (op) == CONST_VECTOR
3187 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3188 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3189 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3190 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3191 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3192 op = CONST_VECTOR_ELT (op, 0);
3193
3194 switch (GET_CODE (op))
3195 {
3196 case SYMBOL_REF:
3197 case LABEL_REF:
3198 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3199
3200 case CONST:
3201 /* We can never know if the resulting address fits in 18 bits and can be
3202 loaded with ila. For now, assume the address will not overflow if
3203 the displacement is "small" (fits 'K' constraint). */
3204 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3205 {
3206 rtx sym = XEXP (XEXP (op, 0), 0);
3207 rtx cst = XEXP (XEXP (op, 0), 1);
3208
3209 if (GET_CODE (sym) == SYMBOL_REF
3210 && GET_CODE (cst) == CONST_INT
3211 && satisfies_constraint_K (cst))
3212 return IC_IL1s;
3213 }
3214 return IC_IL2s;
3215
3216 case HIGH:
3217 return IC_IL1s;
3218
3219 case CONST_VECTOR:
3220 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3221 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3222 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3223 return IC_POOL;
3224 /* Fall through. */
3225
3226 case CONST_INT:
3227 case CONST_DOUBLE:
3228 constant_to_array (mode, op, arr);
3229
3230 /* Check that each 4-byte slot is identical. */
3231 repeated = 1;
3232 for (i = 4; i < 16; i += 4)
3233 for (j = 0; j < 4; j++)
3234 if (arr[j] != arr[i + j])
3235 repeated = 0;
3236
3237 if (repeated)
3238 {
3239 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3240 val = trunc_int_for_mode (val, SImode);
3241
3242 if (which_immediate_load (val) != SPU_NONE)
3243 return IC_IL1;
3244 }
3245
3246 /* Any mode of 2 bytes or smaller can be loaded with an il
3247 instruction. */
3248 gcc_assert (GET_MODE_SIZE (mode) > 2);
3249
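/* fsmbi expands each bit of its 16-bit immediate into a byte of all
   ones or all zeros, so a constant qualifies when all of its nonzero
   bytes are the same value: 0xff gives IC_FSMBI directly, any other
   repeated byte value needs one extra insn (IC_FSMBI2).  */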
3250 fsmbi = 1;
3251 repeat = 0;
3252 for (i = 0; i < 16 && fsmbi; i++)
3253 if (arr[i] != 0 && repeat == 0)
3254 repeat = arr[i];
3255 else if (arr[i] != 0 && arr[i] != repeat)
3256 fsmbi = 0;
3257 if (fsmbi)
3258 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3259
3260 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3261 return IC_CPAT;
3262
3263 if (repeated)
3264 return IC_IL2;
3265
3266 return IC_POOL;
3267 default:
3268 break;
3269 }
3270 gcc_unreachable ();
3271 }
3272
3273 static enum spu_immediate
3274 which_logical_immediate (HOST_WIDE_INT val)
3275 {
3276 gcc_assert (val == trunc_int_for_mode (val, SImode));
3277
3278 if (val >= -0x200 && val <= 0x1ff)
3279 return SPU_ORI;
3280 if (val >= 0 && val <= 0xffff)
3281 return SPU_IOHL;
3282 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3283 {
3284 val = trunc_int_for_mode (val, HImode);
3285 if (val >= -0x200 && val <= 0x1ff)
3286 return SPU_ORHI;
3287 if ((val & 0xff) == ((val >> 8) & 0xff))
3288 {
3289 val = trunc_int_for_mode (val, QImode);
3290 if (val >= -0x200 && val <= 0x1ff)
3291 return SPU_ORBI;
3292 }
3293 }
3294 return SPU_NONE;
3295 }
3296
3297 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3298 CONST_DOUBLEs. */
3299 static int
3300 const_vector_immediate_p (rtx x)
3301 {
3302 int i;
3303 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3304 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3305 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3306 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3307 return 0;
3308 return 1;
3309 }
3310
3311 int
3312 logical_immediate_p (rtx op, enum machine_mode mode)
3313 {
3314 HOST_WIDE_INT val;
3315 unsigned char arr[16];
3316 int i, j;
3317
3318 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3319 || GET_CODE (op) == CONST_VECTOR);
3320
3321 if (GET_CODE (op) == CONST_VECTOR
3322 && !const_vector_immediate_p (op))
3323 return 0;
3324
3325 if (GET_MODE (op) != VOIDmode)
3326 mode = GET_MODE (op);
3327
3328 constant_to_array (mode, op, arr);
3329
3330 /* Check that bytes are repeated. */
3331 for (i = 4; i < 16; i += 4)
3332 for (j = 0; j < 4; j++)
3333 if (arr[j] != arr[i + j])
3334 return 0;
3335
3336 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3337 val = trunc_int_for_mode (val, SImode);
3338
3339 i = which_logical_immediate (val);
3340 return i != SPU_NONE && i != SPU_IOHL;
3341 }
3342
3343 int
3344 iohl_immediate_p (rtx op, enum machine_mode mode)
3345 {
3346 HOST_WIDE_INT val;
3347 unsigned char arr[16];
3348 int i, j;
3349
3350 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3351 || GET_CODE (op) == CONST_VECTOR);
3352
3353 if (GET_CODE (op) == CONST_VECTOR
3354 && !const_vector_immediate_p (op))
3355 return 0;
3356
3357 if (GET_MODE (op) != VOIDmode)
3358 mode = GET_MODE (op);
3359
3360 constant_to_array (mode, op, arr);
3361
3362 /* Check that bytes are repeated. */
3363 for (i = 4; i < 16; i += 4)
3364 for (j = 0; j < 4; j++)
3365 if (arr[j] != arr[i + j])
3366 return 0;
3367
3368 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3369 val = trunc_int_for_mode (val, SImode);
3370
3371 return val >= 0 && val <= 0xffff;
3372 }
3373
3374 int
3375 arith_immediate_p (rtx op, enum machine_mode mode,
3376 HOST_WIDE_INT low, HOST_WIDE_INT high)
3377 {
3378 HOST_WIDE_INT val;
3379 unsigned char arr[16];
3380 int bytes, i, j;
3381
3382 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3383 || GET_CODE (op) == CONST_VECTOR);
3384
3385 if (GET_CODE (op) == CONST_VECTOR
3386 && !const_vector_immediate_p (op))
3387 return 0;
3388
3389 if (GET_MODE (op) != VOIDmode)
3390 mode = GET_MODE (op);
3391
3392 constant_to_array (mode, op, arr);
3393
3394 if (VECTOR_MODE_P (mode))
3395 mode = GET_MODE_INNER (mode);
3396
3397 bytes = GET_MODE_SIZE (mode);
3398 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3399
3400 /* Check that bytes are repeated. */
3401 for (i = bytes; i < 16; i += bytes)
3402 for (j = 0; j < bytes; j++)
3403 if (arr[j] != arr[i + j])
3404 return 0;
3405
3406 val = arr[0];
3407 for (j = 1; j < bytes; j++)
3408 val = (val << 8) | arr[j];
3409
3410 val = trunc_int_for_mode (val, mode);
3411
3412 return val >= low && val <= high;
3413 }
3414
3415 /* Return TRUE when OP is an immediate and an exact power of 2, i.e.
3416 OP == 2^SCALE with LOW <= SCALE <= HIGH.  When OP is a vector,
3417 all entries must be the same.  */
3418 bool
3419 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3420 {
3421 enum machine_mode int_mode;
3422 HOST_WIDE_INT val;
3423 unsigned char arr[16];
3424 int bytes, i, j;
3425
3426 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3427 || GET_CODE (op) == CONST_VECTOR);
3428
3429 if (GET_CODE (op) == CONST_VECTOR
3430 && !const_vector_immediate_p (op))
3431 return 0;
3432
3433 if (GET_MODE (op) != VOIDmode)
3434 mode = GET_MODE (op);
3435
3436 constant_to_array (mode, op, arr);
3437
3438 if (VECTOR_MODE_P (mode))
3439 mode = GET_MODE_INNER (mode);
3440
3441 bytes = GET_MODE_SIZE (mode);
3442 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3443
3444 /* Check that bytes are repeated. */
3445 for (i = bytes; i < 16; i += bytes)
3446 for (j = 0; j < bytes; j++)
3447 if (arr[j] != arr[i + j])
3448 return 0;
3449
3450 val = arr[0];
3451 for (j = 1; j < bytes; j++)
3452 val = (val << 8) | arr[j];
3453
3454 val = trunc_int_for_mode (val, int_mode);
3455
3456 /* Currently, we only handle SFmode.  */
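/* In IEEE single precision a power of 2 has a zero mantissa field;
   e.g. 2.0f is 0x40000000, whose exponent field is 128, so exp
   computes to 1.  */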
3457 gcc_assert (mode == SFmode);
3458 if (mode == SFmode)
3459 {
3460 int exp = (val >> 23) - 127;
3461 return val > 0 && (val & 0x007fffff) == 0
3462 && exp >= low && exp <= high;
3463 }
3464 return FALSE;
3465 }
3466
3467 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3468
3469 static int
3470 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3471 {
3472 rtx x = *px;
3473 tree decl;
3474
3475 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3476 {
3477 rtx plus = XEXP (x, 0);
3478 rtx op0 = XEXP (plus, 0);
3479 rtx op1 = XEXP (plus, 1);
3480 if (GET_CODE (op1) == CONST_INT)
3481 x = op0;
3482 }
3483
3484 return (GET_CODE (x) == SYMBOL_REF
3485 && (decl = SYMBOL_REF_DECL (x)) != 0
3486 && TREE_CODE (decl) == VAR_DECL
3487 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3488 }
3489
3490 /* We accept:
3491 - any 32-bit constant (SImode, SFmode)
3492 - any constant that can be generated with fsmbi (any mode)
3493 - a 64-bit constant where the high and low bits are identical
3494 (DImode, DFmode)
3495 - a 128-bit constant where the four 32-bit words match. */
3496 bool
3497 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3498 {
3499 if (GET_CODE (x) == HIGH)
3500 x = XEXP (x, 0);
3501
3502 /* Reject any __ea qualified reference. These can't appear in
3503 instructions but must be forced to the constant pool. */
3504 if (for_each_rtx (&x, ea_symbol_ref, 0))
3505 return 0;
3506
3507 /* V4SI with all identical symbols is valid. */
3508 if (!flag_pic
3509 && mode == V4SImode
3510 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3511 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3512 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3513 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3514 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3515 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3516
3517 if (GET_CODE (x) == CONST_VECTOR
3518 && !const_vector_immediate_p (x))
3519 return 0;
3520 return 1;
3521 }
3522
3523 /* Valid addresses are:
3524 - symbol_ref, label_ref, const
3525 - reg
3526 - reg + const_int, where const_int is 16 byte aligned
3527 - reg + reg, alignment doesn't matter
3528 The alignment matters in the reg+const case because lqd and stqd
3529 ignore the 4 least significant bits of the const. We only care about
3530 16 byte modes because the expand phase will change all smaller MEM
3531 references to TImode. */
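/* For example, with a 16-byte mode reg+32 is a valid address while
   reg+20 is not: lqd and stqd would silently drop the low 4 bits and
   access reg+16 instead.  */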
3532 static bool
3533 spu_legitimate_address_p (enum machine_mode mode,
3534 rtx x, bool reg_ok_strict)
3535 {
3536 int aligned = GET_MODE_SIZE (mode) >= 16;
3537 if (aligned
3538 && GET_CODE (x) == AND
3539 && GET_CODE (XEXP (x, 1)) == CONST_INT
3540 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3541 x = XEXP (x, 0);
3542 switch (GET_CODE (x))
3543 {
3544 case LABEL_REF:
3545 return !TARGET_LARGE_MEM;
3546
3547 case SYMBOL_REF:
3548 case CONST:
3549 /* Keep __ea references until reload so that spu_expand_mov can see them
3550 in MEMs. */
3551 if (ea_symbol_ref (&x, 0))
3552 return !reload_in_progress && !reload_completed;
3553 return !TARGET_LARGE_MEM;
3554
3555 case CONST_INT:
3556 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3557
3558 case SUBREG:
3559 x = XEXP (x, 0);
3560 if (REG_P (x))
3561 return 0;
3562
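/* Fall through.  */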
3563 case REG:
3564 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3565
3566 case PLUS:
3567 case LO_SUM:
3568 {
3569 rtx op0 = XEXP (x, 0);
3570 rtx op1 = XEXP (x, 1);
3571 if (GET_CODE (op0) == SUBREG)
3572 op0 = XEXP (op0, 0);
3573 if (GET_CODE (op1) == SUBREG)
3574 op1 = XEXP (op1, 0);
3575 if (GET_CODE (op0) == REG
3576 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3577 && GET_CODE (op1) == CONST_INT
3578 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3579 /* If virtual registers are involved, the displacement will
3580 change later on anyway, so checking would be premature.
3581 Reload will make sure the final displacement after
3582 register elimination is OK. */
3583 || op0 == arg_pointer_rtx
3584 || op0 == frame_pointer_rtx
3585 || op0 == virtual_stack_vars_rtx)
3586 && (!aligned || (INTVAL (op1) & 15) == 0))
3587 return TRUE;
3588 if (GET_CODE (op0) == REG
3589 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3590 && GET_CODE (op1) == REG
3591 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3592 return TRUE;
3593 }
3594 break;
3595
3596 default:
3597 break;
3598 }
3599 return FALSE;
3600 }
3601
3602 /* Like spu_legitimate_address_p, except with named addresses. */
3603 static bool
3604 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3605 bool reg_ok_strict, addr_space_t as)
3606 {
3607 if (as == ADDR_SPACE_EA)
3608 return (REG_P (x) && (GET_MODE (x) == EAmode));
3609
3610 else if (as != ADDR_SPACE_GENERIC)
3611 gcc_unreachable ();
3612
3613 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3614 }
3615
3616 /* When the address is reg + const_int, force the const_int into a
3617 register. */
3618 static rtx
3619 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3620 enum machine_mode mode ATTRIBUTE_UNUSED)
3621 {
3622 rtx op0, op1;
3623 /* Make sure both operands are registers. */
3624 if (GET_CODE (x) == PLUS)
3625 {
3626 op0 = XEXP (x, 0);
3627 op1 = XEXP (x, 1);
3628 if (ALIGNED_SYMBOL_REF_P (op0))
3629 {
3630 op0 = force_reg (Pmode, op0);
3631 mark_reg_pointer (op0, 128);
3632 }
3633 else if (GET_CODE (op0) != REG)
3634 op0 = force_reg (Pmode, op0);
3635 if (ALIGNED_SYMBOL_REF_P (op1))
3636 {
3637 op1 = force_reg (Pmode, op1);
3638 mark_reg_pointer (op1, 128);
3639 }
3640 else if (GET_CODE (op1) != REG)
3641 op1 = force_reg (Pmode, op1);
3642 x = gen_rtx_PLUS (Pmode, op0, op1);
3643 }
3644 return x;
3645 }
3646
3647 /* Like spu_legitimize_address, except with named address support.  */
3648 static rtx
3649 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3650 addr_space_t as)
3651 {
3652 if (as != ADDR_SPACE_GENERIC)
3653 return x;
3654
3655 return spu_legitimize_address (x, oldx, mode);
3656 }
3657
3658 /* Reload reg + const_int for out-of-range displacements. */
3659 rtx
3660 spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3661 int opnum, int type)
3662 {
3663 bool removed_and = false;
3664
3665 if (GET_CODE (ad) == AND
3666 && CONST_INT_P (XEXP (ad, 1))
3667 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3668 {
3669 ad = XEXP (ad, 0);
3670 removed_and = true;
3671 }
3672
3673 if (GET_CODE (ad) == PLUS
3674 && REG_P (XEXP (ad, 0))
3675 && CONST_INT_P (XEXP (ad, 1))
3676 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3677 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3678 {
3679 /* Unshare the sum. */
3680 ad = copy_rtx (ad);
3681
3682 /* Reload the displacement. */
3683 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3684 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3685 opnum, (enum reload_type) type);
3686
3687 /* Add back AND for alignment if we stripped it. */
3688 if (removed_and)
3689 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3690
3691 return ad;
3692 }
3693
3694 return NULL_RTX;
3695 }
3696
3697 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3698 struct attribute_spec.handler. */
3699 static tree
3700 spu_handle_fndecl_attribute (tree * node,
3701 tree name,
3702 tree args ATTRIBUTE_UNUSED,
3703 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3704 {
3705 if (TREE_CODE (*node) != FUNCTION_DECL)
3706 {
3707 warning (0, "%qE attribute only applies to functions",
3708 name);
3709 *no_add_attrs = true;
3710 }
3711
3712 return NULL_TREE;
3713 }
3714
3715 /* Handle the "vector" attribute. */
3716 static tree
3717 spu_handle_vector_attribute (tree * node, tree name,
3718 tree args ATTRIBUTE_UNUSED,
3719 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3720 {
3721 tree type = *node, result = NULL_TREE;
3722 enum machine_mode mode;
3723 int unsigned_p;
3724
3725 while (POINTER_TYPE_P (type)
3726 || TREE_CODE (type) == FUNCTION_TYPE
3727 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3728 type = TREE_TYPE (type);
3729
3730 mode = TYPE_MODE (type);
3731
3732 unsigned_p = TYPE_UNSIGNED (type);
3733 switch (mode)
3734 {
3735 case DImode:
3736 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3737 break;
3738 case SImode:
3739 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3740 break;
3741 case HImode:
3742 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3743 break;
3744 case QImode:
3745 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3746 break;
3747 case SFmode:
3748 result = V4SF_type_node;
3749 break;
3750 case DFmode:
3751 result = V2DF_type_node;
3752 break;
3753 default:
3754 break;
3755 }
3756
3757 /* Propagate qualifiers attached to the element type
3758 onto the vector type. */
3759 if (result && result != type && TYPE_QUALS (type))
3760 result = build_qualified_type (result, TYPE_QUALS (type));
3761
3762 *no_add_attrs = true; /* No need to hang on to the attribute. */
3763
3764 if (!result)
3765 warning (0, "%qE attribute ignored", name);
3766 else
3767 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3768
3769 return NULL_TREE;
3770 }
3771
3772 /* Return nonzero if FUNC is a naked function. */
3773 static int
3774 spu_naked_function_p (tree func)
3775 {
3776 tree a;
3777
3778 if (TREE_CODE (func) != FUNCTION_DECL)
3779 abort ();
3780
3781 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3782 return a != NULL_TREE;
3783 }
3784
3785 int
3786 spu_initial_elimination_offset (int from, int to)
3787 {
3788 int saved_regs_size = spu_saved_regs_size ();
3789 int sp_offset = 0;
3790 if (!crtl->is_leaf || crtl->outgoing_args_size
3791 || get_frame_size () || saved_regs_size)
3792 sp_offset = STACK_POINTER_OFFSET;
3793 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3794 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3795 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3796 return get_frame_size ();
3797 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3798 return sp_offset + crtl->outgoing_args_size
3799 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3800 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3801 return get_frame_size () + saved_regs_size + sp_offset;
3802 else
3803 gcc_unreachable ();
3804 }
3805
3806 rtx
3807 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3808 {
3809 enum machine_mode mode = TYPE_MODE (type);
3810 int byte_size = ((mode == BLKmode)
3811 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3812
3813 /* Make sure small structs are left justified in a register. */
3814 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3815 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3816 {
3817 enum machine_mode smode;
3818 rtvec v;
3819 int i;
3820 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3821 int n = byte_size / UNITS_PER_WORD;
3822 v = rtvec_alloc (nregs);
3823 for (i = 0; i < n; i++)
3824 {
3825 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3826 gen_rtx_REG (TImode,
3827 FIRST_RETURN_REGNUM
3828 + i),
3829 GEN_INT (UNITS_PER_WORD * i));
3830 byte_size -= UNITS_PER_WORD;
3831 }
3832
3833 if (n < nregs)
3834 {
3835 if (byte_size < 4)
3836 byte_size = 4;
3837 smode =
3838 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3839 RTVEC_ELT (v, n) =
3840 gen_rtx_EXPR_LIST (VOIDmode,
3841 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3842 GEN_INT (UNITS_PER_WORD * n));
3843 }
3844 return gen_rtx_PARALLEL (mode, v);
3845 }
3846 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3847 }
3848
3849 static rtx
3850 spu_function_arg (cumulative_args_t cum_v,
3851 enum machine_mode mode,
3852 const_tree type, bool named ATTRIBUTE_UNUSED)
3853 {
3854 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3855 int byte_size;
3856
3857 if (*cum >= MAX_REGISTER_ARGS)
3858 return 0;
3859
3860 byte_size = ((mode == BLKmode)
3861 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3862
3863 /* The ABI does not allow parameters to be passed partially in
3864 reg and partially in stack. */
3865 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3866 return 0;
3867
3868 /* Make sure small structs are left justified in a register. */
3869 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3870 && byte_size < UNITS_PER_WORD && byte_size > 0)
3871 {
3872 enum machine_mode smode;
3873 rtx gr_reg;
3874 if (byte_size < 4)
3875 byte_size = 4;
3876 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3877 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3878 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3879 const0_rtx);
3880 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3881 }
3882 else
3883 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3884 }
3885
3886 static void
3887 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
3888 const_tree type, bool named ATTRIBUTE_UNUSED)
3889 {
3890 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3891
3892 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3893 ? 1
3894 : mode == BLKmode
3895 ? ((int_size_in_bytes (type) + 15) / 16)
3896 : mode == VOIDmode
3897 ? 1
3898 : HARD_REGNO_NREGS (cum, mode));
3899 }
3900
3901 /* Variable sized types are passed by reference. */
3902 static bool
3903 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3904 enum machine_mode mode ATTRIBUTE_UNUSED,
3905 const_tree type, bool named ATTRIBUTE_UNUSED)
3906 {
3907 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3908 }
3909 \f
3910
3911 /* Var args. */
3912
3913 /* Create and return the va_list datatype.
3914
3915 On SPU, va_list is an array type equivalent to
3916
3917 typedef struct __va_list_tag
3918 {
3919 void *__args __attribute__((__aligned(16)));
3920 void *__skip __attribute__((__aligned(16)));
3921
3922 } va_list[1];
3923
3924 where __args points to the arg that will be returned by the next
3925 va_arg(), and __skip points to the previous stack frame such that
3926 when __args == __skip we should advance __args by 32 bytes. */
3927 static tree
3928 spu_build_builtin_va_list (void)
3929 {
3930 tree f_args, f_skip, record, type_decl;
3931 bool owp;
3932
3933 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3934
3935 type_decl =
3936 build_decl (BUILTINS_LOCATION,
3937 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3938
3939 f_args = build_decl (BUILTINS_LOCATION,
3940 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3941 f_skip = build_decl (BUILTINS_LOCATION,
3942 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3943
3944 DECL_FIELD_CONTEXT (f_args) = record;
3945 DECL_ALIGN (f_args) = 128;
3946 DECL_USER_ALIGN (f_args) = 1;
3947
3948 DECL_FIELD_CONTEXT (f_skip) = record;
3949 DECL_ALIGN (f_skip) = 128;
3950 DECL_USER_ALIGN (f_skip) = 1;
3951
3952 TYPE_STUB_DECL (record) = type_decl;
3953 TYPE_NAME (record) = type_decl;
3954 TYPE_FIELDS (record) = f_args;
3955 DECL_CHAIN (f_args) = f_skip;
3956
3957 /* We know this is being padded and we want it that way. It is an
3958 internal type, so hide the warnings from the user. */
3959 owp = warn_padded;
3960 warn_padded = false;
3961
3962 layout_type (record);
3963
3964 warn_padded = owp;
3965
3966 /* The correct type is an array type of one element. */
3967 return build_array_type (record, build_index_type (size_zero_node));
3968 }
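
/* A worked example of the resulting layout (a sketch, following from
   the 128-bit field alignment set above):

     __args at offset  0, 4 bytes of pointer + 12 bytes of padding
     __skip at offset 16, likewise padded
     sizeof (struct __va_list_tag) == 32

   which is why the padding warning is suppressed during layout.  */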
3969
3970 /* Implement va_start by filling the va_list structure VALIST.
3971 NEXTARG points to the first anonymous stack argument.
3972
3973 The following global variables are used to initialize
3974 the va_list structure:
3975
3976 crtl->args.info;
3977 the CUMULATIVE_ARGS for this function
3978
3979 crtl->args.arg_offset_rtx:
3980 holds the offset of the first anonymous stack argument
3981 (relative to the virtual arg pointer). */
3982
3983 static void
3984 spu_va_start (tree valist, rtx nextarg)
3985 {
3986 tree f_args, f_skip;
3987 tree args, skip, t;
3988
3989 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3990 f_skip = DECL_CHAIN (f_args);
3991
3992 valist = build_simple_mem_ref (valist);
3993 args =
3994 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3995 skip =
3996 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3997
3998 /* Find the __args area. */
3999 t = make_tree (TREE_TYPE (args), nextarg);
4000 if (crtl->args.pretend_args_size > 0)
4001 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4002 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4003 TREE_SIDE_EFFECTS (t) = 1;
4004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4005
4006 /* Find the __skip area. */
4007 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4008 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4009 - STACK_POINTER_OFFSET));
4010 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4011 TREE_SIDE_EFFECTS (t) = 1;
4012 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4013 }
4014
4015 /* Gimplify va_arg by updating the va_list structure
4016 VALIST as required to retrieve an argument of type
4017 TYPE, and returning that argument.
4018
4019 ret = va_arg(VALIST, TYPE);
4020
4021 generates code equivalent to:
4022
4023 paddedsize = (sizeof(TYPE) + 15) & -16;
4024 if (VALIST.__args + paddedsize > VALIST.__skip
4025 && VALIST.__args <= VALIST.__skip)
4026 addr = VALIST.__skip + 32;
4027 else
4028 addr = VALIST.__args;
4029 VALIST.__args = addr + paddedsize;
4030 ret = *(TYPE *)addr;
4031 */
4032 static tree
4033 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4034 gimple_seq * post_p ATTRIBUTE_UNUSED)
4035 {
4036 tree f_args, f_skip;
4037 tree args, skip;
4038 HOST_WIDE_INT size, rsize;
4039 tree addr, tmp;
4040 bool pass_by_reference_p;
4041
4042 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4043 f_skip = DECL_CHAIN (f_args);
4044
4045 valist = build_simple_mem_ref (valist);
4046 args =
4047 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4048 skip =
4049 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4050
4051 addr = create_tmp_var (ptr_type_node, "va_arg");
4052
4053 /* If an object is dynamically sized, a pointer to it is passed
4054 instead of the object itself. */
4055 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4056 false);
4057 if (pass_by_reference_p)
4058 type = build_pointer_type (type);
4059 size = int_size_in_bytes (type);
4060 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4061
4062 /* Build the conditional expression to calculate addr. The
4063 expression will be gimplified later. */
4064 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4065 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4066 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4067 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4068 unshare_expr (skip)));
4069
4070 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4071 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4072 unshare_expr (args));
4073
4074 gimplify_assign (addr, tmp, pre_p);
4075
4076 /* Update VALIST.__args. */
4077 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4078 gimplify_assign (unshare_expr (args), tmp, pre_p);
4079
4080 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4081 addr);
4082
4083 if (pass_by_reference_p)
4084 addr = build_va_arg_indirect_ref (addr);
4085
4086 return build_va_arg_indirect_ref (addr);
4087 }
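
/* A concrete trace of the pseudocode above (illustrative only),
   for va_arg (ap, int) with sizeof (int) == 4 and UNITS_PER_WORD == 16
   as on SPU:

     rsize = ((4 + 15) / 16) * 16 = 16;
     if (ap.__args + 16 > ap.__skip && ap.__args <= ap.__skip)
       addr = ap.__skip + 32;        hop over the register-save gap
     else
       addr = ap.__args;
     ap.__args = addr + 16;
     ret = *(int *) addr;  */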
4088
4089 /* Save parameter registers starting with the register that corresponds
4090 to the first unnamed parameter. If the first unnamed parameter is
4091 on the stack then save no registers. Set pretend_args_size to the
4092 amount of space needed to save the registers. */
4093 static void
4094 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4095 tree type, int *pretend_size, int no_rtl)
4096 {
4097 if (!no_rtl)
4098 {
4099 rtx tmp;
4100 int regno;
4101 int offset;
4102 int ncum = *get_cumulative_args (cum);
4103
4104 /* cum currently points to the last named argument; we want to
4105 start at the next argument. */
4106 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4107
4108 offset = -STACK_POINTER_OFFSET;
4109 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4110 {
4111 tmp = gen_frame_mem (V4SImode,
4112 plus_constant (Pmode, virtual_incoming_args_rtx,
4113 offset));
4114 emit_move_insn (tmp,
4115 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4116 offset += 16;
4117 }
4118 *pretend_size = offset + STACK_POINTER_OFFSET;
4119 }
4120 }
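
/* Sketch of the pretend-args arithmetic above (not part of the build):
   with NCUM the number of 16-byte slots used by the named arguments,
   the loop stores MAX_REGISTER_ARGS - ncum registers, so

     offset        = -STACK_POINTER_OFFSET + 16 * (MAX_REGISTER_ARGS - ncum)
     *pretend_size = 16 * (MAX_REGISTER_ARGS - ncum)

   i.e. one quadword of save area per remaining argument register.  */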
4121 \f
4122 static void
4123 spu_conditional_register_usage (void)
4124 {
4125 if (flag_pic)
4126 {
4127 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4128 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4129 }
4130 }
4131
4132 /* This is called any time we inspect the alignment of a register for
4133 addresses. */
4134 static int
4135 reg_aligned_for_addr (rtx x)
4136 {
4137 int regno =
4138 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4139 return REGNO_POINTER_ALIGN (regno) >= 128;
4140 }
4141
4142 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4143 into its SYMBOL_REF_FLAGS. */
4144 static void
4145 spu_encode_section_info (tree decl, rtx rtl, int first)
4146 {
4147 default_encode_section_info (decl, rtl, first);
4148
4149 /* If a variable has a forced alignment to < 16 bytes, mark it with
4150 SYMBOL_FLAG_ALIGN1. */
4151 if (TREE_CODE (decl) == VAR_DECL
4152 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4153 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4154 }
4155
4156 /* Return TRUE if we are certain the mem refers to a complete object
4157 which is both 16-byte aligned and padded to a 16-byte boundary. This
4158 would make it safe to store with a single instruction.
4159 We guarantee the alignment and padding for static objects by aligning
4160 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4161 FIXME: We currently cannot guarantee this for objects on the stack
4162 because assign_parm_setup_stack calls assign_stack_local with the
4163 alignment of the parameter mode and in that case the alignment never
4164 gets adjusted by LOCAL_ALIGNMENT. */
4165 static int
4166 store_with_one_insn_p (rtx mem)
4167 {
4168 enum machine_mode mode = GET_MODE (mem);
4169 rtx addr = XEXP (mem, 0);
4170 if (mode == BLKmode)
4171 return 0;
4172 if (GET_MODE_SIZE (mode) >= 16)
4173 return 1;
4174 /* Only static objects. */
4175 if (GET_CODE (addr) == SYMBOL_REF)
4176 {
4177 /* We use the associated declaration to make sure the access is
4178 referring to the whole object.
4179 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4180 if it is necessary. Will there be cases where one exists, and
4181 the other does not? Will there be cases where both exist, but
4182 have different types? */
4183 tree decl = MEM_EXPR (mem);
4184 if (decl
4185 && TREE_CODE (decl) == VAR_DECL
4186 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4187 return 1;
4188 decl = SYMBOL_REF_DECL (addr);
4189 if (decl
4190 && TREE_CODE (decl) == VAR_DECL
4191 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4192 return 1;
4193 }
4194 return 0;
4195 }
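
/* Example of the distinction (illustrative): for

     static int x;     DATA_ALIGNMENT aligns and pads x to 16 bytes
     x = 1;            SImode store whose MEM_EXPR covers all of x

   a single quadword store clobbers only x's own padding, so it is
   safe. A 4-byte store into the middle of a larger object is not,
   since the quadword store would clobber the neighbouring bytes.  */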
4196
4197 /* Return 1 when the address is not valid for a simple load and store as
4198 required by the '_mov*' patterns. We could make this less strict
4199 for loads, but we prefer MEMs to look the same so they are more
4200 likely to be merged. */
4201 static int
4202 address_needs_split (rtx mem)
4203 {
4204 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4205 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4206 || !(store_with_one_insn_p (mem)
4207 || mem_is_padded_component_ref (mem))))
4208 return 1;
4209
4210 return 0;
4211 }
4212
4213 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4214 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4215 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4216
4217 /* MEM is known to be an __ea qualified memory access. Emit a call to
4218 fetch the PPU memory into local store, and return its address in local
4219 store. */
4220
4221 static void
4222 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4223 {
4224 if (is_store)
4225 {
4226 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4227 if (!cache_fetch_dirty)
4228 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4229 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4230 2, ea_addr, EAmode, ndirty, SImode);
4231 }
4232 else
4233 {
4234 if (!cache_fetch)
4235 cache_fetch = init_one_libfunc ("__cache_fetch");
4236 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4237 1, ea_addr, EAmode);
4238 }
4239 }
4240
4241 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4242 dirty bit marking, inline.
4243
4244 The cache control data structure is an array of
4245
4246 struct __cache_tag_array
4247 {
4248 unsigned int tag_lo[4];
4249 unsigned int tag_hi[4];
4250 void *data_pointer[4];
4251 int reserved[4];
4252 vector unsigned short dirty_bits[4];
4253 } */
4254
4255 static void
4256 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4257 {
4258 rtx ea_addr_si;
4259 HOST_WIDE_INT v;
4260 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4261 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4262 rtx index_mask = gen_reg_rtx (SImode);
4263 rtx tag_arr = gen_reg_rtx (Pmode);
4264 rtx splat_mask = gen_reg_rtx (TImode);
4265 rtx splat = gen_reg_rtx (V4SImode);
4266 rtx splat_hi = NULL_RTX;
4267 rtx tag_index = gen_reg_rtx (Pmode);
4268 rtx block_off = gen_reg_rtx (SImode);
4269 rtx tag_addr = gen_reg_rtx (Pmode);
4270 rtx tag = gen_reg_rtx (V4SImode);
4271 rtx cache_tag = gen_reg_rtx (V4SImode);
4272 rtx cache_tag_hi = NULL_RTX;
4273 rtx cache_ptrs = gen_reg_rtx (TImode);
4274 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4275 rtx tag_equal = gen_reg_rtx (V4SImode);
4276 rtx tag_equal_hi = NULL_RTX;
4277 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4278 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4279 rtx eq_index = gen_reg_rtx (SImode);
4280 rtx bcomp, hit_label, hit_ref, cont_label;
4281 rtx_insn *insn;
4282
4283 if (spu_ea_model != 32)
4284 {
4285 splat_hi = gen_reg_rtx (V4SImode);
4286 cache_tag_hi = gen_reg_rtx (V4SImode);
4287 tag_equal_hi = gen_reg_rtx (V4SImode);
4288 }
4289
4290 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4291 emit_move_insn (tag_arr, tag_arr_sym);
4292 v = 0x0001020300010203LL;
4293 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4294 ea_addr_si = ea_addr;
4295 if (spu_ea_model != 32)
4296 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4297
4298 /* tag_index = ea_addr & (tag_array_size - 128) */
4299 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4300
4301 /* splat ea_addr to all 4 slots. */
4302 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4303 /* Similarly for high 32 bits of ea_addr. */
4304 if (spu_ea_model != 32)
4305 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4306
4307 /* block_off = ea_addr & 127 */
4308 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4309
4310 /* tag_addr = tag_arr + tag_index */
4311 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4312
4313 /* Read cache tags. */
4314 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4315 if (spu_ea_model != 32)
4316 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4317 plus_constant (Pmode,
4318 tag_addr, 16)));
4319
4320 /* tag = ea_addr & -128 */
4321 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4322
4323 /* Read all four cache data pointers. */
4324 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4325 plus_constant (Pmode,
4326 tag_addr, 32)));
4327
4328 /* Compare tags. */
4329 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4330 if (spu_ea_model != 32)
4331 {
4332 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4333 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4334 }
4335
4336 /* At most one of the tags compare equal, so tag_equal has one
4337 32-bit slot set to all 1's, with the other slots all zero.
4338 gbb picks off the low bit from each byte in the 128-bit registers,
4339 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4340 we have a hit. */
4341 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4342 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4343
4344 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4345 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4346
4347 /* This allows us to rotate the corresponding cache data pointer to
4348 slot 0 (rotating by eq_index mod 16 bytes). */
4349 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4350 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4351
4352 /* Add block offset to form final data address. */
4353 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4354
4355 /* Check that we did hit. */
4356 hit_label = gen_label_rtx ();
4357 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4358 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4359 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4360 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4361 hit_ref, pc_rtx)));
4362 /* Say that this branch is very likely to happen. */
4363 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4364 add_int_reg_note (insn, REG_BR_PROB, v);
4365
4366 ea_load_store (mem, is_store, ea_addr, data_addr);
4367 cont_label = gen_label_rtx ();
4368 emit_jump_insn (gen_jump (cont_label));
4369 emit_barrier ();
4370
4371 emit_label (hit_label);
4372
4373 if (is_store)
4374 {
4375 HOST_WIDE_INT v_hi;
4376 rtx dirty_bits = gen_reg_rtx (TImode);
4377 rtx dirty_off = gen_reg_rtx (SImode);
4378 rtx dirty_128 = gen_reg_rtx (TImode);
4379 rtx neg_block_off = gen_reg_rtx (SImode);
4380
4381 /* Set up a mask with one dirty bit per byte of the mem we are
4382 writing, starting from the top bit. */
4383 v_hi = v = -1;
4384 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4385 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4386 {
4387 v_hi = v;
4388 v = 0;
4389 }
4390 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4391
4392 /* Form index into cache dirty_bits. eq_index is one of
4393 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4394 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4395 offset to each of the four dirty_bits elements. */
4396 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4397
4398 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4399
4400 /* Rotate bit mask to proper bit. */
4401 emit_insn (gen_negsi2 (neg_block_off, block_off));
4402 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4403 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4404
4405 /* Or in the new dirty bits. */
4406 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4407
4408 /* Store. */
4409 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4410 }
4411
4412 emit_label (cont_label);
4413 }
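
/* A trace of the hit path above (illustrative, spu_ea_model == 32):
   suppose the tag matches in way 1.  Then

     tag_equal      = { 0, -1, 0, 0 }
     tag_eq_pack_si = 0x0f00            (gbb result)
     eq_index       = clz (0x0f00) = 20
     rotqby by 20 mod 16 = 4 bytes  ->  data_pointer[1] lands in slot 0
     dirty_off      = 20 << 2 = 0x50   ->  offset of dirty_bits[1]

   matching the 0x40/0x50/0x60/0x70 offsets described above.  */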
4414
4415 static rtx
4416 expand_ea_mem (rtx mem, bool is_store)
4417 {
4418 rtx ea_addr;
4419 rtx data_addr = gen_reg_rtx (Pmode);
4420 rtx new_mem;
4421
4422 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4423 if (optimize_size || optimize == 0)
4424 ea_load_store (mem, is_store, ea_addr, data_addr);
4425 else
4426 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4427
4428 if (ea_alias_set == -1)
4429 ea_alias_set = new_alias_set ();
4430
4431 /* We generate a new MEM RTX to refer to the copy of the data
4432 in the cache. We do not copy memory attributes (except the
4433 alignment) from the original MEM, as they may no longer apply
4434 to the cache copy. */
4435 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4436 set_mem_alias_set (new_mem, ea_alias_set);
4437 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4438
4439 return new_mem;
4440 }
4441
4442 int
4443 spu_expand_mov (rtx * ops, enum machine_mode mode)
4444 {
4445 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4446 {
4447 /* Perform the move in the destination SUBREG's inner mode. */
4448 ops[0] = SUBREG_REG (ops[0]);
4449 mode = GET_MODE (ops[0]);
4450 ops[1] = gen_lowpart_common (mode, ops[1]);
4451 gcc_assert (ops[1]);
4452 }
4453
4454 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4455 {
4456 rtx from = SUBREG_REG (ops[1]);
4457 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4458
4459 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4460 && GET_MODE_CLASS (imode) == MODE_INT
4461 && subreg_lowpart_p (ops[1]));
4462
4463 if (GET_MODE_SIZE (imode) < 4)
4464 imode = SImode;
4465 if (imode != GET_MODE (from))
4466 from = gen_rtx_SUBREG (imode, from, 0);
4467
4468 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4469 {
4470 enum insn_code icode = convert_optab_handler (trunc_optab,
4471 mode, imode);
4472 emit_insn (GEN_FCN (icode) (ops[0], from));
4473 }
4474 else
4475 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4476 return 1;
4477 }
4478
4479 /* At least one of the operands needs to be a register. */
4480 if ((reload_in_progress | reload_completed) == 0
4481 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4482 {
4483 rtx temp = force_reg (mode, ops[1]);
4484 emit_move_insn (ops[0], temp);
4485 return 1;
4486 }
4487 if (reload_in_progress || reload_completed)
4488 {
4489 if (CONSTANT_P (ops[1]))
4490 return spu_split_immediate (ops);
4491 return 0;
4492 }
4493
4494 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4495 extend them. */
4496 if (GET_CODE (ops[1]) == CONST_INT)
4497 {
4498 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4499 if (val != INTVAL (ops[1]))
4500 {
4501 emit_move_insn (ops[0], GEN_INT (val));
4502 return 1;
4503 }
4504 }
4505 if (MEM_P (ops[0]))
4506 {
4507 if (MEM_ADDR_SPACE (ops[0]))
4508 ops[0] = expand_ea_mem (ops[0], true);
4509 return spu_split_store (ops);
4510 }
4511 if (MEM_P (ops[1]))
4512 {
4513 if (MEM_ADDR_SPACE (ops[1]))
4514 ops[1] = expand_ea_mem (ops[1], false);
4515 return spu_split_load (ops);
4516 }
4517
4518 return 0;
4519 }
4520
4521 static void
4522 spu_convert_move (rtx dst, rtx src)
4523 {
4524 enum machine_mode mode = GET_MODE (dst);
4525 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4526 rtx reg;
4527 gcc_assert (GET_MODE (src) == TImode);
4528 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4529 emit_insn (gen_rtx_SET (VOIDmode, reg,
4530 gen_rtx_TRUNCATE (int_mode,
4531 gen_rtx_LSHIFTRT (TImode, src,
4532 GEN_INT (int_mode == DImode ? 64 : 96)))));
4533 if (int_mode != mode)
4534 {
4535 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4536 emit_move_insn (dst, reg);
4537 }
4538 }
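
/* Why the shift counts are 64 and 96 (a sketch): the scalar lives in
   the preferred slot, i.e. the most significant bytes of the TImode
   register.  For a 4-byte mode,

     (truncate:SI (lshiftrt:TI src (const_int 96)))

   moves bytes 0-3 of SRC into the low 32 bits before truncation;
   shifting by 64 does the same for an 8-byte mode.  */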
4539
4540 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4541 the address from SRC and SRC+16. Return a REG or CONST_INT that
4542 specifies how many bytes to rotate the loaded registers, plus any
4543 extra from EXTRA_ROTBY. The address and rotate amounts are
4544 normalized to improve merging of loads and rotate computations. */
4545 static rtx
4546 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4547 {
4548 rtx addr = XEXP (src, 0);
4549 rtx p0, p1, rot, addr0, addr1;
4550 int rot_amt;
4551
4552 rot = 0;
4553 rot_amt = 0;
4554
4555 if (MEM_ALIGN (src) >= 128)
4556 /* Address is already aligned; simply perform a TImode load. */ ;
4557 else if (GET_CODE (addr) == PLUS)
4558 {
4559 /* 8 cases:
4560 aligned reg + aligned reg => lqx
4561 aligned reg + unaligned reg => lqx, rotqby
4562 aligned reg + aligned const => lqd
4563 aligned reg + unaligned const => lqd, rotqbyi
4564 unaligned reg + aligned reg => lqx, rotqby
4565 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4566 unaligned reg + aligned const => lqd, rotqby
4567 unaligned reg + unaligned const => not allowed by legitimate address
4568 */
4569 p0 = XEXP (addr, 0);
4570 p1 = XEXP (addr, 1);
4571 if (!reg_aligned_for_addr (p0))
4572 {
4573 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4574 {
4575 rot = gen_reg_rtx (SImode);
4576 emit_insn (gen_addsi3 (rot, p0, p1));
4577 }
4578 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4579 {
4580 if (INTVAL (p1) > 0
4581 && REG_POINTER (p0)
4582 && INTVAL (p1) * BITS_PER_UNIT
4583 < REGNO_POINTER_ALIGN (REGNO (p0)))
4584 {
4585 rot = gen_reg_rtx (SImode);
4586 emit_insn (gen_addsi3 (rot, p0, p1));
4587 addr = p0;
4588 }
4589 else
4590 {
4591 rtx x = gen_reg_rtx (SImode);
4592 emit_move_insn (x, p1);
4593 if (!spu_arith_operand (p1, SImode))
4594 p1 = x;
4595 rot = gen_reg_rtx (SImode);
4596 emit_insn (gen_addsi3 (rot, p0, p1));
4597 addr = gen_rtx_PLUS (Pmode, p0, x);
4598 }
4599 }
4600 else
4601 rot = p0;
4602 }
4603 else
4604 {
4605 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4606 {
4607 rot_amt = INTVAL (p1) & 15;
4608 if (INTVAL (p1) & -16)
4609 {
4610 p1 = GEN_INT (INTVAL (p1) & -16);
4611 addr = gen_rtx_PLUS (SImode, p0, p1);
4612 }
4613 else
4614 addr = p0;
4615 }
4616 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4617 rot = p1;
4618 }
4619 }
4620 else if (REG_P (addr))
4621 {
4622 if (!reg_aligned_for_addr (addr))
4623 rot = addr;
4624 }
4625 else if (GET_CODE (addr) == CONST)
4626 {
4627 if (GET_CODE (XEXP (addr, 0)) == PLUS
4628 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4629 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4630 {
4631 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4632 if (rot_amt & -16)
4633 addr = gen_rtx_CONST (Pmode,
4634 gen_rtx_PLUS (Pmode,
4635 XEXP (XEXP (addr, 0), 0),
4636 GEN_INT (rot_amt & -16)));
4637 else
4638 addr = XEXP (XEXP (addr, 0), 0);
4639 }
4640 else
4641 {
4642 rot = gen_reg_rtx (Pmode);
4643 emit_move_insn (rot, addr);
4644 }
4645 }
4646 else if (GET_CODE (addr) == CONST_INT)
4647 {
4648 rot_amt = INTVAL (addr);
4649 addr = GEN_INT (rot_amt & -16);
4650 }
4651 else if (!ALIGNED_SYMBOL_REF_P (addr))
4652 {
4653 rot = gen_reg_rtx (Pmode);
4654 emit_move_insn (rot, addr);
4655 }
4656
4657 rot_amt += extra_rotby;
4658
4659 rot_amt &= 15;
4660
4661 if (rot && rot_amt)
4662 {
4663 rtx x = gen_reg_rtx (SImode);
4664 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4665 rot = x;
4666 rot_amt = 0;
4667 }
4668 if (!rot && rot_amt)
4669 rot = GEN_INT (rot_amt);
4670
4671 addr0 = copy_rtx (addr);
4672 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4673 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4674
4675 if (dst1)
4676 {
4677 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4678 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4679 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4680 }
4681
4682 return rot;
4683 }
4684
4685 int
4686 spu_split_load (rtx * ops)
4687 {
4688 enum machine_mode mode = GET_MODE (ops[0]);
4689 rtx addr, load, rot;
4690 int rot_amt;
4691
4692 if (GET_MODE_SIZE (mode) >= 16)
4693 return 0;
4694
4695 addr = XEXP (ops[1], 0);
4696 gcc_assert (GET_CODE (addr) != AND);
4697
4698 if (!address_needs_split (ops[1]))
4699 {
4700 ops[1] = change_address (ops[1], TImode, addr);
4701 load = gen_reg_rtx (TImode);
4702 emit_insn (gen__movti (load, ops[1]));
4703 spu_convert_move (ops[0], load);
4704 return 1;
4705 }
4706
4707 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4708
4709 load = gen_reg_rtx (TImode);
4710 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4711
4712 if (rot)
4713 emit_insn (gen_rotqby_ti (load, load, rot));
4714
4715 spu_convert_move (ops[0], load);
4716 return 1;
4717 }
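
/* Worked example (illustrative): a QImode load uses
   rot_amt = 1 - 4 = -3, so spu_expand_load produces a rotate of
   (addr + 13) mod 16 bytes.  Rotating left by that amount places the
   loaded byte at byte 3 of the quadword -- the preferred slot for a
   1-byte scalar -- and spu_convert_move then extracts it into
   ops[0].  */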
4718
4719 int
4720 spu_split_store (rtx * ops)
4721 {
4722 enum machine_mode mode = GET_MODE (ops[0]);
4723 rtx reg;
4724 rtx addr, p0, p1, p1_lo, smem;
4725 int aform;
4726 int scalar;
4727
4728 if (GET_MODE_SIZE (mode) >= 16)
4729 return 0;
4730
4731 addr = XEXP (ops[0], 0);
4732 gcc_assert (GET_CODE (addr) != AND);
4733
4734 if (!address_needs_split (ops[0]))
4735 {
4736 reg = gen_reg_rtx (TImode);
4737 emit_insn (gen_spu_convert (reg, ops[1]));
4738 ops[0] = change_address (ops[0], TImode, addr);
4739 emit_move_insn (ops[0], reg);
4740 return 1;
4741 }
4742
4743 if (GET_CODE (addr) == PLUS)
4744 {
4745 /* 8 cases:
4746 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4747 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4748 aligned reg + aligned const => lqd, c?d, shuf, stqx
4749 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4750 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4751 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4752 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4753 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4754 */
4755 aform = 0;
4756 p0 = XEXP (addr, 0);
4757 p1 = p1_lo = XEXP (addr, 1);
4758 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4759 {
4760 p1_lo = GEN_INT (INTVAL (p1) & 15);
4761 if (reg_aligned_for_addr (p0))
4762 {
4763 p1 = GEN_INT (INTVAL (p1) & -16);
4764 if (p1 == const0_rtx)
4765 addr = p0;
4766 else
4767 addr = gen_rtx_PLUS (SImode, p0, p1);
4768 }
4769 else
4770 {
4771 rtx x = gen_reg_rtx (SImode);
4772 emit_move_insn (x, p1);
4773 addr = gen_rtx_PLUS (SImode, p0, x);
4774 }
4775 }
4776 }
4777 else if (REG_P (addr))
4778 {
4779 aform = 0;
4780 p0 = addr;
4781 p1 = p1_lo = const0_rtx;
4782 }
4783 else
4784 {
4785 aform = 1;
4786 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4787 p1 = 0; /* aform doesn't use p1 */
4788 p1_lo = addr;
4789 if (ALIGNED_SYMBOL_REF_P (addr))
4790 p1_lo = const0_rtx;
4791 else if (GET_CODE (addr) == CONST
4792 && GET_CODE (XEXP (addr, 0)) == PLUS
4793 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4794 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4795 {
4796 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4797 if ((v & -16) != 0)
4798 addr = gen_rtx_CONST (Pmode,
4799 gen_rtx_PLUS (Pmode,
4800 XEXP (XEXP (addr, 0), 0),
4801 GEN_INT (v & -16)));
4802 else
4803 addr = XEXP (XEXP (addr, 0), 0);
4804 p1_lo = GEN_INT (v & 15);
4805 }
4806 else if (GET_CODE (addr) == CONST_INT)
4807 {
4808 p1_lo = GEN_INT (INTVAL (addr) & 15);
4809 addr = GEN_INT (INTVAL (addr) & -16);
4810 }
4811 else
4812 {
4813 p1_lo = gen_reg_rtx (SImode);
4814 emit_move_insn (p1_lo, addr);
4815 }
4816 }
4817
4818 gcc_assert (aform == 0 || aform == 1);
4819 reg = gen_reg_rtx (TImode);
4820
4821 scalar = store_with_one_insn_p (ops[0]);
4822 if (!scalar)
4823 {
4824 /* We could copy the flags from the ops[0] MEM to mem here, but
4825 we don't, because we want this load to be optimized away if
4826 possible; copying the flags would prevent that in certain
4827 cases, e.g. consider the volatile flag. */
4828
4829 rtx pat = gen_reg_rtx (TImode);
4830 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4831 set_mem_alias_set (lmem, 0);
4832 emit_insn (gen_movti (reg, lmem));
4833
4834 if (!p0 || reg_aligned_for_addr (p0))
4835 p0 = stack_pointer_rtx;
4836 if (!p1_lo)
4837 p1_lo = const0_rtx;
4838
4839 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4840 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4841 }
4842 else
4843 {
4844 if (GET_CODE (ops[1]) == REG)
4845 emit_insn (gen_spu_convert (reg, ops[1]));
4846 else if (GET_CODE (ops[1]) == SUBREG)
4847 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4848 else
4849 abort ();
4850 }
4851
4852 if (GET_MODE_SIZE (mode) < 4 && scalar)
4853 emit_insn (gen_ashlti3
4854 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4855
4856 smem = change_address (ops[0], TImode, copy_rtx (addr));
4857 /* We can't use the previous alias set because the memory has changed
4858 size and can potentially overlap objects of other types. */
4859 set_mem_alias_set (smem, 0);
4860
4861 emit_insn (gen_movti (smem, reg));
4862 return 1;
4863 }
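
/* The unaligned-store path above is a read-modify-write.  Roughly
   (a sketch; the actual load/store forms depend on the address):

     lqd   reg, 0(addr)               load the containing quadword
     c?d   pat, p1_lo(p0)             cbd/chd/cwd/cdd insertion control
     shufb reg, value, reg, pat       merge the new bytes into place
     stqd  reg, 0(addr)               store the whole quadword back

   which is why the alias set is cleared on the temporary MEMs.  */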
4864
4865 /* Return TRUE if X is MEM which is a struct member reference
4866 and the member can safely be loaded and stored with a single
4867 instruction because it is padded. */
4868 static int
4869 mem_is_padded_component_ref (rtx x)
4870 {
4871 tree t = MEM_EXPR (x);
4872 tree r;
4873 if (!t || TREE_CODE (t) != COMPONENT_REF)
4874 return 0;
4875 t = TREE_OPERAND (t, 1);
4876 if (!t || TREE_CODE (t) != FIELD_DECL
4877 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4878 return 0;
4879 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4880 r = DECL_FIELD_CONTEXT (t);
4881 if (!r || TREE_CODE (r) != RECORD_TYPE)
4882 return 0;
4883 /* Make sure they are the same mode. */
4884 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4885 return 0;
4886 /* If there are no following fields then the field alignment assures
4887 the structure is padded to the alignment which means this field is
4888 padded too. */
4889 if (TREE_CHAIN (t) == 0)
4890 return 1;
4891 /* If the following field is also aligned then this field will be
4892 padded. */
4893 t = TREE_CHAIN (t);
4894 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4895 return 1;
4896 return 0;
4897 }
4898
4899 /* Parse the -mfixed-range= option string. */
4900 static void
4901 fix_range (const char *const_str)
4902 {
4903 int i, first, last;
4904 char *str, *dash, *comma;
4905
4906 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4907 REG2 are either register names or register numbers. The effect
4908 of this option is to mark the registers in the range from REG1 to
4909 REG2 as ``fixed'' so they won't be used by the compiler. */
4910
4911 i = strlen (const_str);
4912 str = (char *) alloca (i + 1);
4913 memcpy (str, const_str, i + 1);
4914
4915 while (1)
4916 {
4917 dash = strchr (str, '-');
4918 if (!dash)
4919 {
4920 warning (0, "value of -mfixed-range must have form REG1-REG2");
4921 return;
4922 }
4923 *dash = '\0';
4924 comma = strchr (dash + 1, ',');
4925 if (comma)
4926 *comma = '\0';
4927
4928 first = decode_reg_name (str);
4929 if (first < 0)
4930 {
4931 warning (0, "unknown register name: %s", str);
4932 return;
4933 }
4934
4935 last = decode_reg_name (dash + 1);
4936 if (last < 0)
4937 {
4938 warning (0, "unknown register name: %s", dash + 1);
4939 return;
4940 }
4941
4942 *dash = '-';
4943
4944 if (first > last)
4945 {
4946 warning (0, "%s-%s is an empty range", str, dash + 1);
4947 return;
4948 }
4949
4950 for (i = first; i <= last; ++i)
4951 fixed_regs[i] = call_used_regs[i] = 1;
4952
4953 if (!comma)
4954 break;
4955
4956 *comma = ',';
4957 str = comma + 1;
4958 }
4959 }
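
/* Usage sketch (the register spellings here are illustrative; any
   name accepted by decode_reg_name, or a plain register number,
   works): a command line such as

     -mfixed-range=80-89,100-110

   marks registers 80-89 and 100-110 as fixed and call-used, keeping
   the register allocator away from them.  */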
4960
4961 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4962 can be generated using the fsmbi instruction. */
4963 int
4964 fsmbi_const_p (rtx x)
4965 {
4966 if (CONSTANT_P (x))
4967 {
4968 /* We can always choose TImode for CONST_INT because the high bits
4969 of an SImode value will always be all 1s, i.e., valid for fsmbi. */
4970 enum immediate_class c = classify_immediate (x, TImode);
4971 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4972 }
4973 return 0;
4974 }
4975
4976 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4977 can be generated using the cbd, chd, cwd or cdd instruction. */
4978 int
4979 cpat_const_p (rtx x, enum machine_mode mode)
4980 {
4981 if (CONSTANT_P (x))
4982 {
4983 enum immediate_class c = classify_immediate (x, mode);
4984 return c == IC_CPAT;
4985 }
4986 return 0;
4987 }
4988
4989 rtx
4990 gen_cpat_const (rtx * ops)
4991 {
4992 unsigned char dst[16];
4993 int i, offset, shift, isize;
4994 if (GET_CODE (ops[3]) != CONST_INT
4995 || GET_CODE (ops[2]) != CONST_INT
4996 || (GET_CODE (ops[1]) != CONST_INT
4997 && GET_CODE (ops[1]) != REG))
4998 return 0;
4999 if (GET_CODE (ops[1]) == REG
5000 && (!REG_POINTER (ops[1])
5001 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5002 return 0;
5003
5004 for (i = 0; i < 16; i++)
5005 dst[i] = i + 16;
5006 isize = INTVAL (ops[3]);
5007 if (isize == 1)
5008 shift = 3;
5009 else if (isize == 2)
5010 shift = 2;
5011 else
5012 shift = 0;
5013 offset = (INTVAL (ops[2]) +
5014 (GET_CODE (ops[1]) ==
5015 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5016 for (i = 0; i < isize; i++)
5017 dst[offset + i] = i + shift;
5018 return array_to_constant (TImode, dst);
5019 }
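
/* Worked example (illustrative): for a 4-byte insert at offset 8
   (ops[1] == const0_rtx, ops[2] == GEN_INT (8), ops[3] == GEN_INT (4)),
   isize == 4 so shift == 0, and the generated control constant is

     { 16,17,18,19, 20,21,22,23, 0,1,2,3, 28,29,30,31 }

   i.e. bytes 8-11 of the result are taken from bytes 0-3 of the value
   being inserted, and everything else from the old quadword.  */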
5020
5021 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16-byte
5022 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5023 than 16 bytes, the value is repeated across the rest of the array. */
5024 void
5025 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5026 {
5027 HOST_WIDE_INT val;
5028 int i, j, first;
5029
5030 memset (arr, 0, 16);
5031 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5032 if (GET_CODE (x) == CONST_INT
5033 || (GET_CODE (x) == CONST_DOUBLE
5034 && (mode == SFmode || mode == DFmode)))
5035 {
5036 gcc_assert (mode != VOIDmode && mode != BLKmode);
5037
5038 if (GET_CODE (x) == CONST_DOUBLE)
5039 val = const_double_to_hwint (x);
5040 else
5041 val = INTVAL (x);
5042 first = GET_MODE_SIZE (mode) - 1;
5043 for (i = first; i >= 0; i--)
5044 {
5045 arr[i] = val & 0xff;
5046 val >>= 8;
5047 }
5048 /* Splat the constant across the whole array. */
5049 for (j = 0, i = first + 1; i < 16; i++)
5050 {
5051 arr[i] = arr[j];
5052 j = (j == first) ? 0 : j + 1;
5053 }
5054 }
5055 else if (GET_CODE (x) == CONST_DOUBLE)
5056 {
5057 val = CONST_DOUBLE_LOW (x);
5058 for (i = 15; i >= 8; i--)
5059 {
5060 arr[i] = val & 0xff;
5061 val >>= 8;
5062 }
5063 val = CONST_DOUBLE_HIGH (x);
5064 for (i = 7; i >= 0; i--)
5065 {
5066 arr[i] = val & 0xff;
5067 val >>= 8;
5068 }
5069 }
5070 else if (GET_CODE (x) == CONST_VECTOR)
5071 {
5072 int units;
5073 rtx elt;
5074 mode = GET_MODE_INNER (mode);
5075 units = CONST_VECTOR_NUNITS (x);
5076 for (i = 0; i < units; i++)
5077 {
5078 elt = CONST_VECTOR_ELT (x, i);
5079 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5080 {
5081 if (GET_CODE (elt) == CONST_DOUBLE)
5082 val = const_double_to_hwint (elt);
5083 else
5084 val = INTVAL (elt);
5085 first = GET_MODE_SIZE (mode) - 1;
5086 if (first + i * GET_MODE_SIZE (mode) > 16)
5087 abort ();
5088 for (j = first; j >= 0; j--)
5089 {
5090 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5091 val >>= 8;
5092 }
5093 }
5094 }
5095 }
5096 else
5097 gcc_unreachable ();
5098 }
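
/* Example (illustrative):

     constant_to_array (SImode, GEN_INT (0x01020304), arr);

   writes the big-endian bytes of the value and then splats them
   across the rest of the array:

     arr = { 01,02,03,04, 01,02,03,04, 01,02,03,04, 01,02,03,04 }  */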
5099
5100 /* Convert a 16-byte array to a constant of mode MODE. When MODE is
5101 smaller than 16 bytes, use the bytes that would represent that value
5102 in a register, e.g., for QImode return the value of arr[3]. */
5103 rtx
5104 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5105 {
5106 enum machine_mode inner_mode;
5107 rtvec v;
5108 int units, size, i, j, k;
5109 HOST_WIDE_INT val;
5110
5111 if (GET_MODE_CLASS (mode) == MODE_INT
5112 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5113 {
5114 j = GET_MODE_SIZE (mode);
5115 i = j < 4 ? 4 - j : 0;
5116 for (val = 0; i < j; i++)
5117 val = (val << 8) | arr[i];
5118 val = trunc_int_for_mode (val, mode);
5119 return GEN_INT (val);
5120 }
5121
5122 if (mode == TImode)
5123 {
5124 HOST_WIDE_INT high;
5125 for (i = high = 0; i < 8; i++)
5126 high = (high << 8) | arr[i];
5127 for (i = 8, val = 0; i < 16; i++)
5128 val = (val << 8) | arr[i];
5129 return immed_double_const (val, high, TImode);
5130 }
5131 if (mode == SFmode)
5132 {
5133 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5134 val = trunc_int_for_mode (val, SImode);
5135 return hwint_to_const_double (SFmode, val);
5136 }
5137 if (mode == DFmode)
5138 {
5139 for (i = 0, val = 0; i < 8; i++)
5140 val = (val << 8) | arr[i];
5141 return hwint_to_const_double (DFmode, val);
5142 }
5143
5144 if (!VECTOR_MODE_P (mode))
5145 abort ();
5146
5147 units = GET_MODE_NUNITS (mode);
5148 size = GET_MODE_UNIT_SIZE (mode);
5149 inner_mode = GET_MODE_INNER (mode);
5150 v = rtvec_alloc (units);
5151
5152 for (k = i = 0; i < units; ++i)
5153 {
5154 val = 0;
5155 for (j = 0; j < size; j++, k++)
5156 val = (val << 8) | arr[k];
5157
5158 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5159 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5160 else
5161 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5162 }
5163 if (k > 16)
5164 abort ();
5165
5166 return gen_rtx_CONST_VECTOR (mode, v);
5167 }
5168
5169 static void
5170 reloc_diagnostic (rtx x)
5171 {
5172 tree decl = 0;
5173 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5174 return;
5175
5176 if (GET_CODE (x) == SYMBOL_REF)
5177 decl = SYMBOL_REF_DECL (x);
5178 else if (GET_CODE (x) == CONST
5179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5180 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5181
5182 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5183 if (decl && !DECL_P (decl))
5184 decl = 0;
5185
5186 /* The decl could be a string constant. */
5187 if (decl && DECL_P (decl))
5188 {
5189 location_t loc;
5190 /* We use last_assemble_variable_decl to get line information. It's
5191 not always going to be right and might not even be close, but will
5192 be right for the more common cases. */
5193 if (!last_assemble_variable_decl || in_section == ctors_section)
5194 loc = DECL_SOURCE_LOCATION (decl);
5195 else
5196 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5197
5198 if (TARGET_WARN_RELOC)
5199 warning_at (loc, 0,
5200 "creating run-time relocation for %qD", decl);
5201 else
5202 error_at (loc,
5203 "creating run-time relocation for %qD", decl);
5204 }
5205 else
5206 {
5207 if (TARGET_WARN_RELOC)
5208 warning_at (input_location, 0, "creating run-time relocation");
5209 else
5210 error_at (input_location, "creating run-time relocation");
5211 }
5212 }
5213
5214 /* Hook into assemble_integer so we can generate an error for run-time
5215 relocations. The SPU ABI disallows them. */
5216 static bool
5217 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5218 {
5219 /* By default run-time relocations aren't supported, but we allow them
5220 in case users support them in their own run-time loader, and we
5221 provide a warning for those users who don't. */
5222 if ((GET_CODE (x) == SYMBOL_REF)
5223 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5224 reloc_diagnostic (x);
5225
5226 return default_assemble_integer (x, size, aligned_p);
5227 }
5228
5229 static void
5230 spu_asm_globalize_label (FILE * file, const char *name)
5231 {
5232 fputs ("\t.global\t", file);
5233 assemble_name (file, name);
5234 fputs ("\n", file);
5235 }
5236
5237 static bool
5238 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5239 int opno ATTRIBUTE_UNUSED, int *total,
5240 bool speed ATTRIBUTE_UNUSED)
5241 {
5242 enum machine_mode mode = GET_MODE (x);
5243 int cost = COSTS_N_INSNS (2);
5244
5245 /* Folding to a CONST_VECTOR will use extra space but there might
5246 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5247 only if it allows us to fold away multiple insns. Changing the cost
5248 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5249 because this cost will only be compared against a single insn.
5250 if (code == CONST_VECTOR)
5251 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5252 */
5253
5254 /* Use defaults for float operations. Not accurate but good enough. */
5255 if (mode == DFmode)
5256 {
5257 *total = COSTS_N_INSNS (13);
5258 return true;
5259 }
5260 if (mode == SFmode)
5261 {
5262 *total = COSTS_N_INSNS (6);
5263 return true;
5264 }
5265 switch (code)
5266 {
5267 case CONST_INT:
5268 if (satisfies_constraint_K (x))
5269 *total = 0;
5270 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5271 *total = COSTS_N_INSNS (1);
5272 else
5273 *total = COSTS_N_INSNS (3);
5274 return true;
5275
5276 case CONST:
5277 *total = COSTS_N_INSNS (3);
5278 return true;
5279
5280 case LABEL_REF:
5281 case SYMBOL_REF:
5282 *total = COSTS_N_INSNS (0);
5283 return true;
5284
5285 case CONST_DOUBLE:
5286 *total = COSTS_N_INSNS (5);
5287 return true;
5288
5289 case FLOAT_EXTEND:
5290 case FLOAT_TRUNCATE:
5291 case FLOAT:
5292 case UNSIGNED_FLOAT:
5293 case FIX:
5294 case UNSIGNED_FIX:
5295 *total = COSTS_N_INSNS (7);
5296 return true;
5297
5298 case PLUS:
5299 if (mode == TImode)
5300 {
5301 *total = COSTS_N_INSNS (9);
5302 return true;
5303 }
5304 break;
5305
5306 case MULT:
5307 cost =
5308 GET_CODE (XEXP (x, 0)) ==
5309 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5310 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5311 {
5312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5313 {
5314 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5315 cost = COSTS_N_INSNS (14);
5316 if ((val & 0xffff) == 0)
5317 cost = COSTS_N_INSNS (9);
5318 else if (val > 0 && val < 0x10000)
5319 cost = COSTS_N_INSNS (11);
5320 }
5321 }
5322 *total = cost;
5323 return true;
5324 case DIV:
5325 case UDIV:
5326 case MOD:
5327 case UMOD:
5328 *total = COSTS_N_INSNS (20);
5329 return true;
5330 case ROTATE:
5331 case ROTATERT:
5332 case ASHIFT:
5333 case ASHIFTRT:
5334 case LSHIFTRT:
5335 *total = COSTS_N_INSNS (4);
5336 return true;
5337 case UNSPEC:
5338 if (XINT (x, 1) == UNSPEC_CONVERT)
5339 *total = COSTS_N_INSNS (0);
5340 else
5341 *total = COSTS_N_INSNS (4);
5342 return true;
5343 }
5344 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5345 if (GET_MODE_CLASS (mode) == MODE_INT
5346 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5347 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5348 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5349 *total = cost;
5350 return true;
5351 }
5352
5353 static enum machine_mode
5354 spu_unwind_word_mode (void)
5355 {
5356 return SImode;
5357 }
5358
5359 /* Decide whether we can make a sibling call to a function. DECL is the
5360 declaration of the function being targeted by the call and EXP is the
5361 CALL_EXPR representing the call. */
5362 static bool
5363 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5364 {
5365 return decl && !TARGET_LARGE_MEM;
5366 }
5367
5368 /* We need to correctly update the back chain pointer and the Available
5369 Stack Size (which is in the second slot of the sp register). */
5370 void
5371 spu_allocate_stack (rtx op0, rtx op1)
5372 {
5373 HOST_WIDE_INT v;
5374 rtx chain = gen_reg_rtx (V4SImode);
5375 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5376 rtx sp = gen_reg_rtx (V4SImode);
5377 rtx splatted = gen_reg_rtx (V4SImode);
5378 rtx pat = gen_reg_rtx (TImode);
5379
5380 /* Copy the back chain so we can save it back again. */
5381 emit_move_insn (chain, stack_bot);
5382
5383 op1 = force_reg (SImode, op1);
5384
5385 v = 0x1020300010203ll;
5386 emit_move_insn (pat, immed_double_const (v, v, TImode));
5387 emit_insn (gen_shufb (splatted, op1, op1, pat));
5388
5389 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5390 emit_insn (gen_subv4si3 (sp, sp, splatted));
5391
5392 if (flag_stack_check)
5393 {
5394 rtx avail = gen_reg_rtx (SImode);
5395 rtx result = gen_reg_rtx (SImode);
5396 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5397 emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
5398 emit_insn (gen_spu_heq (result, GEN_INT (0)));
5399 }
5400
5401 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5402
5403 emit_move_insn (stack_bot, chain);
5404
5405 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5406 }
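
/* The shuffle constant 0x0001020300010203 used above replicates
   bytes 0-3 of OP1 into every word slot (a sketch):

     splatted = { op1, op1, op1, op1 }
     sp      -= splatted

   so one vector subtract adjusts both $sp (slot 0) and the Available
   Stack Size (slot 1) at once; slots 2 and 3 are don't-cares.  */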
5407
5408 void
5409 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5410 {
5411 static unsigned char arr[16] =
5412 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5413 rtx temp = gen_reg_rtx (SImode);
5414 rtx temp2 = gen_reg_rtx (SImode);
5415 rtx temp3 = gen_reg_rtx (V4SImode);
5416 rtx temp4 = gen_reg_rtx (V4SImode);
5417 rtx pat = gen_reg_rtx (TImode);
5418 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5419
5420 /* Restore the backchain from the first word, sp from the second. */
5421 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5422 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5423
5424 emit_move_insn (pat, array_to_constant (TImode, arr));
5425
5426 /* Compute the Available Stack Size for sp. */
5427 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5428 emit_insn (gen_shufb (temp3, temp, temp, pat));
5429
5430 /* Compute the Available Stack Size for the back chain. */
5431 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5432 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5433 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5434
5435 emit_insn (gen_addv4si3 (sp, sp, temp3));
5436 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5437 }
5438
5439 static void
5440 spu_init_libfuncs (void)
5441 {
5442 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5443 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5444 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5445 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5446 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5447 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5448 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5449 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5450 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5451 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5452 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5453 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5454
5455 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5456 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5457
5458 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5459 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5460 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5461 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5462 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5463 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5464 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5465 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5466 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5467 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5468 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5469 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5470
5471 set_optab_libfunc (smul_optab, TImode, "__multi3");
5472 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5473 set_optab_libfunc (smod_optab, TImode, "__modti3");
5474 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5475 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5476 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5477 }
5478
5479 /* Make a subreg, stripping any existing subreg. We could possibly just
5480 call simplify_subreg, but in this case we know what we want. */
5481 rtx
5482 spu_gen_subreg (enum machine_mode mode, rtx x)
5483 {
5484 if (GET_CODE (x) == SUBREG)
5485 x = SUBREG_REG (x);
5486 if (GET_MODE (x) == mode)
5487 return x;
5488 return gen_rtx_SUBREG (mode, x, 0);
5489 }
5490
5491 static bool
5492 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5493 {
5494 return (TYPE_MODE (type) == BLKmode
5495 && ((type) == 0
5496 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5497 || int_size_in_bytes (type) >
5498 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5499 }
5500 \f
5501 /* Create the built-in types and functions */
5502
5503 enum spu_function_code
5504 {
5505 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5506 #include "spu-builtins.def"
5507 #undef DEF_BUILTIN
5508 NUM_SPU_BUILTINS
5509 };
5510
5511 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5512
5513 struct spu_builtin_description spu_builtins[] = {
5514 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5515 {fcode, icode, name, type, params},
5516 #include "spu-builtins.def"
5517 #undef DEF_BUILTIN
5518 };
5519
5520 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5521
5522 /* Returns the spu builtin decl for CODE. */
5523
5524 static tree
5525 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5526 {
5527 if (code >= NUM_SPU_BUILTINS)
5528 return error_mark_node;
5529
5530 return spu_builtin_decls[code];
5531 }
5532
5533
5534 static void
5535 spu_init_builtins (void)
5536 {
5537 struct spu_builtin_description *d;
5538 unsigned int i;
5539
5540 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5541 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5542 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5543 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5544 V4SF_type_node = build_vector_type (float_type_node, 4);
5545 V2DF_type_node = build_vector_type (double_type_node, 2);
5546
5547 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5548 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5549 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5550 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5551
5552 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5553
5554 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5561 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5562 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5564 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5566
5567 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5568 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5569 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5571 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5572 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5573 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5575
5576 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5577 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5578
5579 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5580
5581 spu_builtin_types[SPU_BTI_PTR] =
5582 build_pointer_type (build_qualified_type
5583 (void_type_node,
5584 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5585
5586 /* For each builtin we build a new prototype. The tree code will make
5587 sure nodes are shared. */
5588 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5589 {
5590 tree p;
5591 char name[64]; /* add_builtin_function will make a copy. */
5592 int parm;
5593
5594 if (d->name == 0)
5595 continue;
5596
5597 /* Find last parm. */
5598 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5599 ;
5600
5601 p = void_list_node;
5602 while (parm > 1)
5603 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5604
5605 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5606
5607 sprintf (name, "__builtin_%s", d->name);
5608 spu_builtin_decls[i] =
5609 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5610 if (d->fcode == SPU_MASK_FOR_LOAD)
5611 TREE_READONLY (spu_builtin_decls[i]) = 1;
5612
5613 /* These builtins don't throw. */
5614 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5615 }
5616 }
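
/* Sketch of the prototype construction above for a hypothetical
   entry (the name and parameter list are illustrative, not taken
   from spu-builtins.def):

     d->name = "foo";
     d->parm = { SPU_BTI_UINTSI, SPU_BTI_UINTSI, SPU_BTI_END_OF_PARAMS };

   builds and registers

     unsigned int __builtin_foo (unsigned int);

   with parm[0] supplying the return type and parm[1..] the argument
   types, terminated by SPU_BTI_END_OF_PARAMS.  */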
5617
5618 void
5619 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5620 {
5621 static unsigned char arr[16] =
5622 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5623
5624 rtx temp = gen_reg_rtx (Pmode);
5625 rtx temp2 = gen_reg_rtx (V4SImode);
5626 rtx temp3 = gen_reg_rtx (V4SImode);
5627 rtx pat = gen_reg_rtx (TImode);
5628 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5629
5630 emit_move_insn (pat, array_to_constant (TImode, arr));
5631
5632 /* Restore the sp. */
5633 emit_move_insn (temp, op1);
5634 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5635
5636 /* Compute available stack size for sp. */
5637 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5638 emit_insn (gen_shufb (temp3, temp, temp, pat));
5639
5640 emit_insn (gen_addv4si3 (sp, sp, temp3));
5641 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5642 }
5643
5644 int
5645 spu_safe_dma (HOST_WIDE_INT channel)
5646 {
5647 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5648 }
5649
5650 void
5651 spu_builtin_splats (rtx ops[])
5652 {
5653 enum machine_mode mode = GET_MODE (ops[0]);
5654 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5655 {
5656 unsigned char arr[16];
5657 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5658 emit_move_insn (ops[0], array_to_constant (mode, arr));
5659 }
5660 else
5661 {
5662 rtx reg = gen_reg_rtx (TImode);
5663 rtx shuf;
5664 if (GET_CODE (ops[1]) != REG
5665 && GET_CODE (ops[1]) != SUBREG)
5666 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5667 switch (mode)
5668 {
5669 case V2DImode:
5670 case V2DFmode:
5671 shuf =
5672 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5673 TImode);
5674 break;
5675 case V4SImode:
5676 case V4SFmode:
5677 shuf =
5678 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5679 TImode);
5680 break;
5681 case V8HImode:
5682 shuf =
5683 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5684 TImode);
5685 break;
5686 case V16QImode:
5687 shuf =
5688 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5689 TImode);
5690 break;
5691 default:
5692 abort ();
5693 }
5694 emit_move_insn (reg, shuf);
5695 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5696 }
5697 }
5698
5699 void
5700 spu_builtin_extract (rtx ops[])
5701 {
5702 enum machine_mode mode;
5703 rtx rot, from, tmp;
5704
5705 mode = GET_MODE (ops[1]);
5706
5707 if (GET_CODE (ops[2]) == CONST_INT)
5708 {
5709 switch (mode)
5710 {
5711 case V16QImode:
5712 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5713 break;
5714 case V8HImode:
5715 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5716 break;
5717 case V4SFmode:
5718 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5719 break;
5720 case V4SImode:
5721 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5722 break;
5723 case V2DImode:
5724 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5725 break;
5726 case V2DFmode:
5727 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5728 break;
5729 default:
5730 abort ();
5731 }
5732 return;
5733 }
5734
5735 from = spu_gen_subreg (TImode, ops[1]);
5736 rot = gen_reg_rtx (TImode);
5737 tmp = gen_reg_rtx (SImode);
5738
5739 switch (mode)
5740 {
5741 case V16QImode:
5742 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5743 break;
5744 case V8HImode:
5745 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5746 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5747 break;
5748 case V4SFmode:
5749 case V4SImode:
5750 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5751 break;
5752 case V2DImode:
5753 case V2DFmode:
5754 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5755 break;
5756 default:
5757 abort ();
5758 }
5759 emit_insn (gen_rotqby_ti (rot, from, tmp));
5760
5761 emit_insn (gen_spu_convert (ops[0], rot));
5762 }
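
/* Worked example of the variable-index case: rotqby rotates the
   quadword left by TMP bytes so the selected element lands in its
   preferred slot.  For V4SImode and index 2, TMP = 2 << 2 = 8 and
   word 2 moves into bytes 0..3; for V16QImode and index 5,
   TMP = 5 - 3 = 2 moves byte 5 into byte 3, the preferred byte
   slot, before spu_convert extracts the scalar.  */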
5763
5764 void
5765 spu_builtin_insert (rtx ops[])
5766 {
5767 enum machine_mode mode = GET_MODE (ops[0]);
5768 enum machine_mode imode = GET_MODE_INNER (mode);
5769 rtx mask = gen_reg_rtx (TImode);
5770 rtx offset;
5771
5772 if (GET_CODE (ops[3]) == CONST_INT)
5773 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5774 else
5775 {
5776 offset = gen_reg_rtx (SImode);
5777 emit_insn (gen_mulsi3
5778 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5779 }
5780 emit_insn (gen_cpat
5781 (mask, stack_pointer_rtx, offset,
5782 GEN_INT (GET_MODE_SIZE (imode))));
5783 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5784 }
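
/* Sketch of the mask built by cpat (analogous to the cwd/cdd
   "generate controls" insns): it selects from ops[2] everywhere
   except at the insertion offset, where it selects the preferred
   slot of ops[1].  E.g. inserting a word at element 1 (offset 4)
   conceptually gives
       mask = 10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   so shufb copies the old vector and replaces bytes 4..7 with the
   new value.  */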
5785
5786 void
5787 spu_builtin_promote (rtx ops[])
5788 {
5789 enum machine_mode mode, imode;
5790 rtx rot, from, offset;
5791 HOST_WIDE_INT pos;
5792
5793 mode = GET_MODE (ops[0]);
5794 imode = GET_MODE_INNER (mode);
5795
5796 from = gen_reg_rtx (TImode);
5797 rot = spu_gen_subreg (TImode, ops[0]);
5798
5799 emit_insn (gen_spu_convert (from, ops[1]));
5800
5801 if (GET_CODE (ops[2]) == CONST_INT)
5802 {
5803 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5804 if (GET_MODE_SIZE (imode) < 4)
5805 pos += 4 - GET_MODE_SIZE (imode);
5806 offset = GEN_INT (pos & 15);
5807 }
5808 else
5809 {
5810 offset = gen_reg_rtx (SImode);
5811 switch (mode)
5812 {
5813 case V16QImode:
5814 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5815 break;
5816 case V8HImode:
5817 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5818 emit_insn (gen_addsi3 (offset, offset, offset));
5819 break;
5820 case V4SFmode:
5821 case V4SImode:
5822 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5823 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5824 break;
5825 case V2DImode:
5826 case V2DFmode:
5827 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5828 break;
5829 default:
5830 abort ();
5831 }
5832 }
5833 emit_insn (gen_rotqby_ti (rot, from, offset));
5834 }
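
/* Offset arithmetic example: promoting an SImode scalar to element 2
   of a V4SImode vector gives pos = -4 * 2 = -8 and offset
   = -8 & 15 = 8; rotqby left by 8 bytes moves the scalar from the
   preferred slot (bytes 0..3) into bytes 8..11, i.e. element 2.  */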
5835
5836 static void
5837 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5838 {
5839 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5840 rtx shuf = gen_reg_rtx (V4SImode);
5841 rtx insn = gen_reg_rtx (V4SImode);
5842 rtx shufc;
5843 rtx insnc;
5844 rtx mem;
5845
5846 fnaddr = force_reg (SImode, fnaddr);
5847 cxt = force_reg (SImode, cxt);
5848
5849 if (TARGET_LARGE_MEM)
5850 {
5851 rtx rotl = gen_reg_rtx (V4SImode);
5852 rtx mask = gen_reg_rtx (V4SImode);
5853 rtx bi = gen_reg_rtx (SImode);
5854 static unsigned char const shufa[16] = {
5855 2, 3, 0, 1, 18, 19, 16, 17,
5856 0, 1, 2, 3, 16, 17, 18, 19
5857 };
5858 static unsigned char const insna[16] = {
5859 0x41, 0, 0, 79,
5860 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5861 0x60, 0x80, 0, 79,
5862 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5863 };
5864
5865 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5866 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5867
5868 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5869 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5870 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5871 emit_insn (gen_selb (insn, insnc, rotl, mask));
5872
5873 mem = adjust_address (m_tramp, V4SImode, 0);
5874 emit_move_insn (mem, insn);
5875
5876 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5877 mem = adjust_address (m_tramp, Pmode, 16);
5878 emit_move_insn (mem, bi);
5879 }
5880 else
5881 {
5882 rtx scxt = gen_reg_rtx (SImode);
5883 rtx sfnaddr = gen_reg_rtx (SImode);
5884 static unsigned char const insna[16] = {
5885 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5886 0x30, 0, 0, 0,
5887 0, 0, 0, 0,
5888 0, 0, 0, 0
5889 };
5890
5891 shufc = gen_reg_rtx (TImode);
5892 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5893
5894 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5895 fits in 18 bits and that its last 4 bits are zero. This will be true
5896 if the stack pointer is initialized to 0x3fff0 at program start;
5897 otherwise the ila instruction will be garbage. */
5898
5899 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5900 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5901 emit_insn (gen_cpat
5902 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5903 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5904 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5905
5906 mem = adjust_address (m_tramp, V4SImode, 0);
5907 emit_move_insn (mem, insn);
5908 }
5909 emit_insn (gen_sync ());
5910 }
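
/* Sketch of the code assembled in the non-TARGET_LARGE_MEM case,
   assuming the usual SPU encodings (opcode byte 0x42 = ila,
   0x30 = bra):
        ila   $STATIC_CHAIN_REGNUM, cxt
        bra   function
   The ashl/cpat/shufb/ior sequence above splices the shifted cxt
   and fnaddr immediates into those two instruction words.  */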
5911
5912 static bool
5913 spu_warn_func_return (tree decl)
5914 {
5915 /* Naked functions are implemented entirely in assembly, including the
5916 return sequence, so suppress warnings about this. */
5917 return !spu_naked_function_p (decl);
5918 }
5919
5920 void
5921 spu_expand_sign_extend (rtx ops[])
5922 {
5923 unsigned char arr[16];
5924 rtx pat = gen_reg_rtx (TImode);
5925 rtx sign, c;
5926 int i, last;
5927 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5928 if (GET_MODE (ops[1]) == QImode)
5929 {
5930 sign = gen_reg_rtx (HImode);
5931 emit_insn (gen_extendqihi2 (sign, ops[1]));
5932 for (i = 0; i < 16; i++)
5933 arr[i] = 0x12;
5934 arr[last] = 0x13;
5935 }
5936 else
5937 {
5938 for (i = 0; i < 16; i++)
5939 arr[i] = 0x10;
5940 switch (GET_MODE (ops[1]))
5941 {
5942 case HImode:
5943 sign = gen_reg_rtx (SImode);
5944 emit_insn (gen_extendhisi2 (sign, ops[1]));
5945 arr[last] = 0x03;
5946 arr[last - 1] = 0x02;
5947 break;
5948 case SImode:
5949 sign = gen_reg_rtx (SImode);
5950 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5951 for (i = 0; i < 4; i++)
5952 arr[last - i] = 3 - i;
5953 break;
5954 case DImode:
5955 sign = gen_reg_rtx (SImode);
5956 c = gen_reg_rtx (SImode);
5957 emit_insn (gen_spu_convert (c, ops[1]));
5958 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5959 for (i = 0; i < 8; i++)
5960 arr[last - i] = 7 - i;
5961 break;
5962 default:
5963 abort ();
5964 }
5965 }
5966 emit_move_insn (pat, array_to_constant (TImode, arr));
5967 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5968 }
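
/* Shuffle example: for SImode -> DImode, LAST = 7, so the pattern is
   10 10 10 10 00 01 02 03 in the low eight bytes: bytes 0..3 of the
   result replicate byte 0 of SIGN (all sign bits after the arithmetic
   shift by 31) and bytes 4..7 copy the original word, forming the
   sign-extended doubleword in the preferred slot.  */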
5969
5970 /* Expand vector initialization. If there are any constant parts,
5971 load the constant parts first, then load any non-constant parts. */
5972 void
5973 spu_expand_vector_init (rtx target, rtx vals)
5974 {
5975 enum machine_mode mode = GET_MODE (target);
5976 int n_elts = GET_MODE_NUNITS (mode);
5977 int n_var = 0;
5978 bool all_same = true;
5979 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5980 int i;
5981
5982 first = XVECEXP (vals, 0, 0);
5983 for (i = 0; i < n_elts; ++i)
5984 {
5985 x = XVECEXP (vals, 0, i);
5986 if (!(CONST_INT_P (x)
5987 || GET_CODE (x) == CONST_DOUBLE
5988 || GET_CODE (x) == CONST_FIXED))
5989 ++n_var;
5990 else
5991 {
5992 if (first_constant == NULL_RTX)
5993 first_constant = x;
5994 }
5995 if (i > 0 && !rtx_equal_p (x, first))
5996 all_same = false;
5997 }
5998
5999 /* If all elements are the same, use splats to repeat the element. */
6000 if (all_same)
6001 {
6002 if (!CONSTANT_P (first)
6003 && !register_operand (first, GET_MODE (x)))
6004 first = force_reg (GET_MODE (first), first);
6005 emit_insn (gen_spu_splats (target, first));
6006 return;
6007 }
6008
6009 /* Load the constant parts. */
6010 if (n_var != n_elts)
6011 {
6012 if (n_var == 0)
6013 {
6014 emit_move_insn (target,
6015 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6016 }
6017 else
6018 {
6019 rtx constant_parts_rtx = copy_rtx (vals);
6020
6021 gcc_assert (first_constant != NULL_RTX);
6022 /* Fill empty slots with the first constant; this increases
6023 our chance of using splats in the recursive call below. */
6024 for (i = 0; i < n_elts; ++i)
6025 {
6026 x = XVECEXP (constant_parts_rtx, 0, i);
6027 if (!(CONST_INT_P (x)
6028 || GET_CODE (x) == CONST_DOUBLE
6029 || GET_CODE (x) == CONST_FIXED))
6030 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6031 }
6032
6033 spu_expand_vector_init (target, constant_parts_rtx);
6034 }
6035 }
6036
6037 /* Load the variable parts. */
6038 if (n_var != 0)
6039 {
6040 rtx insert_operands[4];
6041
6042 insert_operands[0] = target;
6043 insert_operands[2] = target;
6044 for (i = 0; i < n_elts; ++i)
6045 {
6046 x = XVECEXP (vals, 0, i);
6047 if (!(CONST_INT_P (x)
6048 || GET_CODE (x) == CONST_DOUBLE
6049 || GET_CODE (x) == CONST_FIXED))
6050 {
6051 if (!register_operand (x, GET_MODE (x)))
6052 x = force_reg (GET_MODE (x), x);
6053 insert_operands[1] = x;
6054 insert_operands[3] = GEN_INT (i);
6055 spu_builtin_insert (insert_operands);
6056 }
6057 }
6058 }
6059 }
6060
6061 /* Return the insn code of the vector compare instruction for the given
6062 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6063
6064 static int
6065 get_vec_cmp_insn (enum rtx_code code,
6066 enum machine_mode dest_mode,
6067 enum machine_mode op_mode)
6068
6069 {
6070 switch (code)
6071 {
6072 case EQ:
6073 if (dest_mode == V16QImode && op_mode == V16QImode)
6074 return CODE_FOR_ceq_v16qi;
6075 if (dest_mode == V8HImode && op_mode == V8HImode)
6076 return CODE_FOR_ceq_v8hi;
6077 if (dest_mode == V4SImode && op_mode == V4SImode)
6078 return CODE_FOR_ceq_v4si;
6079 if (dest_mode == V4SImode && op_mode == V4SFmode)
6080 return CODE_FOR_ceq_v4sf;
6081 if (dest_mode == V2DImode && op_mode == V2DFmode)
6082 return CODE_FOR_ceq_v2df;
6083 break;
6084 case GT:
6085 if (dest_mode == V16QImode && op_mode == V16QImode)
6086 return CODE_FOR_cgt_v16qi;
6087 if (dest_mode == V8HImode && op_mode == V8HImode)
6088 return CODE_FOR_cgt_v8hi;
6089 if (dest_mode == V4SImode && op_mode == V4SImode)
6090 return CODE_FOR_cgt_v4si;
6091 if (dest_mode == V4SImode && op_mode == V4SFmode)
6092 return CODE_FOR_cgt_v4sf;
6093 if (dest_mode == V2DImode && op_mode == V2DFmode)
6094 return CODE_FOR_cgt_v2df;
6095 break;
6096 case GTU:
6097 if (dest_mode == V16QImode && op_mode == V16QImode)
6098 return CODE_FOR_clgt_v16qi;
6099 if (dest_mode == V8HImode && op_mode == V8HImode)
6100 return CODE_FOR_clgt_v8hi;
6101 if (dest_mode == V4SImode && op_mode == V4SImode)
6102 return CODE_FOR_clgt_v4si;
6103 break;
6104 default:
6105 break;
6106 }
6107 return -1;
6108 }
6109
6110 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6111 DMODE is the expected destination mode. This is a recursive function. */
6112
6113 static rtx
6114 spu_emit_vector_compare (enum rtx_code rcode,
6115 rtx op0, rtx op1,
6116 enum machine_mode dmode)
6117 {
6118 int vec_cmp_insn;
6119 rtx mask;
6120 enum machine_mode dest_mode;
6121 enum machine_mode op_mode = GET_MODE (op1);
6122
6123 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6124
6125 /* Single-precision floating point vector compare instructions use the
6126 destination mode V4SImode; double-precision ones use V2DImode.
6127 The result is moved to the appropriate mode later. */
6128 if (dmode == V4SFmode)
6129 dest_mode = V4SImode;
6130 else if (dmode == V2DFmode)
6131 dest_mode = V2DImode;
6132 else
6133 dest_mode = dmode;
6134
6135 mask = gen_reg_rtx (dest_mode);
6136 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6137
6138 if (vec_cmp_insn == -1)
6139 {
6140 bool swap_operands = false;
6141 bool try_again = false;
6142 switch (rcode)
6143 {
6144 case LT:
6145 rcode = GT;
6146 swap_operands = true;
6147 try_again = true;
6148 break;
6149 case LTU:
6150 rcode = GTU;
6151 swap_operands = true;
6152 try_again = true;
6153 break;
6154 case NE:
6155 case UNEQ:
6156 case UNLE:
6157 case UNLT:
6158 case UNGE:
6159 case UNGT:
6160 case UNORDERED:
6161 /* Treat A != B as ~(A==B). */
6162 {
6163 enum rtx_code rev_code;
6164 enum insn_code nor_code;
6165 rtx rev_mask;
6166
6167 rev_code = reverse_condition_maybe_unordered (rcode);
6168 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6169
6170 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6171 gcc_assert (nor_code != CODE_FOR_nothing);
6172 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6173 if (dmode != dest_mode)
6174 {
6175 rtx temp = gen_reg_rtx (dest_mode);
6176 convert_move (temp, mask, 0);
6177 return temp;
6178 }
6179 return mask;
6180 }
6181 break;
6182 case GE:
6183 case GEU:
6184 case LE:
6185 case LEU:
6186 /* Try GT/GTU/LT/LTU OR EQ */
6187 {
6188 rtx c_rtx, eq_rtx;
6189 enum insn_code ior_code;
6190 enum rtx_code new_code;
6191
6192 switch (rcode)
6193 {
6194 case GE: new_code = GT; break;
6195 case GEU: new_code = GTU; break;
6196 case LE: new_code = LT; break;
6197 case LEU: new_code = LTU; break;
6198 default:
6199 gcc_unreachable ();
6200 }
6201
6202 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6203 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6204
6205 ior_code = optab_handler (ior_optab, dest_mode);
6206 gcc_assert (ior_code != CODE_FOR_nothing);
6207 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6208 if (dmode != dest_mode)
6209 {
6210 rtx temp = gen_reg_rtx (dest_mode);
6211 convert_move (temp, mask, 0);
6212 return temp;
6213 }
6214 return mask;
6215 }
6216 break;
6217 case LTGT:
6218 /* Try LT OR GT */
6219 {
6220 rtx lt_rtx, gt_rtx;
6221 enum insn_code ior_code;
6222
6223 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6224 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6225
6226 ior_code = optab_handler (ior_optab, dest_mode);
6227 gcc_assert (ior_code != CODE_FOR_nothing);
6228 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6229 if (dmode != dest_mode)
6230 {
6231 rtx temp = gen_reg_rtx (dest_mode);
6232 convert_move (temp, mask, 0);
6233 return temp;
6234 }
6235 return mask;
6236 }
6237 break;
6238 case ORDERED:
6239 /* Implement as (A==A) & (B==B) */
6240 {
6241 rtx a_rtx, b_rtx;
6242 enum insn_code and_code;
6243
6244 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6245 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6246
6247 and_code = optab_handler (and_optab, dest_mode);
6248 gcc_assert (and_code != CODE_FOR_nothing);
6249 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6250 if (dmode != dest_mode)
6251 {
6252 rtx temp = gen_reg_rtx (dest_mode);
6253 convert_move (temp, mask, 0);
6254 return temp;
6255 }
6256 return mask;
6257 }
6258 break;
6259 default:
6260 gcc_unreachable ();
6261 }
6262
6263 /* You only get two chances. */
6264 if (try_again)
6265 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6266
6267 gcc_assert (vec_cmp_insn != -1);
6268
6269 if (swap_operands)
6270 {
6271 rtx tmp;
6272 tmp = op0;
6273 op0 = op1;
6274 op1 = tmp;
6275 }
6276 }
6277
6278 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6279 if (dmode != dest_mode)
6280 {
6281 rtx temp = gen_reg_rtx (dest_mode);
6282 convert_move (temp, mask, 0);
6283 return temp;
6284 }
6285 return mask;
6286 }
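
/* E.g. LE has no direct SPU compare pattern, so LE (a, b) is emitted
   as LT (a, b) | EQ (a, b), and the recursive LT call in turn
   becomes GT (b, a) via the swap_operands path above.  */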
6287
6288
6289 /* Emit vector conditional expression.
6290 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6291 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6292
6293 int
6294 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6295 rtx cond, rtx cc_op0, rtx cc_op1)
6296 {
6297 enum machine_mode dest_mode = GET_MODE (dest);
6298 enum rtx_code rcode = GET_CODE (cond);
6299 rtx mask;
6300
6301 /* Get the vector mask for the given relational operations. */
6302 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6303
6304 emit_insn(gen_selb (dest, op2, op1, mask));
6305
6306 return 1;
6307 }
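
/* selb computes (A & ~MASK) | (B & MASK); with the operand order
   above, DEST receives OP1 in the lanes where the comparison is
   true (mask bits set) and OP2 elsewhere, which is exactly the
   VEC_COND_EXPR cond ? op1 : op2 semantics.  */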
6308
6309 static rtx
6310 spu_force_reg (enum machine_mode mode, rtx op)
6311 {
6312 rtx x, r;
6313 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6314 {
6315 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6316 || GET_MODE (op) == BLKmode)
6317 return force_reg (mode, convert_to_mode (mode, op, 0));
6318 abort ();
6319 }
6320
6321 r = force_reg (GET_MODE (op), op);
6322 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6323 {
6324 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6325 if (x)
6326 return x;
6327 }
6328
6329 x = gen_reg_rtx (mode);
6330 emit_insn (gen_spu_convert (x, r));
6331 return x;
6332 }
6333
6334 static void
6335 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6336 {
6337 HOST_WIDE_INT v = 0;
6338 int lsbits;
6339 /* Check the range of immediate operands. */
6340 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6341 {
6342 int range = p - SPU_BTI_7;
6343
6344 if (!CONSTANT_P (op))
6345 error ("%s expects an integer literal in the range [%d, %d]",
6346 d->name,
6347 spu_builtin_range[range].low, spu_builtin_range[range].high);
6348
6349 if (GET_CODE (op) == CONST
6350 && (GET_CODE (XEXP (op, 0)) == PLUS
6351 || GET_CODE (XEXP (op, 0)) == MINUS))
6352 {
6353 v = INTVAL (XEXP (XEXP (op, 0), 1));
6354 op = XEXP (XEXP (op, 0), 0);
6355 }
6356 else if (GET_CODE (op) == CONST_INT)
6357 v = INTVAL (op);
6358 else if (GET_CODE (op) == CONST_VECTOR
6359 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6360 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6361
6362 /* The default for v is 0, which is valid in every range. */
6363 if (v < spu_builtin_range[range].low
6364 || v > spu_builtin_range[range].high)
6365 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6366 d->name,
6367 spu_builtin_range[range].low, spu_builtin_range[range].high,
6368 v);
6369
6370 switch (p)
6371 {
6372 case SPU_BTI_S10_4:
6373 lsbits = 4;
6374 break;
6375 case SPU_BTI_U16_2:
6376 /* This is only used in lqa and stqa. Even though the insns
6377 encode 16 bits of the address (all but the 2 least
6378 significant), only 14 bits are used because the address is
6379 masked to be 16-byte aligned. */
6380 lsbits = 4;
6381 break;
6382 case SPU_BTI_S16_2:
6383 /* This is used for lqr and stqr. */
6384 lsbits = 2;
6385 break;
6386 default:
6387 lsbits = 0;
6388 }
6389
6390 if (GET_CODE (op) == LABEL_REF
6391 || (GET_CODE (op) == SYMBOL_REF
6392 && SYMBOL_REF_FUNCTION_P (op))
6393 || (v & ((1 << lsbits) - 1)) != 0)
6394 warning (0, "%d least significant bits of %s are ignored", lsbits,
6395 d->name);
6396 }
6397 }
6398
6399
6400 static int
6401 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6402 rtx target, rtx ops[])
6403 {
6404 enum insn_code icode = (enum insn_code) d->icode;
6405 int i = 0, a;
6406
6407 /* Expand the arguments into rtl. */
6408
6409 if (d->parm[0] != SPU_BTI_VOID)
6410 ops[i++] = target;
6411
6412 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6413 {
6414 tree arg = CALL_EXPR_ARG (exp, a);
6415 if (arg == 0)
6416 abort ();
6417 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6418 }
6419
6420 gcc_assert (i == insn_data[icode].n_generator_args);
6421 return i;
6422 }
6423
6424 static rtx
6425 spu_expand_builtin_1 (struct spu_builtin_description *d,
6426 tree exp, rtx target)
6427 {
6428 rtx pat;
6429 rtx ops[8];
6430 enum insn_code icode = (enum insn_code) d->icode;
6431 enum machine_mode mode, tmode;
6432 int i, p;
6433 int n_operands;
6434 tree return_type;
6435
6436 /* Set up ops[] with values from arglist. */
6437 n_operands = expand_builtin_args (d, exp, target, ops);
6438
6439 /* Handle the target operand which must be operand 0. */
6440 i = 0;
6441 if (d->parm[0] != SPU_BTI_VOID)
6442 {
6443
6444 /* We prefer the mode specified for the match_operand; otherwise
6445 use the mode from the builtin function prototype. */
6446 tmode = insn_data[d->icode].operand[0].mode;
6447 if (tmode == VOIDmode)
6448 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6449
6450 /* Try to use target, because not using it can lead to extra copies,
6451 and when all of the registers are in use extra copies lead
6452 to extra spills. */
6453 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6454 ops[0] = target;
6455 else
6456 target = ops[0] = gen_reg_rtx (tmode);
6457
6458 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6459 abort ();
6460
6461 i++;
6462 }
6463
6464 if (d->fcode == SPU_MASK_FOR_LOAD)
6465 {
6466 enum machine_mode mode = insn_data[icode].operand[1].mode;
6467 tree arg;
6468 rtx addr, op, pat;
6469
6470 /* Get the address. */
6471 arg = CALL_EXPR_ARG (exp, 0);
6472 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6473 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6474 addr = memory_address (mode, op);
6475
6476 /* Negate the address. */
6477 op = gen_reg_rtx (GET_MODE (addr));
6478 emit_insn (gen_rtx_SET (VOIDmode, op,
6479 gen_rtx_NEG (GET_MODE (addr), addr)));
6480 op = gen_rtx_MEM (mode, op);
6481
6482 pat = GEN_FCN (icode) (target, op);
6483 if (!pat)
6484 return 0;
6485 emit_insn (pat);
6486 return target;
6487 }
6488
6489 /* Ignore align_hint, but still expand its args in case they have
6490 side effects. */
6491 if (icode == CODE_FOR_spu_align_hint)
6492 return 0;
6493
6494 /* Handle the rest of the operands. */
6495 for (p = 1; i < n_operands; i++, p++)
6496 {
6497 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6498 mode = insn_data[d->icode].operand[i].mode;
6499 else
6500 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6501
6502 /* mode can be VOIDmode here for labels. */
6503
6504 /* For specific intrinsics with an immediate operand, e.g.,
6505 si_ai(), we sometimes need to convert the scalar argument to a
6506 vector argument by splatting the scalar. */
6507 if (VECTOR_MODE_P (mode)
6508 && (GET_CODE (ops[i]) == CONST_INT
6509 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6510 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6511 {
6512 if (GET_CODE (ops[i]) == CONST_INT)
6513 ops[i] = spu_const (mode, INTVAL (ops[i]));
6514 else
6515 {
6516 rtx reg = gen_reg_rtx (mode);
6517 enum machine_mode imode = GET_MODE_INNER (mode);
6518 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6519 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6520 if (imode != GET_MODE (ops[i]))
6521 ops[i] = convert_to_mode (imode, ops[i],
6522 TYPE_UNSIGNED (spu_builtin_types
6523 [d->parm[i]]));
6524 emit_insn (gen_spu_splats (reg, ops[i]));
6525 ops[i] = reg;
6526 }
6527 }
6528
6529 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6530
6531 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6532 ops[i] = spu_force_reg (mode, ops[i]);
6533 }
6534
6535 switch (n_operands)
6536 {
6537 case 0:
6538 pat = GEN_FCN (icode) (0);
6539 break;
6540 case 1:
6541 pat = GEN_FCN (icode) (ops[0]);
6542 break;
6543 case 2:
6544 pat = GEN_FCN (icode) (ops[0], ops[1]);
6545 break;
6546 case 3:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6548 break;
6549 case 4:
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6551 break;
6552 case 5:
6553 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6554 break;
6555 case 6:
6556 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6557 break;
6558 default:
6559 abort ();
6560 }
6561
6562 if (!pat)
6563 abort ();
6564
6565 if (d->type == B_CALL || d->type == B_BISLED)
6566 emit_call_insn (pat);
6567 else if (d->type == B_JUMP)
6568 {
6569 emit_jump_insn (pat);
6570 emit_barrier ();
6571 }
6572 else
6573 emit_insn (pat);
6574
6575 return_type = spu_builtin_types[d->parm[0]];
6576 if (d->parm[0] != SPU_BTI_VOID
6577 && GET_MODE (target) != TYPE_MODE (return_type))
6578 {
6579 /* target is the return value. It should always have the mode of
6580 the builtin function prototype. */
6581 target = spu_force_reg (TYPE_MODE (return_type), target);
6582 }
6583
6584 return target;
6585 }
6586
6587 rtx
6588 spu_expand_builtin (tree exp,
6589 rtx target,
6590 rtx subtarget ATTRIBUTE_UNUSED,
6591 enum machine_mode mode ATTRIBUTE_UNUSED,
6592 int ignore ATTRIBUTE_UNUSED)
6593 {
6594 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6595 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6596 struct spu_builtin_description *d;
6597
6598 if (fcode < NUM_SPU_BUILTINS)
6599 {
6600 d = &spu_builtins[fcode];
6601
6602 return spu_expand_builtin_1 (d, exp, target);
6603 }
6604 abort ();
6605 }
6606
6607 /* Implement targetm.vectorize.builtin_mask_for_load. */
6608 static tree
6609 spu_builtin_mask_for_load (void)
6610 {
6611 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6612 }
6613
6614 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6615 static int
6616 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6617 tree vectype,
6618 int misalign ATTRIBUTE_UNUSED)
6619 {
6620 unsigned elements;
6621
6622 switch (type_of_cost)
6623 {
6624 case scalar_stmt:
6625 case vector_stmt:
6626 case vector_load:
6627 case vector_store:
6628 case vec_to_scalar:
6629 case scalar_to_vec:
6630 case cond_branch_not_taken:
6631 case vec_perm:
6632 case vec_promote_demote:
6633 return 1;
6634
6635 case scalar_store:
6636 return 10;
6637
6638 case scalar_load:
6639 /* Load + rotate. */
6640 return 2;
6641
6642 case unaligned_load:
6643 return 2;
6644
6645 case cond_branch_taken:
6646 return 6;
6647
6648 case vec_construct:
6649 elements = TYPE_VECTOR_SUBPARTS (vectype);
6650 return elements / 2 + 1;
6651
6652 default:
6653 gcc_unreachable ();
6654 }
6655 }
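
/* E.g. a vec_construct of a V4SFmode vector (4 subparts) is costed
   at 4/2 + 1 = 3 units, versus 1 for most vector statements and 10
   for a scalar store.  */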
6656
6657 /* Implement targetm.vectorize.init_cost. */
6658
6659 static void *
6660 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6661 {
6662 unsigned *cost = XNEWVEC (unsigned, 3);
6663 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6664 return cost;
6665 }
6666
6667 /* Implement targetm.vectorize.add_stmt_cost. */
6668
6669 static unsigned
6670 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6671 struct _stmt_vec_info *stmt_info, int misalign,
6672 enum vect_cost_model_location where)
6673 {
6674 unsigned *cost = (unsigned *) data;
6675 unsigned retval = 0;
6676
6677 if (flag_vect_cost_model)
6678 {
6679 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6680 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6681
6682 /* Statements in an inner loop relative to the loop being
6683 vectorized are weighted more heavily. The value here is
6684 arbitrary and could potentially be improved with analysis. */
6685 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6686 count *= 50; /* FIXME. */
6687
6688 retval = (unsigned) (count * stmt_cost);
6689 cost[where] += retval;
6690 }
6691
6692 return retval;
6693 }
6694
6695 /* Implement targetm.vectorize.finish_cost. */
6696
6697 static void
6698 spu_finish_cost (void *data, unsigned *prologue_cost,
6699 unsigned *body_cost, unsigned *epilogue_cost)
6700 {
6701 unsigned *cost = (unsigned *) data;
6702 *prologue_cost = cost[vect_prologue];
6703 *body_cost = cost[vect_body];
6704 *epilogue_cost = cost[vect_epilogue];
6705 }
6706
6707 /* Implement targetm.vectorize.destroy_cost_data. */
6708
6709 static void
6710 spu_destroy_cost_data (void *data)
6711 {
6712 free (data);
6713 }
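
/* The vectorizer drives these four hooks as, roughly:
       void *data = targetm.vectorize.init_cost (loop);
       targetm.vectorize.add_stmt_cost (data, count, kind,
                                        stmt_info, misalign, where);
       targetm.vectorize.finish_cost (data, &prologue, &body, &epi);
       targetm.vectorize.destroy_cost_data (data);
   so this implementation just keeps one unsigned accumulator per
   cost-model location.  */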
6714
6715 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6716 after applying N iterations. This routine does not determine
6717 how many iterations are required to reach the desired alignment. */
6718
6719 static bool
6720 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6721 {
6722 if (is_packed)
6723 return false;
6724
6725 /* All other types are naturally aligned. */
6726 return true;
6727 }
6728
6729 /* Return the appropriate pointer mode for a named address space. */
6730 static enum machine_mode
6731 spu_addr_space_pointer_mode (addr_space_t addrspace)
6732 {
6733 switch (addrspace)
6734 {
6735 case ADDR_SPACE_GENERIC:
6736 return ptr_mode;
6737 case ADDR_SPACE_EA:
6738 return EAmode;
6739 default:
6740 gcc_unreachable ();
6741 }
6742 }
6743
6744 /* Return the appropriate address mode for a named address space. */
6745 static enum machine_mode
6746 spu_addr_space_address_mode (addr_space_t addrspace)
6747 {
6748 switch (addrspace)
6749 {
6750 case ADDR_SPACE_GENERIC:
6751 return Pmode;
6752 case ADDR_SPACE_EA:
6753 return EAmode;
6754 default:
6755 gcc_unreachable ();
6756 }
6757 }
6758
6759 /* Determine if one named address space is a subset of another. */
6760
6761 static bool
6762 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6763 {
6764 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6765 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6766
6767 if (subset == superset)
6768 return true;
6769
6770 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6771 being subsets but instead as disjoint address spaces. */
6772 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6773 return false;
6774
6775 else
6776 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6777 }
6778
6779 /* Convert from one address space to another. */
6780 static rtx
6781 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6782 {
6783 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6784 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6785
6786 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6787 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6788
6789 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6790 {
6791 rtx result, ls;
6792
6793 ls = gen_const_mem (DImode,
6794 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6795 set_mem_align (ls, 128);
6796
6797 result = gen_reg_rtx (Pmode);
6798 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6799 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6800 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6801 ls, const0_rtx, Pmode, 1);
6802
6803 emit_insn (gen_subsi3 (result, op, ls));
6804
6805 return result;
6806 }
6807
6808 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6809 {
6810 rtx result, ls;
6811
6812 ls = gen_const_mem (DImode,
6813 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6814 set_mem_align (ls, 128);
6815
6816 result = gen_reg_rtx (EAmode);
6817 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6818 op = force_reg (Pmode, op);
6819 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6820 ls, const0_rtx, EAmode, 1);
6821 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6822
6823 if (EAmode == SImode)
6824 emit_insn (gen_addsi3 (result, op, ls));
6825 else
6826 emit_insn (gen_adddi3 (result, op, ls));
6827
6828 return result;
6829 }
6830
6831 else
6832 gcc_unreachable ();
6833 }
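
/* Note that both directions preserve a null pointer: the conditional
   move turns the __ea_local_store base into 0 when OP is 0, so the
   subsequent subtraction or addition leaves a null pointer null
   instead of biasing it by the local store address.  */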
6834
6835
6836 /* Count the total number of instructions in each pipe and return the
6837 maximum, which is used as the Minimum Iteration Interval (MII)
6838 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6839 -2 marks instructions that can go in either pipe0 or pipe1. */
6840 static int
6841 spu_sms_res_mii (struct ddg *g)
6842 {
6843 int i;
6844 unsigned t[4] = {0, 0, 0, 0};
6845
6846 for (i = 0; i < g->num_nodes; i++)
6847 {
6848 rtx_insn *insn = g->nodes[i].insn;
6849 int p = get_pipe (insn) + 2;
6850
6851 gcc_assert (p >= 0);
6852 gcc_assert (p < 4);
6853
6854 t[p]++;
6855 if (dump_file && INSN_P (insn))
6856 fprintf (dump_file, "i%d %s %d %d\n",
6857 INSN_UID (insn),
6858 insn_data[INSN_CODE(insn)].name,
6859 p, t[p]);
6860 }
6861 if (dump_file)
6862 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6863
6864 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6865 }
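
/* E.g. with t = {3, 0, 4, 2} (3 either-pipe, 4 pipe0-only and
   2 pipe1-only instructions) the result is
   MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = 5.  */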
6866
6867
6868 void
6869 spu_init_expanders (void)
6870 {
6871 if (cfun)
6872 {
6873 rtx r0, r1;
6874 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6875 frame_pointer_needed is true. We don't know that until we're
6876 expanding the prologue. */
6877 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6878
6879 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6880 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6881 to be treated as aligned, so generate them here. */
6882 r0 = gen_reg_rtx (SImode);
6883 r1 = gen_reg_rtx (SImode);
6884 mark_reg_pointer (r0, 128);
6885 mark_reg_pointer (r1, 128);
6886 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6887 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6888 }
6889 }
6890
6891 static enum machine_mode
6892 spu_libgcc_cmp_return_mode (void)
6893 {
6894
6895 /* On SPU, word mode is TImode, so it is better to use SImode
6896 for compare returns. */
6897 return SImode;
6898 }
6899
6900 static enum machine_mode
6901 spu_libgcc_shift_count_mode (void)
6902 {
6903 /* On SPU, word mode is TImode, so it is better to use SImode
6904 for shift counts. */
6905 return SImode;
6906 }
6907
6908 /* Implement targetm.section_type_flags. */
6909 static unsigned int
6910 spu_section_type_flags (tree decl, const char *name, int reloc)
6911 {
6912 /* .toe needs to have type @nobits. */
6913 if (strcmp (name, ".toe") == 0)
6914 return SECTION_BSS;
6915 /* Don't load _ea into the current address space. */
6916 if (strcmp (name, "._ea") == 0)
6917 return SECTION_WRITE | SECTION_DEBUG;
6918 return default_section_type_flags (decl, name, reloc);
6919 }
6920
6921 /* Implement targetm.select_section. */
6922 static section *
6923 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6924 {
6925 /* Variables and constants defined in the __ea address space
6926 go into a special section named "._ea". */
6927 if (TREE_TYPE (decl) != error_mark_node
6928 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6929 {
6930 /* We might get called with string constants, but get_named_section
6931 doesn't like them as they are not DECLs. Also, we need to set
6932 flags in that case. */
6933 if (!DECL_P (decl))
6934 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6935
6936 return get_named_section (decl, "._ea", reloc);
6937 }
6938
6939 return default_elf_select_section (decl, reloc, align);
6940 }
6941
6942 /* Implement targetm.unique_section. */
6943 static void
6944 spu_unique_section (tree decl, int reloc)
6945 {
6946 /* We don't support unique section names in the __ea address
6947 space for now. */
6948 if (TREE_TYPE (decl) != error_mark_node
6949 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6950 return;
6951
6952 default_unique_section (decl, reloc);
6953 }
6954
6955 /* Generate a constant or register which contains 2^SCALE. We assume
6956 the result is valid for MODE. Currently, MODE must be V4SFmode and
6957 SCALE must be SImode. */
6958 rtx
6959 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6960 {
6961 gcc_assert (mode == V4SFmode);
6962 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6963 if (GET_CODE (scale) != CONST_INT)
6964 {
6965 /* unsigned int exp = (127 + scale) << 23;
6966 __vector float m = (__vector float) spu_splats (exp); */
6967 rtx reg = force_reg (SImode, scale);
6968 rtx exp = gen_reg_rtx (SImode);
6969 rtx mul = gen_reg_rtx (mode);
6970 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6971 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6972 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6973 return mul;
6974 }
6975 else
6976 {
6977 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6978 unsigned char arr[16];
6979 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6980 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6981 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6982 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6983 return array_to_constant (mode, arr);
6984 }
6985 }
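
/* Constant example: SCALE = 3 gives exp = 127 + 3 = 130 = 0x82, so
   each word of the constant is 0x41000000 (arr[0] = 0x82 >> 1 = 0x41,
   arr[1] = (0x82 << 7) & 0xff = 0), which is the IEEE-754 single
   encoding of 2^3 = 8.0f.  */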
6986
6987 /* After reload, just change the convert into a move instruction
6988 or a dead instruction. */
6989 void
6990 spu_split_convert (rtx ops[])
6991 {
6992 if (REGNO (ops[0]) == REGNO (ops[1]))
6993 emit_note (NOTE_INSN_DELETED);
6994 else
6995 {
6996 /* Use TImode always as this might help hard reg copyprop. */
6997 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6998 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6999 emit_insn (gen_move_insn (op0, op1));
7000 }
7001 }
7002
7003 void
7004 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7005 {
7006 fprintf (file, "# profile\n");
7007 fprintf (file, "brsl $75, _mcount\n");
7008 }
7009
7010 /* Implement targetm.ref_may_alias_errno. */
7011 static bool
7012 spu_ref_may_alias_errno (ao_ref *ref)
7013 {
7014 tree base = ao_ref_base (ref);
7015
7016 /* With SPU newlib, errno is defined as something like
7017 _impure_data._errno
7018 The default implementation of this target macro does not
7019 recognize such expressions, so we special-case it here. */
7020
7021 if (TREE_CODE (base) == VAR_DECL
7022 && !TREE_STATIC (base)
7023 && DECL_EXTERNAL (base)
7024 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7025 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7026 "_impure_data") == 0
7027 /* _errno is the first member of _impure_data. */
7028 && ref->offset == 0)
7029 return true;
7030
7031 return default_ref_may_alias_errno (ref);
7032 }
7033
7034 /* Output thunk to FILE that implements a C++ virtual function call (with
7035 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7036 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7037 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7038 relative to the resulting this pointer. */
7039
7040 static void
7041 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7042 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7043 tree function)
7044 {
7045 rtx op[8];
7046
7047 /* Make sure unwind info is emitted for the thunk if needed. */
7048 final_start_function (emit_barrier (), file, 1);
7049
7050 /* Operand 0 is the target function. */
7051 op[0] = XEXP (DECL_RTL (function), 0);
7052
7053 /* Operand 1 is the 'this' pointer. */
7054 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7055 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7056 else
7057 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7058
7059 /* Operands 2/3 are the low/high halfwords of delta. */
7060 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7061 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7062
7063 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7064 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7065 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7066
7067 /* Operands 6/7 are temporary registers. */
7068 op[6] = gen_rtx_REG (Pmode, 79);
7069 op[7] = gen_rtx_REG (Pmode, 78);
7070
7071 /* Add DELTA to this pointer. */
7072 if (delta)
7073 {
7074 if (delta >= -0x200 && delta < 0x200)
7075 output_asm_insn ("ai\t%1,%1,%2", op);
7076 else if (delta >= -0x8000 && delta < 0x8000)
7077 {
7078 output_asm_insn ("il\t%6,%2", op);
7079 output_asm_insn ("a\t%1,%1,%6", op);
7080 }
7081 else
7082 {
7083 output_asm_insn ("ilhu\t%6,%3", op);
7084 output_asm_insn ("iohl\t%6,%2", op);
7085 output_asm_insn ("a\t%1,%1,%6", op);
7086 }
7087 }
7088
7089 /* Perform vcall adjustment. */
7090 if (vcall_offset)
7091 {
7092 output_asm_insn ("lqd\t%7,0(%1)", op);
7093 output_asm_insn ("rotqby\t%7,%7,%1", op);
7094
7095 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7096 output_asm_insn ("ai\t%7,%7,%4", op);
7097 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7098 {
7099 output_asm_insn ("il\t%6,%4", op);
7100 output_asm_insn ("a\t%7,%7,%6", op);
7101 }
7102 else
7103 {
7104 output_asm_insn ("ilhu\t%6,%5", op);
7105 output_asm_insn ("iohl\t%6,%4", op);
7106 output_asm_insn ("a\t%7,%7,%6", op);
7107 }
7108
7109 output_asm_insn ("lqd\t%6,0(%7)", op);
7110 output_asm_insn ("rotqby\t%6,%6,%7", op);
7111 output_asm_insn ("a\t%1,%1,%6", op);
7112 }
7113
7114 /* Jump to target. */
7115 output_asm_insn ("br\t%0", op);
7116
7117 final_end_function ();
7118 }
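
/* E.g. for DELTA = 8 and VCALL_OFFSET = 0 the emitted thunk is just
        ai      $3, $3, 8
        br      function
   assuming FIRST_ARG_REGNUM is $3 as in the SPU ABI and the result
   is not returned in memory.  */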
7119
7120 /* Canonicalize a comparison from one we don't have to one we do have. */
7121 static void
7122 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7123 bool op0_preserve_value)
7124 {
7125 if (!op0_preserve_value
7126 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7127 {
7128 rtx tem = *op0;
7129 *op0 = *op1;
7130 *op1 = tem;
7131 *code = (int)swap_condition ((enum rtx_code)*code);
7132 }
7133 }
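
/* E.g. (LT a b) becomes (GT b a) when OP0 need not be preserved,
   matching the ceq/cgt/clgt patterns the SPU actually provides.  */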
7134 \f
7135 /* Table of machine attributes. */
7136 static const struct attribute_spec spu_attribute_table[] =
7137 {
7138 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7139 affects_type_identity } */
7140 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7141 false },
7142 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7143 false },
7144 { NULL, 0, 0, false, false, false, NULL, false }
7145 };
7146
7147 /* TARGET overrides. */
7148
7149 #undef TARGET_ADDR_SPACE_POINTER_MODE
7150 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7151
7152 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7153 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7154
7155 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7156 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7157 spu_addr_space_legitimate_address_p
7158
7159 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7160 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7161
7162 #undef TARGET_ADDR_SPACE_SUBSET_P
7163 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7164
7165 #undef TARGET_ADDR_SPACE_CONVERT
7166 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7167
7168 #undef TARGET_INIT_BUILTINS
7169 #define TARGET_INIT_BUILTINS spu_init_builtins
7170 #undef TARGET_BUILTIN_DECL
7171 #define TARGET_BUILTIN_DECL spu_builtin_decl
7172
7173 #undef TARGET_EXPAND_BUILTIN
7174 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7175
7176 #undef TARGET_UNWIND_WORD_MODE
7177 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7178
7179 #undef TARGET_LEGITIMIZE_ADDRESS
7180 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7181
7182 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7183 and .quad for the debugger. Once the assembler is known to be fixed,
7184 these can be removed. */
7185 #undef TARGET_ASM_UNALIGNED_SI_OP
7186 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7187
7188 #undef TARGET_ASM_ALIGNED_DI_OP
7189 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7190
7191 /* The .8byte directive doesn't seem to work well for a 32-bit
7192 architecture. */
7193 #undef TARGET_ASM_UNALIGNED_DI_OP
7194 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7195
7196 #undef TARGET_RTX_COSTS
7197 #define TARGET_RTX_COSTS spu_rtx_costs
7198
7199 #undef TARGET_ADDRESS_COST
7200 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7201
7202 #undef TARGET_SCHED_ISSUE_RATE
7203 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7204
7205 #undef TARGET_SCHED_INIT_GLOBAL
7206 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7207
7208 #undef TARGET_SCHED_INIT
7209 #define TARGET_SCHED_INIT spu_sched_init
7210
7211 #undef TARGET_SCHED_VARIABLE_ISSUE
7212 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7213
7214 #undef TARGET_SCHED_REORDER
7215 #define TARGET_SCHED_REORDER spu_sched_reorder
7216
7217 #undef TARGET_SCHED_REORDER2
7218 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7219
7220 #undef TARGET_SCHED_ADJUST_COST
7221 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7222
7223 #undef TARGET_ATTRIBUTE_TABLE
7224 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7225
7226 #undef TARGET_ASM_INTEGER
7227 #define TARGET_ASM_INTEGER spu_assemble_integer
7228
7229 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7230 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7231
7232 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7233 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7234
7235 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7236 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7237
7238 #undef TARGET_ASM_GLOBALIZE_LABEL
7239 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7240
7241 #undef TARGET_PASS_BY_REFERENCE
7242 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7243
7244 #undef TARGET_FUNCTION_ARG
7245 #define TARGET_FUNCTION_ARG spu_function_arg
7246
7247 #undef TARGET_FUNCTION_ARG_ADVANCE
7248 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7249
7250 #undef TARGET_MUST_PASS_IN_STACK
7251 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7252
7253 #undef TARGET_BUILD_BUILTIN_VA_LIST
7254 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7255
7256 #undef TARGET_EXPAND_BUILTIN_VA_START
7257 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7258
7259 #undef TARGET_SETUP_INCOMING_VARARGS
7260 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7261
7262 #undef TARGET_MACHINE_DEPENDENT_REORG
7263 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7264
7265 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7266 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7267
7268 #undef TARGET_INIT_LIBFUNCS
7269 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7270
7271 #undef TARGET_RETURN_IN_MEMORY
7272 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7273
7274 #undef TARGET_ENCODE_SECTION_INFO
7275 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7276
7277 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7278 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7279
7280 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7281 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7282
7283 #undef TARGET_VECTORIZE_INIT_COST
7284 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7285
7286 #undef TARGET_VECTORIZE_ADD_STMT_COST
7287 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7288
7289 #undef TARGET_VECTORIZE_FINISH_COST
7290 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7291
7292 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7293 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7294
7295 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7296 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7297
7298 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7299 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7300
7301 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7302 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7303
7304 #undef TARGET_SCHED_SMS_RES_MII
7305 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7306
7307 #undef TARGET_SECTION_TYPE_FLAGS
7308 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7309
7310 #undef TARGET_ASM_SELECT_SECTION
7311 #define TARGET_ASM_SELECT_SECTION spu_select_section
7312
7313 #undef TARGET_ASM_UNIQUE_SECTION
7314 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7315
7316 #undef TARGET_LEGITIMATE_ADDRESS_P
7317 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7318
7319 #undef TARGET_LEGITIMATE_CONSTANT_P
7320 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7321
7322 #undef TARGET_TRAMPOLINE_INIT
7323 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7324
7325 #undef TARGET_WARN_FUNC_RETURN
7326 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7327
7328 #undef TARGET_OPTION_OVERRIDE
7329 #define TARGET_OPTION_OVERRIDE spu_option_override
7330
7331 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7332 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7333
7334 #undef TARGET_REF_MAY_ALIAS_ERRNO
7335 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7336
7337 #undef TARGET_ASM_OUTPUT_MI_THUNK
7338 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7339 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7340 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7341
7342 /* Variable tracking should be run after all optimizations which
7343 change the order of insns. It also needs a valid CFG. */
7344 #undef TARGET_DELAY_VARTRACK
7345 #define TARGET_DELAY_VARTRACK true
7346
7347 #undef TARGET_CANONICALIZE_COMPARISON
7348 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7349
7350 #undef TARGET_CAN_USE_DOLOOP_P
7351 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7352
7353 struct gcc_target targetm = TARGET_INITIALIZER;
7354
7355 #include "gt-spu.h"