1 /* Copyright (C) 2006-2013 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "tree.h"
31 #include "stringpool.h"
32 #include "stor-layout.h"
33 #include "calls.h"
34 #include "varasm.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "except.h"
38 #include "function.h"
39 #include "output.h"
40 #include "basic-block.h"
41 #include "diagnostic-core.h"
42 #include "ggc.h"
43 #include "hashtab.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "reload.h"
49 #include "sched-int.h"
50 #include "params.h"
51 #include "machmode.h"
52 #include "pointer-set.h"
53 #include "hash-table.h"
54 #include "tree-ssa-alias.h"
55 #include "internal-fn.h"
56 #include "gimple-fold.h"
57 #include "tree-eh.h"
58 #include "gimple-expr.h"
59 #include "is-a.h"
60 #include "gimple.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "ddg.h"
64 #include "sbitmap.h"
65 #include "timevar.h"
66 #include "df.h"
67 #include "dumpfile.h"
68 #include "cfgloop.h"
69
70 /* Builtin types, data and prototypes. */
71
72 enum spu_builtin_type_index
73 {
74 SPU_BTI_END_OF_PARAMS,
75
76 /* We create new type nodes for these. */
77 SPU_BTI_V16QI,
78 SPU_BTI_V8HI,
79 SPU_BTI_V4SI,
80 SPU_BTI_V2DI,
81 SPU_BTI_V4SF,
82 SPU_BTI_V2DF,
83 SPU_BTI_UV16QI,
84 SPU_BTI_UV8HI,
85 SPU_BTI_UV4SI,
86 SPU_BTI_UV2DI,
87
88 /* A 16-byte type. (Implemented with V16QI_type_node) */
89 SPU_BTI_QUADWORD,
90
91 /* These all correspond to intSI_type_node */
92 SPU_BTI_7,
93 SPU_BTI_S7,
94 SPU_BTI_U7,
95 SPU_BTI_S10,
96 SPU_BTI_S10_4,
97 SPU_BTI_U14,
98 SPU_BTI_16,
99 SPU_BTI_S16,
100 SPU_BTI_S16_2,
101 SPU_BTI_U16,
102 SPU_BTI_U16_2,
103 SPU_BTI_U18,
104
105 /* These correspond to the standard types */
106 SPU_BTI_INTQI,
107 SPU_BTI_INTHI,
108 SPU_BTI_INTSI,
109 SPU_BTI_INTDI,
110
111 SPU_BTI_UINTQI,
112 SPU_BTI_UINTHI,
113 SPU_BTI_UINTSI,
114 SPU_BTI_UINTDI,
115
116 SPU_BTI_FLOAT,
117 SPU_BTI_DOUBLE,
118
119 SPU_BTI_VOID,
120 SPU_BTI_PTR,
121
122 SPU_BTI_MAX
123 };
124
125 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
126 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
127 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
128 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
129 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
130 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
131 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
132 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
133 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
134 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
135
136 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
137
138 struct spu_builtin_range
139 {
140 int low, high;
141 };
142
143 static struct spu_builtin_range spu_builtin_range[] = {
144 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
145 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
146 {0ll, 0x7fll}, /* SPU_BTI_U7 */
147 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
148 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
149 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
150 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
151 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
152 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
153 {0ll, 0xffffll}, /* SPU_BTI_U16 */
154 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
155 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
156 };
157
158 \f
159 /* Target specific attribute specifications. */
160 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
161
162 /* Prototypes and external defs. */
163 static int get_pipe (rtx insn);
164 static int spu_naked_function_p (tree func);
165 static int mem_is_padded_component_ref (rtx x);
166 static void fix_range (const char *);
167 static rtx spu_expand_load (rtx, rtx, rtx, int);
168
169 /* Which instruction set architecture to use. */
170 int spu_arch;
171 /* Which cpu are we tuning for. */
172 int spu_tune;
173
174 /* The hardware requires 8 insns between a hint and the branch it
175 affects. This variable describes how many rtl instructions the
176 compiler needs to see before inserting a hint, and then the compiler
177 will insert enough nops to make it at least 8 insns. The default is
178 for the compiler to allow up to 2 nops to be emitted. The nops are
179 inserted in pairs, so we round down. */
180 int spu_hint_dist = (8*4) - (2*4);
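/* With the default of 2 nops this works out to 24 bytes, i.e. 6 insns. */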
181
182 enum spu_immediate {
183 SPU_NONE,
184 SPU_IL,
185 SPU_ILA,
186 SPU_ILH,
187 SPU_ILHU,
188 SPU_ORI,
189 SPU_ORHI,
190 SPU_ORBI,
191 SPU_IOHL
192 };
193 enum immediate_class
194 {
195 IC_POOL, /* constant pool */
196 IC_IL1, /* one il* instruction */
197 IC_IL2, /* both ilhu and iohl instructions */
198 IC_IL1s, /* one il* instruction */
199 IC_IL2s, /* both ilhu and iohl instructions */
200 IC_FSMBI, /* the fsmbi instruction */
201 IC_CPAT, /* one of the c*d instructions */
202 IC_FSMBI2 /* fsmbi plus 1 other instruction */
203 };
204
205 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
206 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
207 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
208 static enum immediate_class classify_immediate (rtx op,
209 enum machine_mode mode);
210
211 /* Pointer mode for __ea references. */
212 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
213
214 \f
215 /* Define the structure for the machine field in struct function. */
216 struct GTY(()) machine_function
217 {
218 /* Register to use for PIC accesses. */
219 rtx pic_reg;
220 };
221
222 /* How to allocate a 'struct machine_function'. */
223 static struct machine_function *
224 spu_init_machine_status (void)
225 {
226 return ggc_alloc_cleared_machine_function ();
227 }
228
229 /* Implement TARGET_OPTION_OVERRIDE. */
230 static void
231 spu_option_override (void)
232 {
233 /* Set up function hooks. */
234 init_machine_status = spu_init_machine_status;
235
236 /* Small loops will be unrolled at -O3. For SPU it is more important
237 to keep code small by default. */
238 if (!flag_unroll_loops && !flag_peel_loops)
239 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
240 global_options.x_param_values,
241 global_options_set.x_param_values);
242
243 flag_omit_frame_pointer = 1;
244
245 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
246 if (align_functions < 8)
247 align_functions = 8;
248
249 spu_hint_dist = 8*4 - spu_max_nops*4;
250 if (spu_hint_dist < 0)
251 spu_hint_dist = 0;
252
253 if (spu_fixed_range_string)
254 fix_range (spu_fixed_range_string);
255
256 /* Determine processor architectural level. */
257 if (spu_arch_string)
258 {
259 if (strcmp (&spu_arch_string[0], "cell") == 0)
260 spu_arch = PROCESSOR_CELL;
261 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
262 spu_arch = PROCESSOR_CELLEDP;
263 else
264 error ("bad value (%s) for -march= switch", spu_arch_string);
265 }
266
267 /* Determine processor to tune for. */
268 if (spu_tune_string)
269 {
270 if (strcmp (&spu_tune_string[0], "cell") == 0)
271 spu_tune = PROCESSOR_CELL;
272 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
273 spu_tune = PROCESSOR_CELLEDP;
274 else
275 error ("bad value (%s) for -mtune= switch", spu_tune_string);
276 }
277
278 /* Change defaults according to the processor architecture. */
279 if (spu_arch == PROCESSOR_CELLEDP)
280 {
281 /* If no command line option has been otherwise specified, change
282 the default to -mno-safe-hints on celledp -- only the original
283 Cell/B.E. processors require this workaround. */
284 if (!(target_flags_explicit & MASK_SAFE_HINTS))
285 target_flags &= ~MASK_SAFE_HINTS;
286 }
287
288 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
289 }
290 \f
291 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
292 struct attribute_spec.handler. */
293
294 /* True if MODE is valid for the target. By "valid", we mean able to
295 be manipulated in non-trivial ways. In particular, this means all
296 the arithmetic is supported. */
297 static bool
298 spu_scalar_mode_supported_p (enum machine_mode mode)
299 {
300 switch (mode)
301 {
302 case QImode:
303 case HImode:
304 case SImode:
305 case SFmode:
306 case DImode:
307 case TImode:
308 case DFmode:
309 return true;
310
311 default:
312 return false;
313 }
314 }
315
316 /* Similarly for vector modes. "Supported" here is less strict. At
317 least some operations are supported; need to check optabs or builtins
318 for further details. */
319 static bool
320 spu_vector_mode_supported_p (enum machine_mode mode)
321 {
322 switch (mode)
323 {
324 case V16QImode:
325 case V8HImode:
326 case V4SImode:
327 case V2DImode:
328 case V4SFmode:
329 case V2DFmode:
330 return true;
331
332 default:
333 return false;
334 }
335 }
336
337 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
338 least significant bytes of the outer mode. This function returns
339 TRUE for the SUBREG's where this is correct. */
340 int
341 valid_subreg (rtx op)
342 {
343 enum machine_mode om = GET_MODE (op);
344 enum machine_mode im = GET_MODE (SUBREG_REG (op));
345 return om != VOIDmode && im != VOIDmode
346 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
347 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
348 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
349 }
350
351 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
352 and adjust the start offset. */
353 static rtx
354 adjust_operand (rtx op, HOST_WIDE_INT * start)
355 {
356 enum machine_mode mode;
357 int op_size;
358 /* Strip any paradoxical SUBREG. */
359 if (GET_CODE (op) == SUBREG
360 && (GET_MODE_BITSIZE (GET_MODE (op))
361 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
362 {
363 if (start)
364 *start -=
365 GET_MODE_BITSIZE (GET_MODE (op)) -
366 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
367 op = SUBREG_REG (op);
368 }
369 /* If it is smaller than SI, ensure a SUBREG will be used. */
370 op_size = GET_MODE_BITSIZE (GET_MODE (op));
371 if (op_size < 32)
372 {
373 if (start)
374 *start += 32 - op_size;
375 op_size = 32;
376 }
377 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
378 mode = mode_for_size (op_size, MODE_INT, 0);
379 if (mode != GET_MODE (op))
380 op = gen_rtx_SUBREG (mode, op, 0);
381 return op;
382 }
383
384 void
385 spu_expand_extv (rtx ops[], int unsignedp)
386 {
387 rtx dst = ops[0], src = ops[1];
388 HOST_WIDE_INT width = INTVAL (ops[2]);
389 HOST_WIDE_INT start = INTVAL (ops[3]);
390 HOST_WIDE_INT align_mask;
391 rtx s0, s1, mask, r0;
392
393 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
394
395 if (MEM_P (src))
396 {
397 /* First, determine if we need 1 TImode load or 2. We need only 1
398 if the bits being extracted do not cross the alignment boundary
399 as determined by the MEM and its address. */
400
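/* MEM_ALIGN is a power-of-two count of bits, so -MEM_ALIGN masks a bit
   position down to the start of its aligned block; the test below checks
   whether the first and last extracted bits land in the same block. */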
401 align_mask = -MEM_ALIGN (src);
402 if ((start & align_mask) == ((start + width - 1) & align_mask))
403 {
404 /* Alignment is sufficient for 1 load. */
405 s0 = gen_reg_rtx (TImode);
406 r0 = spu_expand_load (s0, 0, src, start / 8);
407 start &= 7;
408 if (r0)
409 emit_insn (gen_rotqby_ti (s0, s0, r0));
410 }
411 else
412 {
413 /* Need 2 loads. */
414 s0 = gen_reg_rtx (TImode);
415 s1 = gen_reg_rtx (TImode);
416 r0 = spu_expand_load (s0, s1, src, start / 8);
417 start &= 7;
418
419 gcc_assert (start + width <= 128);
420 if (r0)
421 {
422 rtx r1 = gen_reg_rtx (SImode);
423 mask = gen_reg_rtx (TImode);
424 emit_move_insn (mask, GEN_INT (-1));
425 emit_insn (gen_rotqby_ti (s0, s0, r0));
426 emit_insn (gen_rotqby_ti (s1, s1, r0));
427 if (GET_CODE (r0) == CONST_INT)
428 r1 = GEN_INT (INTVAL (r0) & 15);
429 else
430 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
431 emit_insn (gen_shlqby_ti (mask, mask, r1));
432 emit_insn (gen_selb (s0, s1, s0, mask));
433 }
434 }
435
436 }
437 else if (GET_CODE (src) == SUBREG)
438 {
439 rtx r = SUBREG_REG (src);
440 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
441 s0 = gen_reg_rtx (TImode);
442 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
443 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
444 else
445 emit_move_insn (s0, src);
446 }
447 else
448 {
449 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
450 s0 = gen_reg_rtx (TImode);
451 emit_move_insn (s0, src);
452 }
453
454 /* Now s0 is TImode and contains the bits to extract at start. */
455
456 if (start)
457 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
458
459 if (128 - width)
460 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
461
462 emit_move_insn (dst, s0);
463 }
464
465 void
466 spu_expand_insv (rtx ops[])
467 {
468 HOST_WIDE_INT width = INTVAL (ops[1]);
469 HOST_WIDE_INT start = INTVAL (ops[2]);
470 HOST_WIDE_INT maskbits;
471 enum machine_mode dst_mode;
472 rtx dst = ops[0], src = ops[3];
473 int dst_size;
474 rtx mask;
475 rtx shift_reg;
476 int shift;
477
478
479 if (GET_CODE (ops[0]) == MEM)
480 dst = gen_reg_rtx (TImode);
481 else
482 dst = adjust_operand (dst, &start);
483 dst_mode = GET_MODE (dst);
484 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
485
486 if (CONSTANT_P (src))
487 {
488 enum machine_mode m =
489 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
490 src = force_reg (m, convert_to_mode (m, src, 0));
491 }
492 src = adjust_operand (src, 0);
493
494 mask = gen_reg_rtx (dst_mode);
495 shift_reg = gen_reg_rtx (dst_mode);
496 shift = dst_size - start - width;
497
498 /* It's not safe to use subreg here because the compiler assumes
499 that the SUBREG_REG is right justified in the SUBREG. */
500 convert_move (shift_reg, src, 1);
501
502 if (shift > 0)
503 {
504 switch (dst_mode)
505 {
506 case SImode:
507 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
508 break;
509 case DImode:
510 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
511 break;
512 case TImode:
513 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
514 break;
515 default:
516 abort ();
517 }
518 }
519 else if (shift < 0)
520 abort ();
521
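/* Build a mask with ones in the WIDTH bits being inserted, counted from
   the most significant end of the destination. For example, a 32-bit
   destination with start=4 and width=8 yields maskbits 0x0ff00000. */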
522 switch (dst_size)
523 {
524 case 32:
525 maskbits = (-1ll << (32 - width - start));
526 if (start)
527 maskbits += (1ll << (32 - start));
528 emit_move_insn (mask, GEN_INT (maskbits));
529 break;
530 case 64:
531 maskbits = (-1ll << (64 - width - start));
532 if (start)
533 maskbits += (1ll << (64 - start));
534 emit_move_insn (mask, GEN_INT (maskbits));
535 break;
536 case 128:
537 {
538 unsigned char arr[16];
539 int i = start / 8;
540 memset (arr, 0, sizeof (arr));
541 arr[i] = 0xff >> (start & 7);
542 for (i++; i <= (start + width - 1) / 8; i++)
543 arr[i] = 0xff;
544 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
545 emit_move_insn (mask, array_to_constant (TImode, arr));
546 }
547 break;
548 default:
549 abort ();
550 }
551 if (GET_CODE (ops[0]) == MEM)
552 {
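/* The destination is memory: rotate the new bits and the mask to the
   field's byte position within the aligned quadword containing the
   destination, load that quadword, merge with selb and store it back.
   A second quadword is handled the same way when the field crosses
   the alignment boundary. */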
553 rtx low = gen_reg_rtx (SImode);
554 rtx rotl = gen_reg_rtx (SImode);
555 rtx mask0 = gen_reg_rtx (TImode);
556 rtx addr;
557 rtx addr0;
558 rtx addr1;
559 rtx mem;
560
561 addr = force_reg (Pmode, XEXP (ops[0], 0));
562 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
563 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
564 emit_insn (gen_negsi2 (rotl, low));
565 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
566 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
567 mem = change_address (ops[0], TImode, addr0);
568 set_mem_alias_set (mem, 0);
569 emit_move_insn (dst, mem);
570 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
571 if (start + width > MEM_ALIGN (ops[0]))
572 {
573 rtx shl = gen_reg_rtx (SImode);
574 rtx mask1 = gen_reg_rtx (TImode);
575 rtx dst1 = gen_reg_rtx (TImode);
576 rtx mem1;
577 addr1 = plus_constant (Pmode, addr, 16);
578 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
579 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
580 emit_insn (gen_shlqby_ti (mask1, mask, shl));
581 mem1 = change_address (ops[0], TImode, addr1);
582 set_mem_alias_set (mem1, 0);
583 emit_move_insn (dst1, mem1);
584 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
585 emit_move_insn (mem1, dst1);
586 }
587 emit_move_insn (mem, dst);
588 }
589 else
590 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
591 }
592
593
594 int
595 spu_expand_block_move (rtx ops[])
596 {
597 HOST_WIDE_INT bytes, align, offset;
598 rtx src, dst, sreg, dreg, target;
599 int i;
600 if (GET_CODE (ops[2]) != CONST_INT
601 || GET_CODE (ops[3]) != CONST_INT
602 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
603 return 0;
604
605 bytes = INTVAL (ops[2]);
606 align = INTVAL (ops[3]);
607
608 if (bytes <= 0)
609 return 1;
610
611 dst = ops[0];
612 src = ops[1];
613
614 if (align == 16)
615 {
616 for (offset = 0; offset + 16 <= bytes; offset += 16)
617 {
618 dst = adjust_address (ops[0], V16QImode, offset);
619 src = adjust_address (ops[1], V16QImode, offset);
620 emit_move_insn (dst, src);
621 }
622 if (offset < bytes)
623 {
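/* Copy the partial trailing chunk with a byte-mask select so that bytes
   past the end of the block leave the destination unchanged. */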
624 rtx mask;
625 unsigned char arr[16] = { 0 };
626 for (i = 0; i < bytes - offset; i++)
627 arr[i] = 0xff;
628 dst = adjust_address (ops[0], V16QImode, offset);
629 src = adjust_address (ops[1], V16QImode, offset);
630 mask = gen_reg_rtx (V16QImode);
631 sreg = gen_reg_rtx (V16QImode);
632 dreg = gen_reg_rtx (V16QImode);
633 target = gen_reg_rtx (V16QImode);
634 emit_move_insn (mask, array_to_constant (V16QImode, arr));
635 emit_move_insn (dreg, dst);
636 emit_move_insn (sreg, src);
637 emit_insn (gen_selb (target, dreg, sreg, mask));
638 emit_move_insn (dst, target);
639 }
640 return 1;
641 }
642 return 0;
643 }
644
645 enum spu_comp_code
646 { SPU_EQ, SPU_GT, SPU_GTU };
647
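/* Compare icodes, indexed by the mode-based index computed in
   spu_emit_branch_or_set and by enum spu_comp_code. A zero entry means
   there is no pattern for that combination. */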
648 int spu_comp_icode[12][3] = {
649 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
650 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
651 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
652 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
653 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
654 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
655 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
656 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
657 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
658 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
659 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
660 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
661 };
662
663 /* Generate a compare for CODE, then branch on it, select with it, or set
664 a register to its result, depending on IS_SET. GCC can figure this out
665 too if we don't provide all variations of compares, but since GCC always
666 wants to use WORD_MODE, we can generate better code in most cases if we
667 do it ourselves. */
668 void
669 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
670 {
671 int reverse_compare = 0;
672 int reverse_test = 0;
673 rtx compare_result, eq_result;
674 rtx comp_rtx, eq_rtx;
675 enum machine_mode comp_mode;
676 enum machine_mode op_mode;
677 enum spu_comp_code scode, eq_code;
678 enum insn_code ior_code;
679 enum rtx_code code = GET_CODE (cmp);
680 rtx op0 = XEXP (cmp, 0);
681 rtx op1 = XEXP (cmp, 1);
682 int index;
683 int eq_test = 0;
684
685 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
686 and so on, to keep the constant in operand 1. */
687 if (GET_CODE (op1) == CONST_INT)
688 {
689 HOST_WIDE_INT val = INTVAL (op1) - 1;
690 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
691 switch (code)
692 {
693 case GE:
694 op1 = GEN_INT (val);
695 code = GT;
696 break;
697 case LT:
698 op1 = GEN_INT (val);
699 code = LE;
700 break;
701 case GEU:
702 op1 = GEN_INT (val);
703 code = GTU;
704 break;
705 case LTU:
706 op1 = GEN_INT (val);
707 code = LEU;
708 break;
709 default:
710 break;
711 }
712 }
713
714 /* However, if we generate an integer result, performing a reverse test
715 would require an extra negation, so avoid that where possible. */
716 if (GET_CODE (op1) == CONST_INT && is_set == 1)
717 {
718 HOST_WIDE_INT val = INTVAL (op1) + 1;
719 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
720 switch (code)
721 {
722 case LE:
723 op1 = GEN_INT (val);
724 code = LT;
725 break;
726 case LEU:
727 op1 = GEN_INT (val);
728 code = LTU;
729 break;
730 default:
731 break;
732 }
733 }
734
735 comp_mode = SImode;
736 op_mode = GET_MODE (op0);
737
738 switch (code)
739 {
740 case GE:
741 scode = SPU_GT;
742 if (HONOR_NANS (op_mode))
743 {
744 reverse_compare = 0;
745 reverse_test = 0;
746 eq_test = 1;
747 eq_code = SPU_EQ;
748 }
749 else
750 {
751 reverse_compare = 1;
752 reverse_test = 1;
753 }
754 break;
755 case LE:
756 scode = SPU_GT;
757 if (HONOR_NANS (op_mode))
758 {
759 reverse_compare = 1;
760 reverse_test = 0;
761 eq_test = 1;
762 eq_code = SPU_EQ;
763 }
764 else
765 {
766 reverse_compare = 0;
767 reverse_test = 1;
768 }
769 break;
770 case LT:
771 reverse_compare = 1;
772 reverse_test = 0;
773 scode = SPU_GT;
774 break;
775 case GEU:
776 reverse_compare = 1;
777 reverse_test = 1;
778 scode = SPU_GTU;
779 break;
780 case LEU:
781 reverse_compare = 0;
782 reverse_test = 1;
783 scode = SPU_GTU;
784 break;
785 case LTU:
786 reverse_compare = 1;
787 reverse_test = 0;
788 scode = SPU_GTU;
789 break;
790 case NE:
791 reverse_compare = 0;
792 reverse_test = 1;
793 scode = SPU_EQ;
794 break;
795
796 case EQ:
797 scode = SPU_EQ;
798 break;
799 case GT:
800 scode = SPU_GT;
801 break;
802 case GTU:
803 scode = SPU_GTU;
804 break;
805 default:
806 scode = SPU_EQ;
807 break;
808 }
809
810 switch (op_mode)
811 {
812 case QImode:
813 index = 0;
814 comp_mode = QImode;
815 break;
816 case HImode:
817 index = 1;
818 comp_mode = HImode;
819 break;
820 case SImode:
821 index = 2;
822 break;
823 case DImode:
824 index = 3;
825 break;
826 case TImode:
827 index = 4;
828 break;
829 case SFmode:
830 index = 5;
831 break;
832 case DFmode:
833 index = 6;
834 break;
835 case V16QImode:
836 index = 7;
837 comp_mode = op_mode;
838 break;
839 case V8HImode:
840 index = 8;
841 comp_mode = op_mode;
842 break;
843 case V4SImode:
844 index = 9;
845 comp_mode = op_mode;
846 break;
847 case V4SFmode:
848 index = 10;
849 comp_mode = V4SImode;
850 break;
851 case V2DFmode:
852 index = 11;
853 comp_mode = V2DImode;
854 break;
855 case V2DImode:
856 default:
857 abort ();
858 }
859
860 if (GET_MODE (op1) == DFmode
861 && (scode != SPU_GT && scode != SPU_EQ))
862 abort ();
863
864 if (is_set == 0 && op1 == const0_rtx
865 && (GET_MODE (op0) == SImode
866 || GET_MODE (op0) == HImode
867 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
868 {
869 /* Don't need to set a register with the result when we are
870 comparing against zero and branching. */
871 reverse_test = !reverse_test;
872 compare_result = op0;
873 }
874 else
875 {
876 compare_result = gen_reg_rtx (comp_mode);
877
878 if (reverse_compare)
879 {
880 rtx t = op1;
881 op1 = op0;
882 op0 = t;
883 }
884
885 if (spu_comp_icode[index][scode] == 0)
886 abort ();
887
888 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
889 (op0, op_mode))
890 op0 = force_reg (op_mode, op0);
891 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
892 (op1, op_mode))
893 op1 = force_reg (op_mode, op1);
894 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
895 op0, op1);
896 if (comp_rtx == 0)
897 abort ();
898 emit_insn (comp_rtx);
899
900 if (eq_test)
901 {
902 eq_result = gen_reg_rtx (comp_mode);
903 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
904 op0, op1);
905 if (eq_rtx == 0)
906 abort ();
907 emit_insn (eq_rtx);
908 ior_code = optab_handler (ior_optab, comp_mode);
909 gcc_assert (ior_code != CODE_FOR_nothing);
910 emit_insn (GEN_FCN (ior_code)
911 (compare_result, compare_result, eq_result));
912 }
913 }
914
915 if (is_set == 0)
916 {
917 rtx bcomp;
918 rtx loc_ref;
919
920 /* We don't have branch on QI compare insns, so we convert the
921 QI compare result to a HI result. */
922 if (comp_mode == QImode)
923 {
924 rtx old_res = compare_result;
925 compare_result = gen_reg_rtx (HImode);
926 comp_mode = HImode;
927 emit_insn (gen_extendqihi2 (compare_result, old_res));
928 }
929
930 if (reverse_test)
931 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
932 else
933 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
934
935 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
936 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
937 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
938 loc_ref, pc_rtx)));
939 }
940 else if (is_set == 2)
941 {
942 rtx target = operands[0];
943 int compare_size = GET_MODE_BITSIZE (comp_mode);
944 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
945 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
946 rtx select_mask;
947 rtx op_t = operands[2];
948 rtx op_f = operands[3];
949
950 /* The result of the comparison can be SI, HI or QI mode. Create a
951 mask based on that result. */
952 if (target_size > compare_size)
953 {
954 select_mask = gen_reg_rtx (mode);
955 emit_insn (gen_extend_compare (select_mask, compare_result));
956 }
957 else if (target_size < compare_size)
958 select_mask =
959 gen_rtx_SUBREG (mode, compare_result,
960 (compare_size - target_size) / BITS_PER_UNIT);
961 else if (comp_mode != mode)
962 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
963 else
964 select_mask = compare_result;
965
966 if (GET_MODE (target) != GET_MODE (op_t)
967 || GET_MODE (target) != GET_MODE (op_f))
968 abort ();
969
970 if (reverse_test)
971 emit_insn (gen_selb (target, op_t, op_f, select_mask));
972 else
973 emit_insn (gen_selb (target, op_f, op_t, select_mask));
974 }
975 else
976 {
977 rtx target = operands[0];
978 if (reverse_test)
979 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
980 gen_rtx_NOT (comp_mode, compare_result)));
981 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
982 emit_insn (gen_extendhisi2 (target, compare_result));
983 else if (GET_MODE (target) == SImode
984 && GET_MODE (compare_result) == QImode)
985 emit_insn (gen_extend_compare (target, compare_result));
986 else
987 emit_move_insn (target, compare_result);
988 }
989 }
990
991 HOST_WIDE_INT
992 const_double_to_hwint (rtx x)
993 {
994 HOST_WIDE_INT val;
995 REAL_VALUE_TYPE rv;
996 if (GET_MODE (x) == SFmode)
997 {
998 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
999 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1000 }
1001 else if (GET_MODE (x) == DFmode)
1002 {
1003 long l[2];
1004 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1005 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1006 val = l[0];
1007 val = (val << 32) | (l[1] & 0xffffffff);
1008 }
1009 else
1010 abort ();
1011 return val;
1012 }
1013
1014 rtx
1015 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1016 {
1017 long tv[2];
1018 REAL_VALUE_TYPE rv;
1019 gcc_assert (mode == SFmode || mode == DFmode);
1020
1021 if (mode == SFmode)
1022 tv[0] = (v << 32) >> 32;
1023 else if (mode == DFmode)
1024 {
1025 tv[1] = (v << 32) >> 32;
1026 tv[0] = v >> 32;
1027 }
1028 real_from_target (&rv, tv, mode);
1029 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1030 }
1031
1032 void
1033 print_operand_address (FILE * file, register rtx addr)
1034 {
1035 rtx reg;
1036 rtx offset;
1037
1038 if (GET_CODE (addr) == AND
1039 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1040 && INTVAL (XEXP (addr, 1)) == -16)
1041 addr = XEXP (addr, 0);
1042
1043 switch (GET_CODE (addr))
1044 {
1045 case REG:
1046 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1047 break;
1048
1049 case PLUS:
1050 reg = XEXP (addr, 0);
1051 offset = XEXP (addr, 1);
1052 if (GET_CODE (offset) == REG)
1053 {
1054 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1055 reg_names[REGNO (offset)]);
1056 }
1057 else if (GET_CODE (offset) == CONST_INT)
1058 {
1059 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1060 INTVAL (offset), reg_names[REGNO (reg)]);
1061 }
1062 else
1063 abort ();
1064 break;
1065
1066 case CONST:
1067 case LABEL_REF:
1068 case SYMBOL_REF:
1069 case CONST_INT:
1070 output_addr_const (file, addr);
1071 break;
1072
1073 default:
1074 debug_rtx (addr);
1075 abort ();
1076 }
1077 }
1078
1079 void
1080 print_operand (FILE * file, rtx x, int code)
1081 {
1082 enum machine_mode mode = GET_MODE (x);
1083 HOST_WIDE_INT val;
1084 unsigned char arr[16];
1085 int xcode = GET_CODE (x);
1086 int i, info;
1087 if (GET_MODE (x) == VOIDmode)
1088 switch (code)
1089 {
1090 case 'L': /* 128 bits, signed */
1091 case 'm': /* 128 bits, signed */
1092 case 'T': /* 128 bits, signed */
1093 case 't': /* 128 bits, signed */
1094 mode = TImode;
1095 break;
1096 case 'K': /* 64 bits, signed */
1097 case 'k': /* 64 bits, signed */
1098 case 'D': /* 64 bits, signed */
1099 case 'd': /* 64 bits, signed */
1100 mode = DImode;
1101 break;
1102 case 'J': /* 32 bits, signed */
1103 case 'j': /* 32 bits, signed */
1104 case 's': /* 32 bits, signed */
1105 case 'S': /* 32 bits, signed */
1106 mode = SImode;
1107 break;
1108 }
1109 switch (code)
1110 {
1111
1112 case 'j': /* 32 bits, signed */
1113 case 'k': /* 64 bits, signed */
1114 case 'm': /* 128 bits, signed */
1115 if (xcode == CONST_INT
1116 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1117 {
1118 gcc_assert (logical_immediate_p (x, mode));
1119 constant_to_array (mode, x, arr);
1120 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1121 val = trunc_int_for_mode (val, SImode);
1122 switch (which_logical_immediate (val))
1123 {
1124 case SPU_ORI:
1125 break;
1126 case SPU_ORHI:
1127 fprintf (file, "h");
1128 break;
1129 case SPU_ORBI:
1130 fprintf (file, "b");
1131 break;
1132 default:
1133 gcc_unreachable();
1134 }
1135 }
1136 else
1137 gcc_unreachable();
1138 return;
1139
1140 case 'J': /* 32 bits, signed */
1141 case 'K': /* 64 bits, signed */
1142 case 'L': /* 128 bits, signed */
1143 if (xcode == CONST_INT
1144 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1145 {
1146 gcc_assert (logical_immediate_p (x, mode)
1147 || iohl_immediate_p (x, mode));
1148 constant_to_array (mode, x, arr);
1149 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1150 val = trunc_int_for_mode (val, SImode);
1151 switch (which_logical_immediate (val))
1152 {
1153 case SPU_ORI:
1154 case SPU_IOHL:
1155 break;
1156 case SPU_ORHI:
1157 val = trunc_int_for_mode (val, HImode);
1158 break;
1159 case SPU_ORBI:
1160 val = trunc_int_for_mode (val, QImode);
1161 break;
1162 default:
1163 gcc_unreachable();
1164 }
1165 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1166 }
1167 else
1168 gcc_unreachable();
1169 return;
1170
1171 case 't': /* 128 bits, signed */
1172 case 'd': /* 64 bits, signed */
1173 case 's': /* 32 bits, signed */
1174 if (CONSTANT_P (x))
1175 {
1176 enum immediate_class c = classify_immediate (x, mode);
1177 switch (c)
1178 {
1179 case IC_IL1:
1180 constant_to_array (mode, x, arr);
1181 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1182 val = trunc_int_for_mode (val, SImode);
1183 switch (which_immediate_load (val))
1184 {
1185 case SPU_IL:
1186 break;
1187 case SPU_ILA:
1188 fprintf (file, "a");
1189 break;
1190 case SPU_ILH:
1191 fprintf (file, "h");
1192 break;
1193 case SPU_ILHU:
1194 fprintf (file, "hu");
1195 break;
1196 default:
1197 gcc_unreachable ();
1198 }
1199 break;
1200 case IC_CPAT:
1201 constant_to_array (mode, x, arr);
1202 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1203 if (info == 1)
1204 fprintf (file, "b");
1205 else if (info == 2)
1206 fprintf (file, "h");
1207 else if (info == 4)
1208 fprintf (file, "w");
1209 else if (info == 8)
1210 fprintf (file, "d");
1211 break;
1212 case IC_IL1s:
1213 if (xcode == CONST_VECTOR)
1214 {
1215 x = CONST_VECTOR_ELT (x, 0);
1216 xcode = GET_CODE (x);
1217 }
1218 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1219 fprintf (file, "a");
1220 else if (xcode == HIGH)
1221 fprintf (file, "hu");
1222 break;
1223 case IC_FSMBI:
1224 case IC_FSMBI2:
1225 case IC_IL2:
1226 case IC_IL2s:
1227 case IC_POOL:
1228 abort ();
1229 }
1230 }
1231 else
1232 gcc_unreachable ();
1233 return;
1234
1235 case 'T': /* 128 bits, signed */
1236 case 'D': /* 64 bits, signed */
1237 case 'S': /* 32 bits, signed */
1238 if (CONSTANT_P (x))
1239 {
1240 enum immediate_class c = classify_immediate (x, mode);
1241 switch (c)
1242 {
1243 case IC_IL1:
1244 constant_to_array (mode, x, arr);
1245 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1246 val = trunc_int_for_mode (val, SImode);
1247 switch (which_immediate_load (val))
1248 {
1249 case SPU_IL:
1250 case SPU_ILA:
1251 break;
1252 case SPU_ILH:
1253 case SPU_ILHU:
1254 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1255 break;
1256 default:
1257 gcc_unreachable ();
1258 }
1259 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1260 break;
1261 case IC_FSMBI:
1262 constant_to_array (mode, x, arr);
1263 val = 0;
1264 for (i = 0; i < 16; i++)
1265 {
1266 val <<= 1;
1267 val |= arr[i] & 1;
1268 }
1269 print_operand (file, GEN_INT (val), 0);
1270 break;
1271 case IC_CPAT:
1272 constant_to_array (mode, x, arr);
1273 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1274 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1275 break;
1276 case IC_IL1s:
1277 if (xcode == HIGH)
1278 x = XEXP (x, 0);
1279 if (GET_CODE (x) == CONST_VECTOR)
1280 x = CONST_VECTOR_ELT (x, 0);
1281 output_addr_const (file, x);
1282 if (xcode == HIGH)
1283 fprintf (file, "@h");
1284 break;
1285 case IC_IL2:
1286 case IC_IL2s:
1287 case IC_FSMBI2:
1288 case IC_POOL:
1289 abort ();
1290 }
1291 }
1292 else
1293 gcc_unreachable ();
1294 return;
1295
1296 case 'C':
1297 if (xcode == CONST_INT)
1298 {
1299 /* Only the 4 least significant bits are relevant for generating
1300 control word instructions. */
1301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1302 return;
1303 }
1304 break;
1305
1306 case 'M': /* print code for c*d */
1307 if (GET_CODE (x) == CONST_INT)
1308 switch (INTVAL (x))
1309 {
1310 case 1:
1311 fprintf (file, "b");
1312 break;
1313 case 2:
1314 fprintf (file, "h");
1315 break;
1316 case 4:
1317 fprintf (file, "w");
1318 break;
1319 case 8:
1320 fprintf (file, "d");
1321 break;
1322 default:
1323 gcc_unreachable();
1324 }
1325 else
1326 gcc_unreachable();
1327 return;
1328
1329 case 'N': /* Negate the operand */
1330 if (xcode == CONST_INT)
1331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1332 else if (xcode == CONST_VECTOR)
1333 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1334 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1335 return;
1336
1337 case 'I': /* enable/disable interrupts */
1338 if (xcode == CONST_INT)
1339 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1340 return;
1341
1342 case 'b': /* branch modifiers */
1343 if (xcode == REG)
1344 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1345 else if (COMPARISON_P (x))
1346 fprintf (file, "%s", xcode == NE ? "n" : "");
1347 return;
1348
1349 case 'i': /* indirect call */
1350 if (xcode == MEM)
1351 {
1352 if (GET_CODE (XEXP (x, 0)) == REG)
1353 /* Used in indirect function calls. */
1354 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1355 else
1356 output_address (XEXP (x, 0));
1357 }
1358 return;
1359
1360 case 'p': /* load/store */
1361 if (xcode == MEM)
1362 {
1363 x = XEXP (x, 0);
1364 xcode = GET_CODE (x);
1365 }
1366 if (xcode == AND)
1367 {
1368 x = XEXP (x, 0);
1369 xcode = GET_CODE (x);
1370 }
1371 if (xcode == REG)
1372 fprintf (file, "d");
1373 else if (xcode == CONST_INT)
1374 fprintf (file, "a");
1375 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1376 fprintf (file, "r");
1377 else if (xcode == PLUS || xcode == LO_SUM)
1378 {
1379 if (GET_CODE (XEXP (x, 1)) == REG)
1380 fprintf (file, "x");
1381 else
1382 fprintf (file, "d");
1383 }
1384 return;
1385
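/* 'e', 'f' and 'g' print an immediate masked to 3, 5 or 6 bits
   respectively; 'h' shifts it right by 3 and masks to 5 bits. The
   upper-case forms 'E' through 'H' negate the value first. */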
1386 case 'e':
1387 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1388 val &= 0x7;
1389 output_addr_const (file, GEN_INT (val));
1390 return;
1391
1392 case 'f':
1393 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1394 val &= 0x1f;
1395 output_addr_const (file, GEN_INT (val));
1396 return;
1397
1398 case 'g':
1399 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1400 val &= 0x3f;
1401 output_addr_const (file, GEN_INT (val));
1402 return;
1403
1404 case 'h':
1405 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1406 val = (val >> 3) & 0x1f;
1407 output_addr_const (file, GEN_INT (val));
1408 return;
1409
1410 case 'E':
1411 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1412 val = -val;
1413 val &= 0x7;
1414 output_addr_const (file, GEN_INT (val));
1415 return;
1416
1417 case 'F':
1418 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1419 val = -val;
1420 val &= 0x1f;
1421 output_addr_const (file, GEN_INT (val));
1422 return;
1423
1424 case 'G':
1425 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1426 val = -val;
1427 val &= 0x3f;
1428 output_addr_const (file, GEN_INT (val));
1429 return;
1430
1431 case 'H':
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val = -(val & -8ll);
1434 val = (val >> 3) & 0x1f;
1435 output_addr_const (file, GEN_INT (val));
1436 return;
1437
1438 case 'v':
1439 case 'w':
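/* Print the unbiased single-precision exponent of the constant's first
   element (exponent bits of the first word minus the 127 bias); 'w'
   prints the negated value. */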
1440 constant_to_array (mode, x, arr);
1441 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1442 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1443 return;
1444
1445 case 0:
1446 if (xcode == REG)
1447 fprintf (file, "%s", reg_names[REGNO (x)]);
1448 else if (xcode == MEM)
1449 output_address (XEXP (x, 0));
1450 else if (xcode == CONST_VECTOR)
1451 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1452 else
1453 output_addr_const (file, x);
1454 return;
1455
1456 /* unused letters
1457 o qr u yz
1458 AB OPQR UVWXYZ */
1459 default:
1460 output_operand_lossage ("invalid %%xn code");
1461 }
1462 gcc_unreachable ();
1463 }
1464
1465 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1466 caller saved register. For leaf functions it is more efficient to
1467 use a volatile register because we won't need to save and restore the
1468 pic register. This routine is only valid after register allocation
1469 is completed, so we can pick an unused register. */
1470 static rtx
1471 get_pic_reg (void)
1472 {
1473 if (!reload_completed && !reload_in_progress)
1474 abort ();
1475
1476 /* If we've already made the decision, we need to keep with it. Once we've
1477 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1478 return true since the register is now live; this should not cause us to
1479 "switch back" to using pic_offset_table_rtx. */
1480 if (!cfun->machine->pic_reg)
1481 {
1482 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1483 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1484 else
1485 cfun->machine->pic_reg = pic_offset_table_rtx;
1486 }
1487
1488 return cfun->machine->pic_reg;
1489 }
1490
1491 /* Split constant addresses to handle cases that are too large.
1492 Add in the pic register when in PIC mode.
1493 Split immediates that require more than 1 instruction. */
1494 int
1495 spu_split_immediate (rtx * ops)
1496 {
1497 enum machine_mode mode = GET_MODE (ops[0]);
1498 enum immediate_class c = classify_immediate (ops[1], mode);
1499
1500 switch (c)
1501 {
1502 case IC_IL2:
1503 {
1504 unsigned char arrhi[16];
1505 unsigned char arrlo[16];
1506 rtx to, temp, hi, lo;
1507 int i;
1508 enum machine_mode imode = mode;
1509 /* We need to do reals as ints because the constant used in the
1510 IOR might not be a legitimate real constant. */
1511 imode = int_mode_for_mode (mode);
1512 constant_to_array (mode, ops[1], arrhi);
1513 if (imode != mode)
1514 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1515 else
1516 to = ops[0];
1517 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
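/* Split each 32-bit word of the constant: ARRHI keeps the high halfwords
   (loaded by the ilhu-class move below) and ARRLO keeps the low halfwords
   (merged in by the IOR, matching iohl). */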
1518 for (i = 0; i < 16; i += 4)
1519 {
1520 arrlo[i + 2] = arrhi[i + 2];
1521 arrlo[i + 3] = arrhi[i + 3];
1522 arrlo[i + 0] = arrlo[i + 1] = 0;
1523 arrhi[i + 2] = arrhi[i + 3] = 0;
1524 }
1525 hi = array_to_constant (imode, arrhi);
1526 lo = array_to_constant (imode, arrlo);
1527 emit_move_insn (temp, hi);
1528 emit_insn (gen_rtx_SET
1529 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1530 return 1;
1531 }
1532 case IC_FSMBI2:
1533 {
1534 unsigned char arr_fsmbi[16];
1535 unsigned char arr_andbi[16];
1536 rtx to, reg_fsmbi, reg_and;
1537 int i;
1538 enum machine_mode imode = mode;
1539 /* We need to do reals as ints because the constant used in the
1540 * AND might not be a legitimate real constant. */
1541 imode = int_mode_for_mode (mode);
1542 constant_to_array (mode, ops[1], arr_fsmbi);
1543 if (imode != mode)
1544 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1545 else
1546 to = ops[0];
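/* Every nonzero byte of an IC_FSMBI2 constant has the same value: mark
   those byte positions with 0xff for the fsmbi load, then AND the result
   with that byte value replicated across the register. */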
1547 for (i = 0; i < 16; i++)
1548 if (arr_fsmbi[i] != 0)
1549 {
1550 arr_andbi[0] = arr_fsmbi[i];
1551 arr_fsmbi[i] = 0xff;
1552 }
1553 for (i = 1; i < 16; i++)
1554 arr_andbi[i] = arr_andbi[0];
1555 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1556 reg_and = array_to_constant (imode, arr_andbi);
1557 emit_move_insn (to, reg_fsmbi);
1558 emit_insn (gen_rtx_SET
1559 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1560 return 1;
1561 }
1562 case IC_POOL:
1563 if (reload_in_progress || reload_completed)
1564 {
1565 rtx mem = force_const_mem (mode, ops[1]);
1566 if (TARGET_LARGE_MEM)
1567 {
1568 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1569 emit_move_insn (addr, XEXP (mem, 0));
1570 mem = replace_equiv_address (mem, addr);
1571 }
1572 emit_move_insn (ops[0], mem);
1573 return 1;
1574 }
1575 break;
1576 case IC_IL1s:
1577 case IC_IL2s:
1578 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1579 {
1580 if (c == IC_IL2s)
1581 {
1582 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1583 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1584 }
1585 else if (flag_pic)
1586 emit_insn (gen_pic (ops[0], ops[1]));
1587 if (flag_pic)
1588 {
1589 rtx pic_reg = get_pic_reg ();
1590 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1591 }
1592 return flag_pic || c == IC_IL2s;
1593 }
1594 break;
1595 case IC_IL1:
1596 case IC_FSMBI:
1597 case IC_CPAT:
1598 break;
1599 }
1600 return 0;
1601 }
1602
1603 /* SAVING is TRUE when we are generating the actual load and store
1604 instructions for REGNO. When determining the size of the stack
1605 needed for saving registers, we must allocate enough space for the
1606 worst case, because we don't always have the information early enough
1607 to not allocate it. But we can at least eliminate the actual loads
1608 and stores during the prologue/epilogue. */
1609 static int
1610 need_to_save_reg (int regno, int saving)
1611 {
1612 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1613 return 1;
1614 if (flag_pic
1615 && regno == PIC_OFFSET_TABLE_REGNUM
1616 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1617 return 1;
1618 return 0;
1619 }
1620
1621 /* This function is only correct starting with local register
1622 allocation */
1623 int
1624 spu_saved_regs_size (void)
1625 {
1626 int reg_save_size = 0;
1627 int regno;
1628
1629 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1630 if (need_to_save_reg (regno, 0))
1631 reg_save_size += 0x10;
1632 return reg_save_size;
1633 }
1634
1635 static rtx
1636 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1637 {
1638 rtx reg = gen_rtx_REG (V4SImode, regno);
1639 rtx mem =
1640 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1641 return emit_insn (gen_movv4si (mem, reg));
1642 }
1643
1644 static rtx
1645 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1646 {
1647 rtx reg = gen_rtx_REG (V4SImode, regno);
1648 rtx mem =
1649 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1650 return emit_insn (gen_movv4si (reg, mem));
1651 }
1652
1653 /* This happens after reload, so we need to expand it. */
1654 static rtx
1655 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1656 {
1657 rtx insn;
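/* If the offset fits the 'K' immediate constraint of the add-immediate
   instruction, add it directly; otherwise materialize it in SCRATCH
   first, which must therefore be a different register than SRC. */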
1658 if (satisfies_constraint_K (GEN_INT (imm)))
1659 {
1660 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1661 }
1662 else
1663 {
1664 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1665 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1666 if (REGNO (src) == REGNO (scratch))
1667 abort ();
1668 }
1669 return insn;
1670 }
1671
1672 /* Return nonzero if this function is known to have a null epilogue. */
1673
1674 int
1675 direct_return (void)
1676 {
1677 if (reload_completed)
1678 {
1679 if (cfun->static_chain_decl == 0
1680 && (spu_saved_regs_size ()
1681 + get_frame_size ()
1682 + crtl->outgoing_args_size
1683 + crtl->args.pretend_args_size == 0)
1684 && crtl->is_leaf)
1685 return 1;
1686 }
1687 return 0;
1688 }
1689
1690 /*
1691 The stack frame looks like this:
1692 +-------------+
1693 | incoming |
1694 | args |
1695 AP -> +-------------+
1696 | $lr save |
1697 +-------------+
1698 prev SP | back chain |
1699 +-------------+
1700 | var args |
1701 | reg save | crtl->args.pretend_args_size bytes
1702 +-------------+
1703 | ... |
1704 | saved regs | spu_saved_regs_size() bytes
1705 FP -> +-------------+
1706 | ... |
1707 | vars | get_frame_size() bytes
1708 HFP -> +-------------+
1709 | ... |
1710 | outgoing |
1711 | args | crtl->outgoing_args_size bytes
1712 +-------------+
1713 | $lr of next |
1714 | frame |
1715 +-------------+
1716 | back chain |
1717 SP -> +-------------+
1718
1719 */
1720 void
1721 spu_expand_prologue (void)
1722 {
1723 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1724 HOST_WIDE_INT total_size;
1725 HOST_WIDE_INT saved_regs_size;
1726 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1727 rtx scratch_reg_0, scratch_reg_1;
1728 rtx insn, real;
1729
1730 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1731 cfun->machine->pic_reg = pic_offset_table_rtx;
1732
1733 if (spu_naked_function_p (current_function_decl))
1734 return;
1735
1736 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1737 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1738
1739 saved_regs_size = spu_saved_regs_size ();
1740 total_size = size + saved_regs_size
1741 + crtl->outgoing_args_size
1742 + crtl->args.pretend_args_size;
1743
1744 if (!crtl->is_leaf
1745 || cfun->calls_alloca || total_size > 0)
1746 total_size += STACK_POINTER_OFFSET;
1747
1748 /* Save this first because code after this might use the link
1749 register as a scratch register. */
1750 if (!crtl->is_leaf)
1751 {
1752 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1753 RTX_FRAME_RELATED_P (insn) = 1;
1754 }
1755
1756 if (total_size > 0)
1757 {
1758 offset = -crtl->args.pretend_args_size;
1759 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1760 if (need_to_save_reg (regno, 1))
1761 {
1762 offset -= 16;
1763 insn = frame_emit_store (regno, sp_reg, offset);
1764 RTX_FRAME_RELATED_P (insn) = 1;
1765 }
1766 }
1767
1768 if (flag_pic && cfun->machine->pic_reg)
1769 {
1770 rtx pic_reg = cfun->machine->pic_reg;
1771 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1772 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1773 }
1774
1775 if (total_size > 0)
1776 {
1777 if (flag_stack_check)
1778 {
1779 /* We compare against total_size-1 because
1780 ($sp >= total_size) <=> ($sp > total_size-1) */
1781 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1782 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1783 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1784 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1785 {
1786 emit_move_insn (scratch_v4si, size_v4si);
1787 size_v4si = scratch_v4si;
1788 }
1789 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1790 emit_insn (gen_vec_extractv4si
1791 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1792 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1793 }
1794
1795 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1796 the value of the previous $sp because we save it as the back
1797 chain. */
1798 if (total_size <= 2000)
1799 {
1800 /* In this case we save the back chain first. */
1801 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1802 insn =
1803 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1804 }
1805 else
1806 {
1807 insn = emit_move_insn (scratch_reg_0, sp_reg);
1808 insn =
1809 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1810 }
1811 RTX_FRAME_RELATED_P (insn) = 1;
1812 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1813 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1814
1815 if (total_size > 2000)
1816 {
1817 /* Save the back chain ptr */
1818 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1819 }
1820
1821 if (frame_pointer_needed)
1822 {
1823 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1824 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1825 + crtl->outgoing_args_size;
1826 /* Set the new frame_pointer */
1827 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1828 RTX_FRAME_RELATED_P (insn) = 1;
1829 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1830 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1831 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1832 }
1833 }
1834
1835 if (flag_stack_usage_info)
1836 current_function_static_stack_size = total_size;
1837 }
1838
1839 void
1840 spu_expand_epilogue (bool sibcall_p)
1841 {
1842 int size = get_frame_size (), offset, regno;
1843 HOST_WIDE_INT saved_regs_size, total_size;
1844 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1845 rtx scratch_reg_0;
1846
1847 if (spu_naked_function_p (current_function_decl))
1848 return;
1849
1850 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1851
1852 saved_regs_size = spu_saved_regs_size ();
1853 total_size = size + saved_regs_size
1854 + crtl->outgoing_args_size
1855 + crtl->args.pretend_args_size;
1856
1857 if (!crtl->is_leaf
1858 || cfun->calls_alloca || total_size > 0)
1859 total_size += STACK_POINTER_OFFSET;
1860
1861 if (total_size > 0)
1862 {
1863 if (cfun->calls_alloca)
1864 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1865 else
1866 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1867
1868
1869 if (saved_regs_size > 0)
1870 {
1871 offset = -crtl->args.pretend_args_size;
1872 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1873 if (need_to_save_reg (regno, 1))
1874 {
1875 offset -= 0x10;
1876 frame_emit_load (regno, sp_reg, offset);
1877 }
1878 }
1879 }
1880
1881 if (!crtl->is_leaf)
1882 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1883
1884 if (!sibcall_p)
1885 {
1886 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1887 emit_jump_insn (gen__return ());
1888 }
1889 }
1890
1891 rtx
1892 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1893 {
1894 if (count != 0)
1895 return 0;
1896 /* This is inefficient because it ends up copying to a save-register
1897 which then gets saved even though $lr has already been saved. But
1898 it does generate better code for leaf functions and we don't need
1899 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1900 used for __builtin_return_address anyway, so maybe we don't care if
1901 it's inefficient. */
1902 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1903 }
1904 \f
1905
1906 /* Given VAL, generate a constant appropriate for MODE.
1907 If MODE is a vector mode, every element will be VAL.
1908 For TImode, VAL will be zero extended to 128 bits. */
1909 rtx
1910 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1911 {
1912 rtx inner;
1913 rtvec v;
1914 int units, i;
1915
1916 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1917 || GET_MODE_CLASS (mode) == MODE_FLOAT
1918 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1919 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1920
1921 if (GET_MODE_CLASS (mode) == MODE_INT)
1922 return immed_double_const (val, 0, mode);
1923
1924 /* val is the bit representation of the float */
1925 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1926 return hwint_to_const_double (mode, val);
1927
1928 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1929 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1930 else
1931 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1932
1933 units = GET_MODE_NUNITS (mode);
1934
1935 v = rtvec_alloc (units);
1936
1937 for (i = 0; i < units; ++i)
1938 RTVEC_ELT (v, i) = inner;
1939
1940 return gen_rtx_CONST_VECTOR (mode, v);
1941 }
1942
1943 /* Create a MODE vector constant from 4 ints. */
1944 rtx
1945 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1946 {
1947 unsigned char arr[16];
1948 arr[0] = (a >> 24) & 0xff;
1949 arr[1] = (a >> 16) & 0xff;
1950 arr[2] = (a >> 8) & 0xff;
1951 arr[3] = (a >> 0) & 0xff;
1952 arr[4] = (b >> 24) & 0xff;
1953 arr[5] = (b >> 16) & 0xff;
1954 arr[6] = (b >> 8) & 0xff;
1955 arr[7] = (b >> 0) & 0xff;
1956 arr[8] = (c >> 24) & 0xff;
1957 arr[9] = (c >> 16) & 0xff;
1958 arr[10] = (c >> 8) & 0xff;
1959 arr[11] = (c >> 0) & 0xff;
1960 arr[12] = (d >> 24) & 0xff;
1961 arr[13] = (d >> 16) & 0xff;
1962 arr[14] = (d >> 8) & 0xff;
1963 arr[15] = (d >> 0) & 0xff;
1964 return array_to_constant(mode, arr);
1965 }
1966 \f
1967 /* branch hint stuff */
1968
1969 /* An array of these is used to propagate hints to predecessor blocks. */
1970 struct spu_bb_info
1971 {
1972 rtx prop_jump; /* propagated from another block */
1973 int bb_index; /* the original block. */
1974 };
1975 static struct spu_bb_info *spu_bb_info;
1976
1977 #define STOP_HINT_P(INSN) \
1978 (CALL_P(INSN) \
1979 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1980 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1981
1982 /* 1 when RTX is a hinted branch or its target. We keep track of
1983 what has been hinted so the safe-hint code can test it easily. */
1984 #define HINTED_P(RTX) \
1985 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1986
1987 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1988 #define SCHED_ON_EVEN_P(RTX) \
1989 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1990
1991 /* Emit a nop for INSN such that the two will dual issue. This assumes
1992 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1993 We check for TImode to handle a MULTI1 insn which has dual issued its
1994 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1995 static void
1996 emit_nop_for_insn (rtx insn)
1997 {
1998 int p;
1999 rtx new_insn;
2000
2001 /* We need to handle JUMP_TABLE_DATA separately. */
2002 if (JUMP_TABLE_DATA_P (insn))
2003 {
2004 new_insn = emit_insn_after (gen_lnop(), insn);
2005 recog_memoized (new_insn);
2006 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2007 return;
2008 }
2009
2010 p = get_pipe (insn);
2011 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2012 new_insn = emit_insn_after (gen_lnop (), insn);
2013 else if (p == 1 && GET_MODE (insn) == TImode)
2014 {
2015 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2016 PUT_MODE (new_insn, TImode);
2017 PUT_MODE (insn, VOIDmode);
2018 }
2019 else
2020 new_insn = emit_insn_after (gen_lnop (), insn);
2021 recog_memoized (new_insn);
2022 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2023 }
2024
2025 /* Insert nops in basic blocks to meet dual issue alignment
2026 requirements. Also make sure hbrp and hint instructions are at least
2027 one cycle apart, possibly inserting a nop. */
2028 static void
2029 pad_bb(void)
2030 {
2031 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2032 int length;
2033 int addr;
2034
2035 /* This sets up INSN_ADDRESSES. */
2036 shorten_branches (get_insns ());
2037
2038 /* Keep track of length added by nops. */
2039 length = 0;
2040
2041 prev_insn = 0;
2042 insn = get_insns ();
2043 if (!active_insn_p (insn))
2044 insn = next_active_insn (insn);
2045 for (; insn; insn = next_insn)
2046 {
2047 next_insn = next_active_insn (insn);
2048 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2049 || INSN_CODE (insn) == CODE_FOR_hbr)
2050 {
2051 if (hbr_insn)
2052 {
2053 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2054 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2055 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2056 || (a1 - a0 == 4))
2057 {
2058 prev_insn = emit_insn_before (gen_lnop (), insn);
2059 PUT_MODE (prev_insn, GET_MODE (insn));
2060 PUT_MODE (insn, TImode);
2061 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2062 length += 4;
2063 }
2064 }
2065 hbr_insn = insn;
2066 }
2067 if (INSN_CODE (insn) == CODE_FOR_blockage)
2068 {
2069 if (GET_MODE (insn) == TImode)
2070 PUT_MODE (next_insn, TImode);
2071 insn = next_insn;
2072 next_insn = next_active_insn (insn);
2073 }
2074 addr = INSN_ADDRESSES (INSN_UID (insn));
2075 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2076 {
2077 if (((addr + length) & 7) != 0)
2078 {
2079 emit_nop_for_insn (prev_insn);
2080 length += 4;
2081 }
2082 }
2083 else if (GET_MODE (insn) == TImode
2084 && ((next_insn && GET_MODE (next_insn) != TImode)
2085 || get_attr_type (insn) == TYPE_MULTI0)
2086 && ((addr + length) & 7) != 0)
2087 {
2088 /* prev_insn will always be set because the first insn is
2089 always 8-byte aligned. */
2090 emit_nop_for_insn (prev_insn);
2091 length += 4;
2092 }
2093 prev_insn = insn;
2094 }
2095 }
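
/* A sketch of what pad_bb above does (addresses are hypothetical): if a
   TImode insn (the first of a dual-issue pair) would start at address
   0x104, which is not a multiple of 8, a 4-byte nop is emitted after the
   previous insn so the pair begins at 0x108.  Likewise, when an hbr ends
   up only 4 bytes past the preceding hbrp, an lnop is inserted before it
   so the two stay at least one cycle apart.  */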
2096
2097 \f
2098 /* Routines for branch hints. */
2099
2100 static void
2101 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2102 int distance, sbitmap blocks)
2103 {
2104 rtx branch_label = 0;
2105 rtx hint;
2106 rtx insn;
2107 rtx table;
2108
2109 if (before == 0 || branch == 0 || target == 0)
2110 return;
2111
2112 /* While scheduling we require hints to be no further than 600 bytes
2113 from the branch, so we need to enforce that here too. */
2114 if (distance > 600)
2115 return;
2116
2117 /* If BEFORE is a basic block note, emit the hint after the note instead. */
2118 if (NOTE_INSN_BASIC_BLOCK_P (before))
2119 before = NEXT_INSN (before);
2120
2121 branch_label = gen_label_rtx ();
2122 LABEL_NUSES (branch_label)++;
2123 LABEL_PRESERVE_P (branch_label) = 1;
2124 insn = emit_label_before (branch_label, branch);
2125 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2126 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2127
2128 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2129 recog_memoized (hint);
2130 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2131 HINTED_P (branch) = 1;
2132
2133 if (GET_CODE (target) == LABEL_REF)
2134 HINTED_P (XEXP (target, 0)) = 1;
2135 else if (tablejump_p (branch, 0, &table))
2136 {
2137 rtvec vec;
2138 int j;
2139 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2140 vec = XVEC (PATTERN (table), 0);
2141 else
2142 vec = XVEC (PATTERN (table), 1);
2143 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2144 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2145 }
2146
2147 if (distance >= 588)
2148 {
2149 /* Make sure the hint isn't scheduled any earlier than this point,
2150 which could make it too far for the branch offset to fit. */
2151 insn = emit_insn_before (gen_blockage (), hint);
2152 recog_memoized (insn);
2153 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2154 }
2155 else if (distance <= 8 * 4)
2156 {
2157 /* To guarantee at least 8 insns between the hint and branch we
2158 insert nops. */
2159 int d;
2160 for (d = distance; d < 8 * 4; d += 4)
2161 {
2162 insn =
2163 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2164 recog_memoized (insn);
2165 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2166 }
2167
2168 /* Make sure any nops inserted aren't scheduled before the hint. */
2169 insn = emit_insn_after (gen_blockage (), hint);
2170 recog_memoized (insn);
2171 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2172
2173 /* Make sure any nops inserted aren't scheduled after the call. */
2174 if (CALL_P (branch) && distance < 8 * 4)
2175 {
2176 insn = emit_insn_before (gen_blockage (), branch);
2177 recog_memoized (insn);
2178 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2179 }
2180 }
2181 }
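
/* Some illustrative distances for spu_emit_branch_hint above (values are
   hypothetical): with DISTANCE == 700 no hint is emitted because the hbr
   offset could not be guaranteed to fit; with DISTANCE == 590 the hint is
   emitted and preceded by a blockage so scheduling cannot move it further
   from the branch; with DISTANCE == 12 (three insns) five nopn insns and
   a blockage are added after the hint so that at least eight insns
   separate the hint from the branch.  */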
2182
2183 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2184 the rtx for the branch target. */
2185 static rtx
2186 get_branch_target (rtx branch)
2187 {
2188 if (JUMP_P (branch))
2189 {
2190 rtx set, src;
2191
2192 /* Return statements */
2193 if (GET_CODE (PATTERN (branch)) == RETURN)
2194 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2195
2196 /* ASM GOTOs. */
2197 if (extract_asm_operands (PATTERN (branch)) != NULL)
2198 return NULL;
2199
2200 set = single_set (branch);
2201 src = SET_SRC (set);
2202 if (GET_CODE (SET_DEST (set)) != PC)
2203 abort ();
2204
2205 if (GET_CODE (src) == IF_THEN_ELSE)
2206 {
2207 rtx lab = 0;
2208 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2209 if (note)
2210 {
2211 /* If the more probable case is not a fall through, then
2212 try a branch hint. */
2213 int prob = XINT (note, 0);
2214 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2215 && GET_CODE (XEXP (src, 1)) != PC)
2216 lab = XEXP (src, 1);
2217 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2218 && GET_CODE (XEXP (src, 2)) != PC)
2219 lab = XEXP (src, 2);
2220 }
2221 if (lab)
2222 {
2223 if (GET_CODE (lab) == RETURN)
2224 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2225 return lab;
2226 }
2227 return 0;
2228 }
2229
2230 return src;
2231 }
2232 else if (CALL_P (branch))
2233 {
2234 rtx call;
2235 /* All of our call patterns are in a PARALLEL and the CALL is
2236 the first pattern in the PARALLEL. */
2237 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2238 abort ();
2239 call = XVECEXP (PATTERN (branch), 0, 0);
2240 if (GET_CODE (call) == SET)
2241 call = SET_SRC (call);
2242 if (GET_CODE (call) != CALL)
2243 abort ();
2244 return XEXP (XEXP (call, 0), 0);
2245 }
2246 return 0;
2247 }
2248
2249 /* The special $hbr register is used to prevent the insn scheduler from
2250 moving hbr insns across instructions which invalidate them. It
2251 should only be used in a clobber, and this function searches for
2252 insns which clobber it. */
2253 static bool
2254 insn_clobbers_hbr (rtx insn)
2255 {
2256 if (INSN_P (insn)
2257 && GET_CODE (PATTERN (insn)) == PARALLEL)
2258 {
2259 rtx parallel = PATTERN (insn);
2260 rtx clobber;
2261 int j;
2262 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2263 {
2264 clobber = XVECEXP (parallel, 0, j);
2265 if (GET_CODE (clobber) == CLOBBER
2266 && GET_CODE (XEXP (clobber, 0)) == REG
2267 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2268 return 1;
2269 }
2270 }
2271 return 0;
2272 }
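
/* For example (an RTL sketch; operands and register numbers are
   illustrative), an insn whose pattern is

     (parallel [(set (reg:SI 3) (div:SI (reg:SI 4) (reg:SI 5)))
                (clobber (reg:SI HBR_REGNUM))])

   makes insn_clobbers_hbr return 1, while an insn whose pattern is a
   plain SET with no clobber of $hbr returns 0.  */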
2273
2274 /* Search up to 32 insns starting at FIRST:
2275 - at any kind of hinted branch, just return
2276 - at any unconditional branch in the first 15 insns, just return
2277 - at a call or indirect branch, after the first 15 insns, force it to
2278 an even address and return
2279 - at any unconditional branch, after the first 15 insns, force it to
2280 an even address.
2281 At the end of the search, insert an hbrp within 4 insns of FIRST,
2282 and an hbrp within 16 instructions of FIRST.
2283 */
2284 static void
2285 insert_hbrp_for_ilb_runout (rtx first)
2286 {
2287 rtx insn, before_4 = 0, before_16 = 0;
2288 int addr = 0, length, first_addr = -1;
2289 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2290 int insert_lnop_after = 0;
2291 for (insn = first; insn; insn = NEXT_INSN (insn))
2292 if (INSN_P (insn))
2293 {
2294 if (first_addr == -1)
2295 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2296 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2297 length = get_attr_length (insn);
2298
2299 if (before_4 == 0 && addr + length >= 4 * 4)
2300 before_4 = insn;
2301 /* We test for 14 instructions because the first hbrp will add
2302 up to 2 instructions. */
2303 if (before_16 == 0 && addr + length >= 14 * 4)
2304 before_16 = insn;
2305
2306 if (INSN_CODE (insn) == CODE_FOR_hbr)
2307 {
2308 /* Make sure an hbrp is at least 2 cycles away from a hint.
2309 Insert an lnop after the hbrp when necessary. */
2310 if (before_4 == 0 && addr > 0)
2311 {
2312 before_4 = insn;
2313 insert_lnop_after |= 1;
2314 }
2315 else if (before_4 && addr <= 4 * 4)
2316 insert_lnop_after |= 1;
2317 if (before_16 == 0 && addr > 10 * 4)
2318 {
2319 before_16 = insn;
2320 insert_lnop_after |= 2;
2321 }
2322 else if (before_16 && addr <= 14 * 4)
2323 insert_lnop_after |= 2;
2324 }
2325
2326 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2327 {
2328 if (addr < hbrp_addr0)
2329 hbrp_addr0 = addr;
2330 else if (addr < hbrp_addr1)
2331 hbrp_addr1 = addr;
2332 }
2333
2334 if (CALL_P (insn) || JUMP_P (insn))
2335 {
2336 if (HINTED_P (insn))
2337 return;
2338
2339 /* Any branch after the first 15 insns should be on an even
2340 address to avoid a special case branch. There might be
2341 some nops and/or hbrps inserted, so we test after 10
2342 insns. */
2343 if (addr > 10 * 4)
2344 SCHED_ON_EVEN_P (insn) = 1;
2345 }
2346
2347 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2348 return;
2349
2350
2351 if (addr + length >= 32 * 4)
2352 {
2353 gcc_assert (before_4 && before_16);
2354 if (hbrp_addr0 > 4 * 4)
2355 {
2356 insn =
2357 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2358 recog_memoized (insn);
2359 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2360 INSN_ADDRESSES_NEW (insn,
2361 INSN_ADDRESSES (INSN_UID (before_4)));
2362 PUT_MODE (insn, GET_MODE (before_4));
2363 PUT_MODE (before_4, TImode);
2364 if (insert_lnop_after & 1)
2365 {
2366 insn = emit_insn_before (gen_lnop (), before_4);
2367 recog_memoized (insn);
2368 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2369 INSN_ADDRESSES_NEW (insn,
2370 INSN_ADDRESSES (INSN_UID (before_4)));
2371 PUT_MODE (insn, TImode);
2372 }
2373 }
2374 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2375 && hbrp_addr1 > 16 * 4)
2376 {
2377 insn =
2378 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2379 recog_memoized (insn);
2380 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2381 INSN_ADDRESSES_NEW (insn,
2382 INSN_ADDRESSES (INSN_UID (before_16)));
2383 PUT_MODE (insn, GET_MODE (before_16));
2384 PUT_MODE (before_16, TImode);
2385 if (insert_lnop_after & 2)
2386 {
2387 insn = emit_insn_before (gen_lnop (), before_16);
2388 recog_memoized (insn);
2389 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2390 INSN_ADDRESSES_NEW (insn,
2391 INSN_ADDRESSES (INSN_UID
2392 (before_16)));
2393 PUT_MODE (insn, TImode);
2394 }
2395 }
2396 return;
2397 }
2398 }
2399 else if (BARRIER_P (insn))
2400 return;
2401
2402 }
2403
2404 /* The SPU might hang when it executes 48 inline instructions after a
2405 hinted branch jumps to its hinted target. The beginning of a
2406 function and the return from a call might have been hinted, and
2407 must be handled as well. To prevent a hang we insert 2 hbrps. The
2408 first should be within 6 insns of the branch target. The second
2409 should be within 22 insns of the branch target. When determining
2410 if hbrps are necessary, we look for only 32 inline instructions,
2411 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2412 when inserting new hbrps, we insert them within 4 and 16 insns of
2413 the target. */
2414 static void
2415 insert_hbrp (void)
2416 {
2417 rtx insn;
2418 if (TARGET_SAFE_HINTS)
2419 {
2420 shorten_branches (get_insns ());
2421 /* Insert hbrp at beginning of function */
2422 insn = next_active_insn (get_insns ());
2423 if (insn)
2424 insert_hbrp_for_ilb_runout (insn);
2425 /* Insert hbrp after hinted targets. */
2426 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2427 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2428 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2429 }
2430 }
2431
2432 static int in_spu_reorg;
2433
2434 static void
2435 spu_var_tracking (void)
2436 {
2437 if (flag_var_tracking)
2438 {
2439 df_analyze ();
2440 timevar_push (TV_VAR_TRACKING);
2441 variable_tracking_main ();
2442 timevar_pop (TV_VAR_TRACKING);
2443 df_finish_pass (false);
2444 }
2445 }
2446
2447 /* Insert branch hints. There are no branch optimizations after this
2448 pass, so it's safe to set our branch hints now. */
2449 static void
2450 spu_machine_dependent_reorg (void)
2451 {
2452 sbitmap blocks;
2453 basic_block bb;
2454 rtx branch, insn;
2455 rtx branch_target = 0;
2456 int branch_addr = 0, insn_addr, required_dist = 0;
2457 int i;
2458 unsigned int j;
2459
2460 if (!TARGET_BRANCH_HINTS || optimize == 0)
2461 {
2462 /* We still do it for unoptimized code because an external
2463 function might have hinted a call or return. */
2464 compute_bb_for_insn ();
2465 insert_hbrp ();
2466 pad_bb ();
2467 spu_var_tracking ();
2468 free_bb_for_insn ();
2469 return;
2470 }
2471
2472 blocks = sbitmap_alloc (last_basic_block);
2473 bitmap_clear (blocks);
2474
2475 in_spu_reorg = 1;
2476 compute_bb_for_insn ();
2477
2478 /* (Re-)discover loops so that bb->loop_father can be used
2479 in the analysis below. */
2480 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2481
2482 compact_blocks ();
2483
2484 spu_bb_info =
2485 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2486 sizeof (struct spu_bb_info));
2487
2488 /* We need exact insn addresses and lengths. */
2489 shorten_branches (get_insns ());
2490
2491 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2492 {
2493 bb = BASIC_BLOCK (i);
2494 branch = 0;
2495 if (spu_bb_info[i].prop_jump)
2496 {
2497 branch = spu_bb_info[i].prop_jump;
2498 branch_target = get_branch_target (branch);
2499 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2500 required_dist = spu_hint_dist;
2501 }
2502 /* Search from end of a block to beginning. In this loop, find
2503 jumps which need a branch hint and emit the hint only when:
2504 - it's an indirect branch and we're at the insn which sets
2505 the register
2506 - we're at an insn that will invalidate the hint. e.g., a
2507 call, another hint insn, inline asm that clobbers $hbr, and
2508 some inlined operations (divmodsi4). Don't consider jumps
2509 because they are only at the end of a block and are
2510 considered when we are deciding whether to propagate
2511 - we're getting too far away from the branch. The hbr insns
2512 only have a signed 10 bit offset
2513 We go back as far as possible so the branch will be considered
2514 for propagation when we get to the beginning of the block. */
2515 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2516 {
2517 if (INSN_P (insn))
2518 {
2519 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2520 if (branch
2521 && ((GET_CODE (branch_target) == REG
2522 && set_of (branch_target, insn) != NULL_RTX)
2523 || insn_clobbers_hbr (insn)
2524 || branch_addr - insn_addr > 600))
2525 {
2526 rtx next = NEXT_INSN (insn);
2527 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2528 if (insn != BB_END (bb)
2529 && branch_addr - next_addr >= required_dist)
2530 {
2531 if (dump_file)
2532 fprintf (dump_file,
2533 "hint for %i in block %i before %i\n",
2534 INSN_UID (branch), bb->index,
2535 INSN_UID (next));
2536 spu_emit_branch_hint (next, branch, branch_target,
2537 branch_addr - next_addr, blocks);
2538 }
2539 branch = 0;
2540 }
2541
2542 /* JUMP_P will only be true at the end of a block. When
2543 branch is already set it means we've previously decided
2544 to propagate a hint for that branch into this block. */
2545 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2546 {
2547 branch = 0;
2548 if ((branch_target = get_branch_target (insn)))
2549 {
2550 branch = insn;
2551 branch_addr = insn_addr;
2552 required_dist = spu_hint_dist;
2553 }
2554 }
2555 }
2556 if (insn == BB_HEAD (bb))
2557 break;
2558 }
2559
2560 if (branch)
2561 {
2562 /* If we haven't emitted a hint for this branch yet, it might
2563 be profitable to emit it in one of the predecessor blocks,
2564 especially for loops. */
2565 rtx bbend;
2566 basic_block prev = 0, prop = 0, prev2 = 0;
2567 int loop_exit = 0, simple_loop = 0;
2568 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2569
2570 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2571 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2572 prev = EDGE_PRED (bb, j)->src;
2573 else
2574 prev2 = EDGE_PRED (bb, j)->src;
2575
2576 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2577 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2578 loop_exit = 1;
2579 else if (EDGE_SUCC (bb, j)->dest == bb)
2580 simple_loop = 1;
2581
2582 /* If this branch is a loop exit then propagate to previous
2583 fallthru block. This catches the cases when it is a simple
2584 loop or when there is an initial branch into the loop. */
2585 if (prev && (loop_exit || simple_loop)
2586 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2587 prop = prev;
2588
2589 /* If there is only one adjacent predecessor, don't propagate
2590 outside this loop. */
2591 else if (prev && single_pred_p (bb)
2592 && prev->loop_father == bb->loop_father)
2593 prop = prev;
2594
2595 /* If this is the JOIN block of a simple IF-THEN then
2596 propagate the hint to the HEADER block. */
2597 else if (prev && prev2
2598 && EDGE_COUNT (bb->preds) == 2
2599 && EDGE_COUNT (prev->preds) == 1
2600 && EDGE_PRED (prev, 0)->src == prev2
2601 && prev2->loop_father == bb->loop_father
2602 && GET_CODE (branch_target) != REG)
2603 prop = prev;
2604
2605 /* Don't propagate when:
2606 - this is a simple loop and the hint would be too far
2607 - this is not a simple loop and there are 16 insns in
2608 this block already
2609 - the predecessor block ends in a branch that will be
2610 hinted
2611 - the predecessor block ends in an insn that invalidates
2612 the hint */
2613 if (prop
2614 && prop->index >= 0
2615 && (bbend = BB_END (prop))
2616 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2617 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2618 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2619 {
2620 if (dump_file)
2621 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2622 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2623 bb->index, prop->index, bb_loop_depth (bb),
2624 INSN_UID (branch), loop_exit, simple_loop,
2625 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2626
2627 spu_bb_info[prop->index].prop_jump = branch;
2628 spu_bb_info[prop->index].bb_index = i;
2629 }
2630 else if (branch_addr - next_addr >= required_dist)
2631 {
2632 if (dump_file)
2633 fprintf (dump_file, "hint for %i in block %i before %i\n",
2634 INSN_UID (branch), bb->index,
2635 INSN_UID (NEXT_INSN (insn)));
2636 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2637 branch_addr - next_addr, blocks);
2638 }
2639 branch = 0;
2640 }
2641 }
2642 free (spu_bb_info);
2643
2644 if (!bitmap_empty_p (blocks))
2645 find_many_sub_basic_blocks (blocks);
2646
2647 /* We have to schedule to make sure alignment is ok. */
2648 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2649
2650 /* The hints need to be scheduled, so call it again. */
2651 schedule_insns ();
2652 df_finish_pass (true);
2653
2654 insert_hbrp ();
2655
2656 pad_bb ();
2657
2658 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2659 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2660 {
2661 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2662 between its branch label and the branch. We don't move the
2663 label because GCC expects it at the beginning of the block. */
2664 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2665 rtx label_ref = XVECEXP (unspec, 0, 0);
2666 rtx label = XEXP (label_ref, 0);
2667 rtx branch;
2668 int offset = 0;
2669 for (branch = NEXT_INSN (label);
2670 !JUMP_P (branch) && !CALL_P (branch);
2671 branch = NEXT_INSN (branch))
2672 if (NONJUMP_INSN_P (branch))
2673 offset += get_attr_length (branch);
2674 if (offset > 0)
2675 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2676 }
2677
2678 spu_var_tracking ();
2679
2680 loop_optimizer_finalize ();
2681
2682 free_bb_for_insn ();
2683
2684 in_spu_reorg = 0;
2685 }
2686 \f
2687
2688 /* Insn scheduling routines, primarily for dual issue. */
2689 static int
2690 spu_sched_issue_rate (void)
2691 {
2692 return 2;
2693 }
2694
2695 static int
2696 uses_ls_unit(rtx insn)
2697 {
2698 rtx set = single_set (insn);
2699 if (set != 0
2700 && (GET_CODE (SET_DEST (set)) == MEM
2701 || GET_CODE (SET_SRC (set)) == MEM))
2702 return 1;
2703 return 0;
2704 }
2705
2706 static int
2707 get_pipe (rtx insn)
2708 {
2709 enum attr_type t;
2710 /* Handle inline asm */
2711 if (INSN_CODE (insn) == -1)
2712 return -1;
2713 t = get_attr_type (insn);
2714 switch (t)
2715 {
2716 case TYPE_CONVERT:
2717 return -2;
2718 case TYPE_MULTI0:
2719 return -1;
2720
2721 case TYPE_FX2:
2722 case TYPE_FX3:
2723 case TYPE_SPR:
2724 case TYPE_NOP:
2725 case TYPE_FXB:
2726 case TYPE_FPD:
2727 case TYPE_FP6:
2728 case TYPE_FP7:
2729 return 0;
2730
2731 case TYPE_LNOP:
2732 case TYPE_SHUF:
2733 case TYPE_LOAD:
2734 case TYPE_STORE:
2735 case TYPE_BR:
2736 case TYPE_MULTI1:
2737 case TYPE_HBR:
2738 case TYPE_IPREFETCH:
2739 return 1;
2740 default:
2741 abort ();
2742 }
2743 }
2744
2745
2746 /* haifa-sched.c has a static variable that keeps track of the current
2747 cycle. It is passed to spu_sched_reorder, and we record it here for
2748 use by spu_sched_variable_issue. It won't be accurate if the
2749 scheduler updates its clock_var between the two calls. */
2750 static int clock_var;
2751
2752 /* This is used to keep track of insn alignment. Set to 0 at the
2753 beginning of each block and increased by the "length" attr of each
2754 insn scheduled. */
2755 static int spu_sched_length;
2756
2757 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2758 ready list appropriately in spu_sched_reorder(). */
2759 static int pipe0_clock;
2760 static int pipe1_clock;
2761
2762 static int prev_clock_var;
2763
2764 static int prev_priority;
2765
2766 /* The SPU needs to load the next ilb sometime during the execution of
2767 the previous ilb. There is a potential conflict if every cycle has a
2768 load or store. To avoid the conflict we make sure the load/store
2769 unit is free for at least one cycle during the execution of insns in
2770 the previous ilb. */
2771 static int spu_ls_first;
2772 static int prev_ls_clock;
2773
2774 static void
2775 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2776 int max_ready ATTRIBUTE_UNUSED)
2777 {
2778 spu_sched_length = 0;
2779 }
2780
2781 static void
2782 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2783 int max_ready ATTRIBUTE_UNUSED)
2784 {
2785 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2786 {
2787 /* When any block might be at least 8-byte aligned, assume they
2788 will all be at least 8-byte aligned to make sure dual issue
2789 works out correctly. */
2790 spu_sched_length = 0;
2791 }
2792 spu_ls_first = INT_MAX;
2793 clock_var = -1;
2794 prev_ls_clock = -1;
2795 pipe0_clock = -1;
2796 pipe1_clock = -1;
2797 prev_clock_var = -1;
2798 prev_priority = -1;
2799 }
2800
2801 static int
2802 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2803 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2804 {
2805 int len;
2806 int p;
2807 if (GET_CODE (PATTERN (insn)) == USE
2808 || GET_CODE (PATTERN (insn)) == CLOBBER
2809 || (len = get_attr_length (insn)) == 0)
2810 return more;
2811
2812 spu_sched_length += len;
2813
2814 /* Reset on inline asm */
2815 if (INSN_CODE (insn) == -1)
2816 {
2817 spu_ls_first = INT_MAX;
2818 pipe0_clock = -1;
2819 pipe1_clock = -1;
2820 return 0;
2821 }
2822 p = get_pipe (insn);
2823 if (p == 0)
2824 pipe0_clock = clock_var;
2825 else
2826 pipe1_clock = clock_var;
2827
2828 if (in_spu_reorg)
2829 {
2830 if (clock_var - prev_ls_clock > 1
2831 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2832 spu_ls_first = INT_MAX;
2833 if (uses_ls_unit (insn))
2834 {
2835 if (spu_ls_first == INT_MAX)
2836 spu_ls_first = spu_sched_length;
2837 prev_ls_clock = clock_var;
2838 }
2839
2840 /* The scheduler hasn't inserted the nop, but we will later on.
2841 Include those nops in spu_sched_length. */
2842 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2843 spu_sched_length += 4;
2844 prev_clock_var = clock_var;
2845
2846 /* more is -1 when called from spu_sched_reorder for new insns
2847 that don't have INSN_PRIORITY. */
2848 if (more >= 0)
2849 prev_priority = INSN_PRIORITY (insn);
2850 }
2851
2852 /* Always try issuing more insns. spu_sched_reorder will decide
2853 when the cycle should be advanced. */
2854 return 1;
2855 }
2856
2857 /* This function is called for both TARGET_SCHED_REORDER and
2858 TARGET_SCHED_REORDER2. */
2859 static int
2860 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2861 rtx *ready, int *nreadyp, int clock)
2862 {
2863 int i, nready = *nreadyp;
2864 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2865 rtx insn;
2866
2867 clock_var = clock;
2868
2869 if (nready <= 0 || pipe1_clock >= clock)
2870 return 0;
2871
2872 /* Find any rtl insns that don't generate assembly insns and schedule
2873 them first. */
2874 for (i = nready - 1; i >= 0; i--)
2875 {
2876 insn = ready[i];
2877 if (INSN_CODE (insn) == -1
2878 || INSN_CODE (insn) == CODE_FOR_blockage
2879 || (INSN_P (insn) && get_attr_length (insn) == 0))
2880 {
2881 ready[i] = ready[nready - 1];
2882 ready[nready - 1] = insn;
2883 return 1;
2884 }
2885 }
2886
2887 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2888 for (i = 0; i < nready; i++)
2889 if (INSN_CODE (ready[i]) != -1)
2890 {
2891 insn = ready[i];
2892 switch (get_attr_type (insn))
2893 {
2894 default:
2895 case TYPE_MULTI0:
2896 case TYPE_CONVERT:
2897 case TYPE_FX2:
2898 case TYPE_FX3:
2899 case TYPE_SPR:
2900 case TYPE_NOP:
2901 case TYPE_FXB:
2902 case TYPE_FPD:
2903 case TYPE_FP6:
2904 case TYPE_FP7:
2905 pipe_0 = i;
2906 break;
2907 case TYPE_LOAD:
2908 case TYPE_STORE:
2909 pipe_ls = i;
2910 case TYPE_LNOP:
2911 case TYPE_SHUF:
2912 case TYPE_BR:
2913 case TYPE_MULTI1:
2914 case TYPE_HBR:
2915 pipe_1 = i;
2916 break;
2917 case TYPE_IPREFETCH:
2918 pipe_hbrp = i;
2919 break;
2920 }
2921 }
2922
2923 /* In the first scheduling phase, schedule loads and stores together
2924 to increase the chance they will get merged during postreload CSE. */
2925 if (!reload_completed && pipe_ls >= 0)
2926 {
2927 insn = ready[pipe_ls];
2928 ready[pipe_ls] = ready[nready - 1];
2929 ready[nready - 1] = insn;
2930 return 1;
2931 }
2932
2933 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2934 if (pipe_hbrp >= 0)
2935 pipe_1 = pipe_hbrp;
2936
2937 /* When we have loads/stores in every cycle of the last 15 insns and
2938 we are about to schedule another load/store, emit an hbrp insn
2939 instead. */
2940 if (in_spu_reorg
2941 && spu_sched_length - spu_ls_first >= 4 * 15
2942 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2943 {
2944 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2945 recog_memoized (insn);
2946 if (pipe0_clock < clock)
2947 PUT_MODE (insn, TImode);
2948 spu_sched_variable_issue (file, verbose, insn, -1);
2949 return 0;
2950 }
2951
2952 /* In general, we want to emit nops to increase dual issue, but dual
2953 issue isn't faster when one of the insns could be scheduled later
2954 without affecting the critical path. We look at INSN_PRIORITY to
2955 make a good guess, but it isn't perfect so -mdual-nops=n can be
2956 used to affect it. */
2957 if (in_spu_reorg && spu_dual_nops < 10)
2958 {
2959 /* When we are at an even address and we are not issuing nops to
2960 improve scheduling then we need to advance the cycle. */
2961 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2962 && (spu_dual_nops == 0
2963 || (pipe_1 != -1
2964 && prev_priority >
2965 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2966 return 0;
2967
2968 /* When at an odd address, schedule the highest priority insn
2969 without considering pipeline. */
2970 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2971 && (spu_dual_nops == 0
2972 || (prev_priority >
2973 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2974 return 1;
2975 }
2976
2977
2978 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2979 pipe0 insn in the ready list, schedule it. */
2980 if (pipe0_clock < clock && pipe_0 >= 0)
2981 schedule_i = pipe_0;
2982
2983 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2984 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2985 else
2986 schedule_i = pipe_1;
2987
2988 if (schedule_i > -1)
2989 {
2990 insn = ready[schedule_i];
2991 ready[schedule_i] = ready[nready - 1];
2992 ready[nready - 1] = insn;
2993 return 1;
2994 }
2995 return 0;
2996 }
2997
2998 /* INSN is dependent on DEP_INSN. */
2999 static int
3000 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3001 {
3002 rtx set;
3003
3004 /* The blockage pattern is used to prevent instructions from being
3005 moved across it and has no cost. */
3006 if (INSN_CODE (insn) == CODE_FOR_blockage
3007 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3008 return 0;
3009
3010 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3011 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3012 return 0;
3013
3014 /* Make sure hbrps are spread out. */
3015 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3016 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3017 return 8;
3018
3019 /* Make sure hints and hbrps are 2 cycles apart. */
3020 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3021 || INSN_CODE (insn) == CODE_FOR_hbr)
3022 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3023 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3024 return 2;
3025
3026 /* An hbrp has no real dependency on other insns. */
3027 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3028 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3029 return 0;
3030
3031 /* Assuming that it is unlikely an argument register will be used in
3032 the first cycle of the called function, we reduce the cost for
3033 slightly better scheduling of dep_insn. When not hinted, the
3034 mispredicted branch would hide the cost as well. */
3035 if (CALL_P (insn))
3036 {
3037 rtx target = get_branch_target (insn);
3038 if (GET_CODE (target) != REG || !set_of (target, insn))
3039 return cost - 2;
3040 return cost;
3041 }
3042
3043 /* And when returning from a function, let's assume the return values
3044 are completed sooner too. */
3045 if (CALL_P (dep_insn))
3046 return cost - 2;
3047
3048 /* Make sure an instruction that loads from the back chain is scheduled
3049 away from the return instruction so a hint is more likely to get
3050 issued. */
3051 if (INSN_CODE (insn) == CODE_FOR__return
3052 && (set = single_set (dep_insn))
3053 && GET_CODE (SET_DEST (set)) == REG
3054 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3055 return 20;
3056
3057 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3058 scheduler makes every insn in a block anti-dependent on the final
3059 jump_insn. We adjust here so higher cost insns will get scheduled
3060 earlier. */
3061 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3062 return insn_cost (dep_insn) - 3;
3063
3064 return cost;
3065 }
3066 \f
3067 /* Create a CONST_DOUBLE from a string. */
3068 rtx
3069 spu_float_const (const char *string, enum machine_mode mode)
3070 {
3071 REAL_VALUE_TYPE value;
3072 value = REAL_VALUE_ATOF (string, mode);
3073 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3074 }
3075
3076 int
3077 spu_constant_address_p (rtx x)
3078 {
3079 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3080 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3081 || GET_CODE (x) == HIGH);
3082 }
3083
3084 static enum spu_immediate
3085 which_immediate_load (HOST_WIDE_INT val)
3086 {
3087 gcc_assert (val == trunc_int_for_mode (val, SImode));
3088
3089 if (val >= -0x8000 && val <= 0x7fff)
3090 return SPU_IL;
3091 if (val >= 0 && val <= 0x3ffff)
3092 return SPU_ILA;
3093 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3094 return SPU_ILH;
3095 if ((val & 0xffff) == 0)
3096 return SPU_ILHU;
3097
3098 return SPU_NONE;
3099 }
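
/* A few worked examples for which_immediate_load above (values chosen
   for illustration):
     100        -> SPU_IL    (fits the signed 16-bit il range)
     0x23456    -> SPU_ILA   (non-negative and fits 18 bits)
     0x12341234 -> SPU_ILH   (both halfwords identical)
     0x00050000 -> SPU_ILHU  (low halfword is zero)
     0x12345678 -> SPU_NONE  (needs an ilhu/iohl pair)  */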
3100
3101 /* Return true when OP can be loaded by one of the il instructions, or
3102 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3103 int
3104 immediate_load_p (rtx op, enum machine_mode mode)
3105 {
3106 if (CONSTANT_P (op))
3107 {
3108 enum immediate_class c = classify_immediate (op, mode);
3109 return c == IC_IL1 || c == IC_IL1s
3110 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3111 }
3112 return 0;
3113 }
3114
3115 /* Return true if the first SIZE bytes of ARR are a constant that can be
3116 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3117 represent the size and offset of the instruction to use. */
3118 static int
3119 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3120 {
3121 int cpat, run, i, start;
3122 cpat = 1;
3123 run = 0;
3124 start = -1;
3125 for (i = 0; i < size && cpat; i++)
3126 if (arr[i] != i+16)
3127 {
3128 if (!run)
3129 {
3130 start = i;
3131 if (arr[i] == 3)
3132 run = 1;
3133 else if (arr[i] == 2 && arr[i+1] == 3)
3134 run = 2;
3135 else if (arr[i] == 0)
3136 {
3137 while (arr[i+run] == run && i+run < 16)
3138 run++;
3139 if (run != 4 && run != 8)
3140 cpat = 0;
3141 }
3142 else
3143 cpat = 0;
3144 if ((i & (run-1)) != 0)
3145 cpat = 0;
3146 i += run;
3147 }
3148 else
3149 cpat = 0;
3150 }
3151 if (cpat && (run || size < 16))
3152 {
3153 if (run == 0)
3154 run = 1;
3155 if (prun)
3156 *prun = run;
3157 if (pstart)
3158 *pstart = start == -1 ? 16-run : start;
3159 return 1;
3160 }
3161 return 0;
3162 }
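
/* For example (a sketch), the 16-byte pattern

     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03,
       0x18,0x19,0x1a,0x1b, 0x1c,0x1d,0x1e,0x1f }

   differs from the pattern { 0x10, 0x11, ..., 0x1f } only in a 4-byte
   run at offset 4, so cpat_info returns 1 with *PRUN == 4 and
   *PSTART == 4; this corresponds to the control word cwd generates for
   inserting a word at byte offset 4.  A pattern with two separate runs
   of mismatching bytes makes cpat_info return 0.  */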
3163
3164 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3165 it into a register. MODE is only valid when OP is a CONST_INT. */
3166 static enum immediate_class
3167 classify_immediate (rtx op, enum machine_mode mode)
3168 {
3169 HOST_WIDE_INT val;
3170 unsigned char arr[16];
3171 int i, j, repeated, fsmbi, repeat;
3172
3173 gcc_assert (CONSTANT_P (op));
3174
3175 if (GET_MODE (op) != VOIDmode)
3176 mode = GET_MODE (op);
3177
3178 /* A V4SI const_vector with all identical symbols is ok. */
3179 if (!flag_pic
3180 && mode == V4SImode
3181 && GET_CODE (op) == CONST_VECTOR
3182 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3183 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3184 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3185 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3186 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3187 op = CONST_VECTOR_ELT (op, 0);
3188
3189 switch (GET_CODE (op))
3190 {
3191 case SYMBOL_REF:
3192 case LABEL_REF:
3193 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3194
3195 case CONST:
3196 /* We can never know if the resulting address fits in 18 bits and can be
3197 loaded with ila. For now, assume the address will not overflow if
3198 the displacement is "small" (fits 'K' constraint). */
3199 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3200 {
3201 rtx sym = XEXP (XEXP (op, 0), 0);
3202 rtx cst = XEXP (XEXP (op, 0), 1);
3203
3204 if (GET_CODE (sym) == SYMBOL_REF
3205 && GET_CODE (cst) == CONST_INT
3206 && satisfies_constraint_K (cst))
3207 return IC_IL1s;
3208 }
3209 return IC_IL2s;
3210
3211 case HIGH:
3212 return IC_IL1s;
3213
3214 case CONST_VECTOR:
3215 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3216 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3217 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3218 return IC_POOL;
3219 /* Fall through. */
3220
3221 case CONST_INT:
3222 case CONST_DOUBLE:
3223 constant_to_array (mode, op, arr);
3224
3225 /* Check that each 4-byte slot is identical. */
3226 repeated = 1;
3227 for (i = 4; i < 16; i += 4)
3228 for (j = 0; j < 4; j++)
3229 if (arr[j] != arr[i + j])
3230 repeated = 0;
3231
3232 if (repeated)
3233 {
3234 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3235 val = trunc_int_for_mode (val, SImode);
3236
3237 if (which_immediate_load (val) != SPU_NONE)
3238 return IC_IL1;
3239 }
3240
3241 /* Any mode of 2 bytes or smaller can be loaded with an il
3242 instruction. */
3243 gcc_assert (GET_MODE_SIZE (mode) > 2);
3244
3245 fsmbi = 1;
3246 repeat = 0;
3247 for (i = 0; i < 16 && fsmbi; i++)
3248 if (arr[i] != 0 && repeat == 0)
3249 repeat = arr[i];
3250 else if (arr[i] != 0 && arr[i] != repeat)
3251 fsmbi = 0;
3252 if (fsmbi)
3253 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3254
3255 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3256 return IC_CPAT;
3257
3258 if (repeated)
3259 return IC_IL2;
3260
3261 return IC_POOL;
3262 default:
3263 break;
3264 }
3265 gcc_unreachable ();
3266 }
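
/* Some illustrative classifications from classify_immediate above
   (constants chosen for illustration, assuming TARGET_LARGE_MEM is
   false):
     (const_int 100)        SImode -> IC_IL1   (single il)
     (const_int 0x12345678) SImode -> IC_IL2   (ilhu followed by iohl)
     (const_int 0xff0000ff) SImode -> IC_FSMBI (every byte is 0x00 or 0xff)
     (symbol_ref "x")              -> IC_IL1s  (single ila of the address)  */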
3267
3268 static enum spu_immediate
3269 which_logical_immediate (HOST_WIDE_INT val)
3270 {
3271 gcc_assert (val == trunc_int_for_mode (val, SImode));
3272
3273 if (val >= -0x200 && val <= 0x1ff)
3274 return SPU_ORI;
3275 if (val >= 0 && val <= 0xffff)
3276 return SPU_IOHL;
3277 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3278 {
3279 val = trunc_int_for_mode (val, HImode);
3280 if (val >= -0x200 && val <= 0x1ff)
3281 return SPU_ORHI;
3282 if ((val & 0xff) == ((val >> 8) & 0xff))
3283 {
3284 val = trunc_int_for_mode (val, QImode);
3285 if (val >= -0x200 && val <= 0x1ff)
3286 return SPU_ORBI;
3287 }
3288 }
3289 return SPU_NONE;
3290 }
3291
3292 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3293 CONST_DOUBLEs. */
3294 static int
3295 const_vector_immediate_p (rtx x)
3296 {
3297 int i;
3298 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3299 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3300 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3301 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3302 return 0;
3303 return 1;
3304 }
3305
3306 int
3307 logical_immediate_p (rtx op, enum machine_mode mode)
3308 {
3309 HOST_WIDE_INT val;
3310 unsigned char arr[16];
3311 int i, j;
3312
3313 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3314 || GET_CODE (op) == CONST_VECTOR);
3315
3316 if (GET_CODE (op) == CONST_VECTOR
3317 && !const_vector_immediate_p (op))
3318 return 0;
3319
3320 if (GET_MODE (op) != VOIDmode)
3321 mode = GET_MODE (op);
3322
3323 constant_to_array (mode, op, arr);
3324
3325 /* Check that bytes are repeated. */
3326 for (i = 4; i < 16; i += 4)
3327 for (j = 0; j < 4; j++)
3328 if (arr[j] != arr[i + j])
3329 return 0;
3330
3331 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3332 val = trunc_int_for_mode (val, SImode);
3333
3334 i = which_logical_immediate (val);
3335 return i != SPU_NONE && i != SPU_IOHL;
3336 }
3337
3338 int
3339 iohl_immediate_p (rtx op, enum machine_mode mode)
3340 {
3341 HOST_WIDE_INT val;
3342 unsigned char arr[16];
3343 int i, j;
3344
3345 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3346 || GET_CODE (op) == CONST_VECTOR);
3347
3348 if (GET_CODE (op) == CONST_VECTOR
3349 && !const_vector_immediate_p (op))
3350 return 0;
3351
3352 if (GET_MODE (op) != VOIDmode)
3353 mode = GET_MODE (op);
3354
3355 constant_to_array (mode, op, arr);
3356
3357 /* Check that bytes are repeated. */
3358 for (i = 4; i < 16; i += 4)
3359 for (j = 0; j < 4; j++)
3360 if (arr[j] != arr[i + j])
3361 return 0;
3362
3363 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3364 val = trunc_int_for_mode (val, SImode);
3365
3366 return val >= 0 && val <= 0xffff;
3367 }
3368
3369 int
3370 arith_immediate_p (rtx op, enum machine_mode mode,
3371 HOST_WIDE_INT low, HOST_WIDE_INT high)
3372 {
3373 HOST_WIDE_INT val;
3374 unsigned char arr[16];
3375 int bytes, i, j;
3376
3377 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3378 || GET_CODE (op) == CONST_VECTOR);
3379
3380 if (GET_CODE (op) == CONST_VECTOR
3381 && !const_vector_immediate_p (op))
3382 return 0;
3383
3384 if (GET_MODE (op) != VOIDmode)
3385 mode = GET_MODE (op);
3386
3387 constant_to_array (mode, op, arr);
3388
3389 if (VECTOR_MODE_P (mode))
3390 mode = GET_MODE_INNER (mode);
3391
3392 bytes = GET_MODE_SIZE (mode);
3393 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3394
3395 /* Check that bytes are repeated. */
3396 for (i = bytes; i < 16; i += bytes)
3397 for (j = 0; j < bytes; j++)
3398 if (arr[j] != arr[i + j])
3399 return 0;
3400
3401 val = arr[0];
3402 for (j = 1; j < bytes; j++)
3403 val = (val << 8) | arr[j];
3404
3405 val = trunc_int_for_mode (val, mode);
3406
3407 return val >= low && val <= high;
3408 }
3409
3410 /* TRUE when op is an immediate and an exact power of 2, and given that
3411 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3412 all entries must be the same. */
3413 bool
3414 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3415 {
3416 enum machine_mode int_mode;
3417 HOST_WIDE_INT val;
3418 unsigned char arr[16];
3419 int bytes, i, j;
3420
3421 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3422 || GET_CODE (op) == CONST_VECTOR);
3423
3424 if (GET_CODE (op) == CONST_VECTOR
3425 && !const_vector_immediate_p (op))
3426 return 0;
3427
3428 if (GET_MODE (op) != VOIDmode)
3429 mode = GET_MODE (op);
3430
3431 constant_to_array (mode, op, arr);
3432
3433 if (VECTOR_MODE_P (mode))
3434 mode = GET_MODE_INNER (mode);
3435
3436 bytes = GET_MODE_SIZE (mode);
3437 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3438
3439 /* Check that bytes are repeated. */
3440 for (i = bytes; i < 16; i += bytes)
3441 for (j = 0; j < bytes; j++)
3442 if (arr[j] != arr[i + j])
3443 return 0;
3444
3445 val = arr[0];
3446 for (j = 1; j < bytes; j++)
3447 val = (val << 8) | arr[j];
3448
3449 val = trunc_int_for_mode (val, int_mode);
3450
3451 /* Currently, we only handle SFmode. */
3452 gcc_assert (mode == SFmode);
3453 if (mode == SFmode)
3454 {
3455 int exp = (val >> 23) - 127;
3456 return val > 0 && (val & 0x007fffff) == 0
3457 && exp >= low && exp <= high;
3458 }
3459 return FALSE;
3460 }
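
/* A worked SFmode example for exp2_immediate_p above: 8.0f has the bit
   pattern 0x41000000, the mantissa bits are all zero and the biased
   exponent is 130, so exp == 130 - 127 == 3 and the predicate accepts it
   whenever 3 is within [LOW, HIGH].  6.0f (0x40c00000) is rejected
   because its mantissa bits are nonzero.  */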
3461
3462 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3463
3464 static int
3465 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3466 {
3467 rtx x = *px;
3468 tree decl;
3469
3470 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3471 {
3472 rtx plus = XEXP (x, 0);
3473 rtx op0 = XEXP (plus, 0);
3474 rtx op1 = XEXP (plus, 1);
3475 if (GET_CODE (op1) == CONST_INT)
3476 x = op0;
3477 }
3478
3479 return (GET_CODE (x) == SYMBOL_REF
3480 && (decl = SYMBOL_REF_DECL (x)) != 0
3481 && TREE_CODE (decl) == VAR_DECL
3482 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3483 }
3484
3485 /* We accept:
3486 - any 32-bit constant (SImode, SFmode)
3487 - any constant that can be generated with fsmbi (any mode)
3488 - a 64-bit constant where the high and low bits are identical
3489 (DImode, DFmode)
3490 - a 128-bit constant where the four 32-bit words match. */
3491 bool
3492 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3493 {
3494 if (GET_CODE (x) == HIGH)
3495 x = XEXP (x, 0);
3496
3497 /* Reject any __ea qualified reference. These can't appear in
3498 instructions but must be forced to the constant pool. */
3499 if (for_each_rtx (&x, ea_symbol_ref, 0))
3500 return 0;
3501
3502 /* V4SI with all identical symbols is valid. */
3503 if (!flag_pic
3504 && mode == V4SImode
3505 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3506 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3507 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3508 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3509 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3510 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3511
3512 if (GET_CODE (x) == CONST_VECTOR
3513 && !const_vector_immediate_p (x))
3514 return 0;
3515 return 1;
3516 }
3517
3518 /* Valid addresses are:
3519 - symbol_ref, label_ref, const
3520 - reg
3521 - reg + const_int, where const_int is 16 byte aligned
3522 - reg + reg, alignment doesn't matter
3523 The alignment matters in the reg+const case because lqd and stqd
3524 ignore the 4 least significant bits of the const. We only care about
3525 16 byte modes because the expand phase will change all smaller MEM
3526 references to TImode. */
3527 static bool
3528 spu_legitimate_address_p (enum machine_mode mode,
3529 rtx x, bool reg_ok_strict)
3530 {
3531 int aligned = GET_MODE_SIZE (mode) >= 16;
3532 if (aligned
3533 && GET_CODE (x) == AND
3534 && GET_CODE (XEXP (x, 1)) == CONST_INT
3535 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3536 x = XEXP (x, 0);
3537 switch (GET_CODE (x))
3538 {
3539 case LABEL_REF:
3540 return !TARGET_LARGE_MEM;
3541
3542 case SYMBOL_REF:
3543 case CONST:
3544 /* Keep __ea references until reload so that spu_expand_mov can see them
3545 in MEMs. */
3546 if (ea_symbol_ref (&x, 0))
3547 return !reload_in_progress && !reload_completed;
3548 return !TARGET_LARGE_MEM;
3549
3550 case CONST_INT:
3551 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3552
3553 case SUBREG:
3554 x = XEXP (x, 0);
3555 if (REG_P (x))
3556 return 0;
3557
3558 case REG:
3559 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3560
3561 case PLUS:
3562 case LO_SUM:
3563 {
3564 rtx op0 = XEXP (x, 0);
3565 rtx op1 = XEXP (x, 1);
3566 if (GET_CODE (op0) == SUBREG)
3567 op0 = XEXP (op0, 0);
3568 if (GET_CODE (op1) == SUBREG)
3569 op1 = XEXP (op1, 0);
3570 if (GET_CODE (op0) == REG
3571 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3572 && GET_CODE (op1) == CONST_INT
3573 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3574 /* If virtual registers are involved, the displacement will
3575 change later on anyway, so checking would be premature.
3576 Reload will make sure the final displacement after
3577 register elimination is OK. */
3578 || op0 == arg_pointer_rtx
3579 || op0 == frame_pointer_rtx
3580 || op0 == virtual_stack_vars_rtx)
3581 && (!aligned || (INTVAL (op1) & 15) == 0))
3582 return TRUE;
3583 if (GET_CODE (op0) == REG
3584 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3585 && GET_CODE (op1) == REG
3586 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3587 return TRUE;
3588 }
3589 break;
3590
3591 default:
3592 break;
3593 }
3594 return FALSE;
3595 }
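
/* Some illustrative addresses for a 16-byte mode such as TImode
   (register numbers are hypothetical):
     (reg 3)                            valid
     (plus (reg 3) (const_int 32))      valid, 32 is 16-byte aligned
     (plus (reg 3) (const_int 20))      invalid, offset not 16-byte aligned
     (plus (reg 3) (const_int 0x4000))  invalid, outside the +/-8K d-form range
     (plus (reg 3) (reg 4))             valid x-form
     (and (plus (reg 3) (reg 4)) (const_int -16))
                                        valid, the AND is stripped first  */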
3596
3597 /* Like spu_legitimate_address_p, except with named addresses. */
3598 static bool
3599 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3600 bool reg_ok_strict, addr_space_t as)
3601 {
3602 if (as == ADDR_SPACE_EA)
3603 return (REG_P (x) && (GET_MODE (x) == EAmode));
3604
3605 else if (as != ADDR_SPACE_GENERIC)
3606 gcc_unreachable ();
3607
3608 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3609 }
3610
3611 /* When the address is reg + const_int, force the const_int into a
3612 register. */
3613 static rtx
3614 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3615 enum machine_mode mode ATTRIBUTE_UNUSED)
3616 {
3617 rtx op0, op1;
3618 /* Make sure both operands are registers. */
3619 if (GET_CODE (x) == PLUS)
3620 {
3621 op0 = XEXP (x, 0);
3622 op1 = XEXP (x, 1);
3623 if (ALIGNED_SYMBOL_REF_P (op0))
3624 {
3625 op0 = force_reg (Pmode, op0);
3626 mark_reg_pointer (op0, 128);
3627 }
3628 else if (GET_CODE (op0) != REG)
3629 op0 = force_reg (Pmode, op0);
3630 if (ALIGNED_SYMBOL_REF_P (op1))
3631 {
3632 op1 = force_reg (Pmode, op1);
3633 mark_reg_pointer (op1, 128);
3634 }
3635 else if (GET_CODE (op1) != REG)
3636 op1 = force_reg (Pmode, op1);
3637 x = gen_rtx_PLUS (Pmode, op0, op1);
3638 }
3639 return x;
3640 }
3641
3642 /* Like spu_legitimize_address, except with named address support. */
3643 static rtx
3644 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3645 addr_space_t as)
3646 {
3647 if (as != ADDR_SPACE_GENERIC)
3648 return x;
3649
3650 return spu_legitimize_address (x, oldx, mode);
3651 }
3652
3653 /* Reload reg + const_int for out-of-range displacements. */
3654 rtx
3655 spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3656 int opnum, int type)
3657 {
3658 bool removed_and = false;
3659
3660 if (GET_CODE (ad) == AND
3661 && CONST_INT_P (XEXP (ad, 1))
3662 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3663 {
3664 ad = XEXP (ad, 0);
3665 removed_and = true;
3666 }
3667
3668 if (GET_CODE (ad) == PLUS
3669 && REG_P (XEXP (ad, 0))
3670 && CONST_INT_P (XEXP (ad, 1))
3671 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3672 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3673 {
3674 /* Unshare the sum. */
3675 ad = copy_rtx (ad);
3676
3677 /* Reload the displacement. */
3678 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3679 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3680 opnum, (enum reload_type) type);
3681
3682 /* Add back AND for alignment if we stripped it. */
3683 if (removed_and)
3684 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3685
3686 return ad;
3687 }
3688
3689 return NULL_RTX;
3690 }
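
/* For example (a sketch), given the address (plus (reg 3) (const_int
   0x3000)), the displacement 0x3000 is outside the +/-8K range accepted
   by spu_legitimate_address_p, so the constant is pushed to a reload
   register and the address effectively becomes a reg+reg form; when the
   original address was wrapped in (and ... (const_int -16)), the AND is
   put back on afterwards.  */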
3691
3692 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3693 struct attribute_spec.handler. */
3694 static tree
3695 spu_handle_fndecl_attribute (tree * node,
3696 tree name,
3697 tree args ATTRIBUTE_UNUSED,
3698 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3699 {
3700 if (TREE_CODE (*node) != FUNCTION_DECL)
3701 {
3702 warning (0, "%qE attribute only applies to functions",
3703 name);
3704 *no_add_attrs = true;
3705 }
3706
3707 return NULL_TREE;
3708 }
3709
3710 /* Handle the "vector" attribute. */
3711 static tree
3712 spu_handle_vector_attribute (tree * node, tree name,
3713 tree args ATTRIBUTE_UNUSED,
3714 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3715 {
3716 tree type = *node, result = NULL_TREE;
3717 enum machine_mode mode;
3718 int unsigned_p;
3719
3720 while (POINTER_TYPE_P (type)
3721 || TREE_CODE (type) == FUNCTION_TYPE
3722 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3723 type = TREE_TYPE (type);
3724
3725 mode = TYPE_MODE (type);
3726
3727 unsigned_p = TYPE_UNSIGNED (type);
3728 switch (mode)
3729 {
3730 case DImode:
3731 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3732 break;
3733 case SImode:
3734 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3735 break;
3736 case HImode:
3737 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3738 break;
3739 case QImode:
3740 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3741 break;
3742 case SFmode:
3743 result = V4SF_type_node;
3744 break;
3745 case DFmode:
3746 result = V2DF_type_node;
3747 break;
3748 default:
3749 break;
3750 }
3751
3752 /* Propagate qualifiers attached to the element type
3753 onto the vector type. */
3754 if (result && result != type && TYPE_QUALS (type))
3755 result = build_qualified_type (result, TYPE_QUALS (type));
3756
3757 *no_add_attrs = true; /* No need to hang on to the attribute. */
3758
3759 if (!result)
3760 warning (0, "%qE attribute ignored", name);
3761 else
3762 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3763
3764 return NULL_TREE;
3765 }
3766
3767 /* Return nonzero if FUNC is a naked function. */
3768 static int
3769 spu_naked_function_p (tree func)
3770 {
3771 tree a;
3772
3773 if (TREE_CODE (func) != FUNCTION_DECL)
3774 abort ();
3775
3776 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3777 return a != NULL_TREE;
3778 }
3779
3780 int
3781 spu_initial_elimination_offset (int from, int to)
3782 {
3783 int saved_regs_size = spu_saved_regs_size ();
3784 int sp_offset = 0;
3785 if (!crtl->is_leaf || crtl->outgoing_args_size
3786 || get_frame_size () || saved_regs_size)
3787 sp_offset = STACK_POINTER_OFFSET;
3788 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3789 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3790 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3791 return get_frame_size ();
3792 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3793 return sp_offset + crtl->outgoing_args_size
3794 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3795 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3796 return get_frame_size () + saved_regs_size + sp_offset;
3797 else
3798 gcc_unreachable ();
3799 }
3800
3801 rtx
3802 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3803 {
3804 enum machine_mode mode = TYPE_MODE (type);
3805 int byte_size = ((mode == BLKmode)
3806 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3807
3808 /* Make sure small structs are left justified in a register. */
3809 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3810 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3811 {
3812 enum machine_mode smode;
3813 rtvec v;
3814 int i;
3815 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3816 int n = byte_size / UNITS_PER_WORD;
3817 v = rtvec_alloc (nregs);
3818 for (i = 0; i < n; i++)
3819 {
3820 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3821 gen_rtx_REG (TImode,
3822 FIRST_RETURN_REGNUM
3823 + i),
3824 GEN_INT (UNITS_PER_WORD * i));
3825 byte_size -= UNITS_PER_WORD;
3826 }
3827
3828 if (n < nregs)
3829 {
3830 if (byte_size < 4)
3831 byte_size = 4;
3832 smode =
3833 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3834 RTVEC_ELT (v, n) =
3835 gen_rtx_EXPR_LIST (VOIDmode,
3836 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3837 GEN_INT (UNITS_PER_WORD * n));
3838 }
3839 return gen_rtx_PARALLEL (mode, v);
3840 }
3841 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3842 }
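
/* A worked example for spu_function_value above (assuming
   FIRST_RETURN_REGNUM is register $3): returning a 20-byte BLKmode
   struct gives byte_size == 20, nregs == 2 and n == 1, so the result is
   a PARALLEL whose first element pairs (reg:TI $3) with byte offset 0
   and whose second pairs (reg:SI $4) with byte offset 16; the first 16
   bytes come back in $3 and the remaining 4 bytes left-justified in $4.  */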
3843
3844 static rtx
3845 spu_function_arg (cumulative_args_t cum_v,
3846 enum machine_mode mode,
3847 const_tree type, bool named ATTRIBUTE_UNUSED)
3848 {
3849 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3850 int byte_size;
3851
3852 if (*cum >= MAX_REGISTER_ARGS)
3853 return 0;
3854
3855 byte_size = ((mode == BLKmode)
3856 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3857
3858 /* The ABI does not allow parameters to be passed partially in a
3859 register and partially on the stack. */
3860 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3861 return 0;
3862
3863 /* Make sure small structs are left justified in a register. */
3864 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3865 && byte_size < UNITS_PER_WORD && byte_size > 0)
3866 {
3867 enum machine_mode smode;
3868 rtx gr_reg;
3869 if (byte_size < 4)
3870 byte_size = 4;
3871 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3872 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3873 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3874 const0_rtx);
3875 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3876 }
3877 else
3878 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3879 }
3880
3881 static void
3882 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
3883 const_tree type, bool named ATTRIBUTE_UNUSED)
3884 {
3885 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3886
3887 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3888 ? 1
3889 : mode == BLKmode
3890 ? ((int_size_in_bytes (type) + 15) / 16)
3891 : mode == VOIDmode
3892 ? 1
3893 : HARD_REGNO_NREGS (cum, mode));
3894 }
3895
3896 /* Variable sized types are passed by reference. */
3897 static bool
3898 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3899 enum machine_mode mode ATTRIBUTE_UNUSED,
3900 const_tree type, bool named ATTRIBUTE_UNUSED)
3901 {
3902 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3903 }
3904 \f
3905
3906 /* Var args. */
3907
3908 /* Create and return the va_list datatype.
3909
3910 On SPU, va_list is an array type equivalent to
3911
3912 typedef struct __va_list_tag
3913 {
3914 void *__args __attribute__((__aligned(16)));
3915 void *__skip __attribute__((__aligned(16)));
3916
3917 } va_list[1];
3918
3919 where __args points to the arg that will be returned by the next
3920 va_arg(), and __skip points to the previous stack frame such that
3921 when __args == __skip we should advance __args by 32 bytes. */
3922 static tree
3923 spu_build_builtin_va_list (void)
3924 {
3925 tree f_args, f_skip, record, type_decl;
3926 bool owp;
3927
3928 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3929
3930 type_decl =
3931 build_decl (BUILTINS_LOCATION,
3932 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3933
3934 f_args = build_decl (BUILTINS_LOCATION,
3935 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3936 f_skip = build_decl (BUILTINS_LOCATION,
3937 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3938
3939 DECL_FIELD_CONTEXT (f_args) = record;
3940 DECL_ALIGN (f_args) = 128;
3941 DECL_USER_ALIGN (f_args) = 1;
3942
3943 DECL_FIELD_CONTEXT (f_skip) = record;
3944 DECL_ALIGN (f_skip) = 128;
3945 DECL_USER_ALIGN (f_skip) = 1;
3946
3947 TYPE_STUB_DECL (record) = type_decl;
3948 TYPE_NAME (record) = type_decl;
3949 TYPE_FIELDS (record) = f_args;
3950 DECL_CHAIN (f_args) = f_skip;
3951
3952 /* We know this is being padded and we want it that way. It is an internal
3953 type so hide the warnings from the user. */
3954 owp = warn_padded;
3955 warn_padded = false;
3956
3957 layout_type (record);
3958
3959 warn_padded = owp;
3960
3961 /* The correct type is an array type of one element. */
3962 return build_array_type (record, build_index_type (size_zero_node));
3963 }
3964
3965 /* Implement va_start by filling the va_list structure VALIST.
3966 NEXTARG points to the first anonymous stack argument.
3967
3968 The following global variables are used to initialize
3969 the va_list structure:
3970
3971 crtl->args.info;
3972 the CUMULATIVE_ARGS for this function
3973
3974 crtl->args.arg_offset_rtx:
3975 holds the offset of the first anonymous stack argument
3976 (relative to the virtual arg pointer). */
3977
3978 static void
3979 spu_va_start (tree valist, rtx nextarg)
3980 {
3981 tree f_args, f_skip;
3982 tree args, skip, t;
3983
3984 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3985 f_skip = DECL_CHAIN (f_args);
3986
3987 valist = build_simple_mem_ref (valist);
3988 args =
3989 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3990 skip =
3991 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3992
3993 /* Find the __args area. */
3994 t = make_tree (TREE_TYPE (args), nextarg);
3995 if (crtl->args.pretend_args_size > 0)
3996 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3997 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3998 TREE_SIDE_EFFECTS (t) = 1;
3999 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4000
4001 /* Find the __skip area. */
4002 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4003 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4004 - STACK_POINTER_OFFSET));
4005 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4006 TREE_SIDE_EFFECTS (t) = 1;
4007 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4008 }
4009
4010 /* Gimplify va_arg by updating the va_list structure
4011 VALIST as required to retrieve an argument of type
4012 TYPE, and returning that argument.
4013
4014 ret = va_arg(VALIST, TYPE);
4015
4016 generates code equivalent to:
4017
4018 paddedsize = (sizeof(TYPE) + 15) & -16;
4019 if (VALIST.__args + paddedsize > VALIST.__skip
4020 && VALIST.__args <= VALIST.__skip)
4021 addr = VALIST.__skip + 32;
4022 else
4023 addr = VALIST.__args;
4024 VALIST.__args = addr + paddedsize;
4025 ret = *(TYPE *)addr;
4026 */
4027 static tree
4028 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4029 gimple_seq * post_p ATTRIBUTE_UNUSED)
4030 {
4031 tree f_args, f_skip;
4032 tree args, skip;
4033 HOST_WIDE_INT size, rsize;
4034 tree addr, tmp;
4035 bool pass_by_reference_p;
4036
4037 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4038 f_skip = DECL_CHAIN (f_args);
4039
4040 valist = build_simple_mem_ref (valist);
4041 args =
4042 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4043 skip =
4044 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4045
4046 addr = create_tmp_var (ptr_type_node, "va_arg");
4047
4048 /* if an object is dynamically sized, a pointer to it is passed
4049 instead of the object itself. */
4050 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4051 false);
4052 if (pass_by_reference_p)
4053 type = build_pointer_type (type);
4054 size = int_size_in_bytes (type);
4055 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4056
4057 /* build conditional expression to calculate addr. The expression
4058 will be gimplified later. */
4059 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4060 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4061 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4062 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4063 unshare_expr (skip)));
4064
4065 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4066 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4067 unshare_expr (args));
4068
4069 gimplify_assign (addr, tmp, pre_p);
4070
4071 /* update VALIST.__args */
4072 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4073 gimplify_assign (unshare_expr (args), tmp, pre_p);
4074
4075 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4076 addr);
4077
4078 if (pass_by_reference_p)
4079 addr = build_va_arg_indirect_ref (addr);
4080
4081 return build_va_arg_indirect_ref (addr);
4082 }
4083
4084 /* Save parameter registers starting with the register that corresponds
4085 to the first unnamed parameter. If the first unnamed parameter is
4086 in the stack then save no registers. Set pretend_args_size to the
4087 amount of space needed to save the registers. */
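/* Illustrative example: for a function declared as int f (int a, ...),
   CUM counts the single named argument, so ncum below advances past it
   and each remaining argument register is dumped as a V4SImode quadword
   into the pretend-args area at 16-byte intervals, starting at
   -STACK_POINTER_OFFSET from the incoming argument pointer.  */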
4088 static void
4089 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4090 tree type, int *pretend_size, int no_rtl)
4091 {
4092 if (!no_rtl)
4093 {
4094 rtx tmp;
4095 int regno;
4096 int offset;
4097 int ncum = *get_cumulative_args (cum);
4098
4099 /* cum currently points to the last named argument; we want to
4100 start at the next argument. */
4101 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4102
4103 offset = -STACK_POINTER_OFFSET;
4104 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4105 {
4106 tmp = gen_frame_mem (V4SImode,
4107 plus_constant (Pmode, virtual_incoming_args_rtx,
4108 offset));
4109 emit_move_insn (tmp,
4110 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4111 offset += 16;
4112 }
4113 *pretend_size = offset + STACK_POINTER_OFFSET;
4114 }
4115 }
4116 \f
4117 static void
4118 spu_conditional_register_usage (void)
4119 {
4120 if (flag_pic)
4121 {
4122 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4123 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4124 }
4125 }
4126
4127 /* This is called any time we inspect the alignment of a register for
4128 addresses. */
4129 static int
4130 reg_aligned_for_addr (rtx x)
4131 {
4132 int regno =
4133 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4134 return REGNO_POINTER_ALIGN (regno) >= 128;
4135 }
4136
4137 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4138 into its SYMBOL_REF_FLAGS. */
4139 static void
4140 spu_encode_section_info (tree decl, rtx rtl, int first)
4141 {
4142 default_encode_section_info (decl, rtl, first);
4143
4144 /* If a variable has a forced alignment to < 16 bytes, mark it with
4145 SYMBOL_FLAG_ALIGN1. */
4146 if (TREE_CODE (decl) == VAR_DECL
4147 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4148 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4149 }
4150
4151 /* Return TRUE if we are certain the mem refers to a complete object
4152 which is both 16-byte aligned and padded to a 16-byte boundary. This
4153 would make it safe to store with a single instruction.
4154 We guarantee the alignment and padding for static objects by aligning
4155 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4156 FIXME: We currently cannot guarantee this for objects on the stack
4157 because assign_parm_setup_stack calls assign_stack_local with the
4158 alignment of the parameter mode and in that case the alignment never
4159 gets adjusted by LOCAL_ALIGNMENT. */
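/* Illustrative consequence (see spu_split_store below): when this test
   fails, storing e.g. a char becomes a read-modify-write of the
   containing quadword, roughly lqd/lqx, cbd, shufb, stqd/stqx, whereas
   a store that covers a whole 16-byte aligned and padded object can use
   a single quadword store.  */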
4160 static int
4161 store_with_one_insn_p (rtx mem)
4162 {
4163 enum machine_mode mode = GET_MODE (mem);
4164 rtx addr = XEXP (mem, 0);
4165 if (mode == BLKmode)
4166 return 0;
4167 if (GET_MODE_SIZE (mode) >= 16)
4168 return 1;
4169 /* Only static objects. */
4170 if (GET_CODE (addr) == SYMBOL_REF)
4171 {
4172 /* We use the associated declaration to make sure the access is
4173 referring to the whole object.
4174 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4175 if it is necessary. Will there be cases where one exists, and
4176 the other does not? Will there be cases where both exist, but
4177 have different types? */
4178 tree decl = MEM_EXPR (mem);
4179 if (decl
4180 && TREE_CODE (decl) == VAR_DECL
4181 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4182 return 1;
4183 decl = SYMBOL_REF_DECL (addr);
4184 if (decl
4185 && TREE_CODE (decl) == VAR_DECL
4186 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4187 return 1;
4188 }
4189 return 0;
4190 }
4191
4192 /* Return 1 when the address is not valid for a simple load and store as
4193 required by the '_mov*' patterns. We could make this less strict
4194 for loads, but we prefer mem's to look the same so they are more
4195 likely to be merged. */
4196 static int
4197 address_needs_split (rtx mem)
4198 {
4199 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4200 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4201 || !(store_with_one_insn_p (mem)
4202 || mem_is_padded_component_ref (mem))))
4203 return 1;
4204
4205 return 0;
4206 }
4207
4208 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4209 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4210 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4211
4212 /* MEM is known to be an __ea qualified memory access. Emit a call to
4213 fetch the ppu memory to local store, and return its address in local
4214 store. */
4215
4216 static void
4217 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4218 {
4219 if (is_store)
4220 {
4221 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4222 if (!cache_fetch_dirty)
4223 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4224 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4225 2, ea_addr, EAmode, ndirty, SImode);
4226 }
4227 else
4228 {
4229 if (!cache_fetch)
4230 cache_fetch = init_one_libfunc ("__cache_fetch");
4231 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4232 1, ea_addr, EAmode);
4233 }
4234 }
4235
4236 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4237 dirty bit marking, inline.
4238
4239 The cache control data structure is an array of
4240
4241 struct __cache_tag_array
4242 {
4243 unsigned int tag_lo[4];
4244 unsigned int tag_hi[4];
4245 void *data_pointer[4];
4246 int reserved[4];
4247 vector unsigned short dirty_bits[4];
4248 } */
4249
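/* A small worked example of the address arithmetic below, assuming the
   32-bit __ea model: for ea_addr = 0x12345,
     block_off = 0x12345 & 127    = 0x45    (offset within the line)
     tag       = 0x12345 & -128   = 0x12300
     tag_index = 0x12345 & (__cache_tag_array_size - 128)
   so a hit is the slot (0..3) of the selected 4-way set whose stored
   tag equals 0x12300, and the final data address is that slot's
   data_pointer plus block_off.  */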
4250 static void
4251 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4252 {
4253 rtx ea_addr_si;
4254 HOST_WIDE_INT v;
4255 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4256 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4257 rtx index_mask = gen_reg_rtx (SImode);
4258 rtx tag_arr = gen_reg_rtx (Pmode);
4259 rtx splat_mask = gen_reg_rtx (TImode);
4260 rtx splat = gen_reg_rtx (V4SImode);
4261 rtx splat_hi = NULL_RTX;
4262 rtx tag_index = gen_reg_rtx (Pmode);
4263 rtx block_off = gen_reg_rtx (SImode);
4264 rtx tag_addr = gen_reg_rtx (Pmode);
4265 rtx tag = gen_reg_rtx (V4SImode);
4266 rtx cache_tag = gen_reg_rtx (V4SImode);
4267 rtx cache_tag_hi = NULL_RTX;
4268 rtx cache_ptrs = gen_reg_rtx (TImode);
4269 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4270 rtx tag_equal = gen_reg_rtx (V4SImode);
4271 rtx tag_equal_hi = NULL_RTX;
4272 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4273 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4274 rtx eq_index = gen_reg_rtx (SImode);
4275 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4276
4277 if (spu_ea_model != 32)
4278 {
4279 splat_hi = gen_reg_rtx (V4SImode);
4280 cache_tag_hi = gen_reg_rtx (V4SImode);
4281 tag_equal_hi = gen_reg_rtx (V4SImode);
4282 }
4283
4284 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4285 emit_move_insn (tag_arr, tag_arr_sym);
4286 v = 0x0001020300010203LL;
4287 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4288 ea_addr_si = ea_addr;
4289 if (spu_ea_model != 32)
4290 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4291
4292 /* tag_index = ea_addr & (tag_array_size - 128) */
4293 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4294
4295 /* splat ea_addr to all 4 slots. */
4296 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4297 /* Similarly for high 32 bits of ea_addr. */
4298 if (spu_ea_model != 32)
4299 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4300
4301 /* block_off = ea_addr & 127 */
4302 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4303
4304 /* tag_addr = tag_arr + tag_index */
4305 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4306
4307 /* Read cache tags. */
4308 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4309 if (spu_ea_model != 32)
4310 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4311 plus_constant (Pmode,
4312 tag_addr, 16)));
4313
4314 /* tag = ea_addr & -128 */
4315 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4316
4317 /* Read all four cache data pointers. */
4318 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4319 plus_constant (Pmode,
4320 tag_addr, 32)));
4321
4322 /* Compare tags. */
4323 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4324 if (spu_ea_model != 32)
4325 {
4326 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4327 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4328 }
4329
4330 /* At most one of the tags compare equal, so tag_equal has one
4331 32-bit slot set to all 1's, with the other slots all zero.
4332 gbb picks off low bit from each byte in the 128-bit registers,
4333 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4334 we have a hit. */
4335 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4336 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4337
4338 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4339 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4340
4341 /* This allows us to rotate the corresponding cache data pointer into
4342 slot 0 (rotqby rotates by eq_index mod 16 bytes). */
4343 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4344 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4345
4346 /* Add block offset to form final data address. */
4347 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4348
4349 /* Check that we did hit. */
4350 hit_label = gen_label_rtx ();
4351 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4352 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4353 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4354 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4355 hit_ref, pc_rtx)));
4356 /* Say that this branch is very likely to happen. */
4357 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4358 add_int_reg_note (insn, REG_BR_PROB, v);
4359
4360 ea_load_store (mem, is_store, ea_addr, data_addr);
4361 cont_label = gen_label_rtx ();
4362 emit_jump_insn (gen_jump (cont_label));
4363 emit_barrier ();
4364
4365 emit_label (hit_label);
4366
4367 if (is_store)
4368 {
4369 HOST_WIDE_INT v_hi;
4370 rtx dirty_bits = gen_reg_rtx (TImode);
4371 rtx dirty_off = gen_reg_rtx (SImode);
4372 rtx dirty_128 = gen_reg_rtx (TImode);
4373 rtx neg_block_off = gen_reg_rtx (SImode);
4374
4375 /* Set up mask with one dirty bit per byte of the mem we are
4376 writing, starting from top bit. */
4377 v_hi = v = -1;
4378 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4379 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4380 {
4381 v_hi = v;
4382 v = 0;
4383 }
4384 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4385
4386 /* Form index into cache dirty_bits. eq_index is one of
4387 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4388 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4389 offset to each of the four dirty_bits elements. */
4390 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4391
4392 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4393
4394 /* Rotate bit mask to proper bit. */
4395 emit_insn (gen_negsi2 (neg_block_off, block_off));
4396 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4397 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4398
4399 /* Or in the new dirty bits. */
4400 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4401
4402 /* Store. */
4403 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4404 }
4405
4406 emit_label (cont_label);
4407 }
4408
4409 static rtx
4410 expand_ea_mem (rtx mem, bool is_store)
4411 {
4412 rtx ea_addr;
4413 rtx data_addr = gen_reg_rtx (Pmode);
4414 rtx new_mem;
4415
4416 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4417 if (optimize_size || optimize == 0)
4418 ea_load_store (mem, is_store, ea_addr, data_addr);
4419 else
4420 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4421
4422 if (ea_alias_set == -1)
4423 ea_alias_set = new_alias_set ();
4424
4425 /* We generate a new MEM RTX to refer to the copy of the data
4426 in the cache. We do not copy memory attributes (except the
4427 alignment) from the original MEM, as they may no longer apply
4428 to the cache copy. */
4429 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4430 set_mem_alias_set (new_mem, ea_alias_set);
4431 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4432
4433 return new_mem;
4434 }
4435
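/* Illustrative usage (not from this file): dereferencing a pointer that
   is qualified with the __ea address space, e.g.

       __ea int *p;  ...  x = *p;

   reaches spu_expand_mov below with a MEM in the __ea address space.
   expand_ea_mem rewrites that MEM to refer to the software-cache copy
   in local store, via either a __cache_fetch call or the inline tag
   check above, before the normal split-load/split-store path runs.  */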
4436 int
4437 spu_expand_mov (rtx * ops, enum machine_mode mode)
4438 {
4439 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4440 {
4441 /* Perform the move in the destination SUBREG's inner mode. */
4442 ops[0] = SUBREG_REG (ops[0]);
4443 mode = GET_MODE (ops[0]);
4444 ops[1] = gen_lowpart_common (mode, ops[1]);
4445 gcc_assert (ops[1]);
4446 }
4447
4448 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4449 {
4450 rtx from = SUBREG_REG (ops[1]);
4451 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4452
4453 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4454 && GET_MODE_CLASS (imode) == MODE_INT
4455 && subreg_lowpart_p (ops[1]));
4456
4457 if (GET_MODE_SIZE (imode) < 4)
4458 imode = SImode;
4459 if (imode != GET_MODE (from))
4460 from = gen_rtx_SUBREG (imode, from, 0);
4461
4462 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4463 {
4464 enum insn_code icode = convert_optab_handler (trunc_optab,
4465 mode, imode);
4466 emit_insn (GEN_FCN (icode) (ops[0], from));
4467 }
4468 else
4469 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4470 return 1;
4471 }
4472
4473 /* At least one of the operands needs to be a register. */
4474 if ((reload_in_progress | reload_completed) == 0
4475 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4476 {
4477 rtx temp = force_reg (mode, ops[1]);
4478 emit_move_insn (ops[0], temp);
4479 return 1;
4480 }
4481 if (reload_in_progress || reload_completed)
4482 {
4483 if (CONSTANT_P (ops[1]))
4484 return spu_split_immediate (ops);
4485 return 0;
4486 }
4487
4488 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4489 extend them. */
4490 if (GET_CODE (ops[1]) == CONST_INT)
4491 {
4492 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4493 if (val != INTVAL (ops[1]))
4494 {
4495 emit_move_insn (ops[0], GEN_INT (val));
4496 return 1;
4497 }
4498 }
4499 if (MEM_P (ops[0]))
4500 {
4501 if (MEM_ADDR_SPACE (ops[0]))
4502 ops[0] = expand_ea_mem (ops[0], true);
4503 return spu_split_store (ops);
4504 }
4505 if (MEM_P (ops[1]))
4506 {
4507 if (MEM_ADDR_SPACE (ops[1]))
4508 ops[1] = expand_ea_mem (ops[1], false);
4509 return spu_split_load (ops);
4510 }
4511
4512 return 0;
4513 }
4514
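/* Copy the scalar value held in the TImode register SRC into DST, which
   has a narrower mode.  A scalar lives in the preferred slot of its
   quadword (the first word for 32-bit and smaller values, the first
   doubleword for DImode), so shifting the quadword right by 96 bits
   (64 for DImode) moves it to the low end of the register, where
   TRUNCATE can extract it.  */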
4515 static void
4516 spu_convert_move (rtx dst, rtx src)
4517 {
4518 enum machine_mode mode = GET_MODE (dst);
4519 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4520 rtx reg;
4521 gcc_assert (GET_MODE (src) == TImode);
4522 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4523 emit_insn (gen_rtx_SET (VOIDmode, reg,
4524 gen_rtx_TRUNCATE (int_mode,
4525 gen_rtx_LSHIFTRT (TImode, src,
4526 GEN_INT (int_mode == DImode ? 64 : 96)))));
4527 if (int_mode != mode)
4528 {
4529 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4530 emit_move_insn (dst, reg);
4531 }
4532 }
4533
4534 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4535 the address from SRC and SRC+16. Return a REG or CONST_INT that
4536 specifies how many bytes to rotate the loaded registers, plus any
4537 extra from EXTRA_ROTQBY. The address and rotate amounts are
4538 normalized to improve merging of loads and rotate computations. */
4539 static rtx
4540 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4541 {
4542 rtx addr = XEXP (src, 0);
4543 rtx p0, p1, rot, addr0, addr1;
4544 int rot_amt;
4545
4546 rot = 0;
4547 rot_amt = 0;
4548
4549 if (MEM_ALIGN (src) >= 128)
4550 /* Address is already aligned; simply perform a TImode load. */ ;
4551 else if (GET_CODE (addr) == PLUS)
4552 {
4553 /* 8 cases:
4554 aligned reg + aligned reg => lqx
4555 aligned reg + unaligned reg => lqx, rotqby
4556 aligned reg + aligned const => lqd
4557 aligned reg + unaligned const => lqd, rotqbyi
4558 unaligned reg + aligned reg => lqx, rotqby
4559 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4560 unaligned reg + aligned const => lqd, rotqby
4561 unaligned reg + unaligned const => not allowed by legitimate address
4562 */
4563 p0 = XEXP (addr, 0);
4564 p1 = XEXP (addr, 1);
4565 if (!reg_aligned_for_addr (p0))
4566 {
4567 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4568 {
4569 rot = gen_reg_rtx (SImode);
4570 emit_insn (gen_addsi3 (rot, p0, p1));
4571 }
4572 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4573 {
4574 if (INTVAL (p1) > 0
4575 && REG_POINTER (p0)
4576 && INTVAL (p1) * BITS_PER_UNIT
4577 < REGNO_POINTER_ALIGN (REGNO (p0)))
4578 {
4579 rot = gen_reg_rtx (SImode);
4580 emit_insn (gen_addsi3 (rot, p0, p1));
4581 addr = p0;
4582 }
4583 else
4584 {
4585 rtx x = gen_reg_rtx (SImode);
4586 emit_move_insn (x, p1);
4587 if (!spu_arith_operand (p1, SImode))
4588 p1 = x;
4589 rot = gen_reg_rtx (SImode);
4590 emit_insn (gen_addsi3 (rot, p0, p1));
4591 addr = gen_rtx_PLUS (Pmode, p0, x);
4592 }
4593 }
4594 else
4595 rot = p0;
4596 }
4597 else
4598 {
4599 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4600 {
4601 rot_amt = INTVAL (p1) & 15;
4602 if (INTVAL (p1) & -16)
4603 {
4604 p1 = GEN_INT (INTVAL (p1) & -16);
4605 addr = gen_rtx_PLUS (SImode, p0, p1);
4606 }
4607 else
4608 addr = p0;
4609 }
4610 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4611 rot = p1;
4612 }
4613 }
4614 else if (REG_P (addr))
4615 {
4616 if (!reg_aligned_for_addr (addr))
4617 rot = addr;
4618 }
4619 else if (GET_CODE (addr) == CONST)
4620 {
4621 if (GET_CODE (XEXP (addr, 0)) == PLUS
4622 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4623 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4624 {
4625 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4626 if (rot_amt & -16)
4627 addr = gen_rtx_CONST (Pmode,
4628 gen_rtx_PLUS (Pmode,
4629 XEXP (XEXP (addr, 0), 0),
4630 GEN_INT (rot_amt & -16)));
4631 else
4632 addr = XEXP (XEXP (addr, 0), 0);
4633 }
4634 else
4635 {
4636 rot = gen_reg_rtx (Pmode);
4637 emit_move_insn (rot, addr);
4638 }
4639 }
4640 else if (GET_CODE (addr) == CONST_INT)
4641 {
4642 rot_amt = INTVAL (addr);
4643 addr = GEN_INT (rot_amt & -16);
4644 }
4645 else if (!ALIGNED_SYMBOL_REF_P (addr))
4646 {
4647 rot = gen_reg_rtx (Pmode);
4648 emit_move_insn (rot, addr);
4649 }
4650
4651 rot_amt += extra_rotby;
4652
4653 rot_amt &= 15;
4654
4655 if (rot && rot_amt)
4656 {
4657 rtx x = gen_reg_rtx (SImode);
4658 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4659 rot = x;
4660 rot_amt = 0;
4661 }
4662 if (!rot && rot_amt)
4663 rot = GEN_INT (rot_amt);
4664
4665 addr0 = copy_rtx (addr);
4666 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4667 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4668
4669 if (dst1)
4670 {
4671 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4672 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4673 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4674 }
4675
4676 return rot;
4677 }
4678
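/* Split a load of a value narrower than a quadword.  Loads and stores
   on SPU always transfer a full, 16-byte aligned quadword, so the
   containing quadword is loaded in TImode, rotated (rotqby) when
   necessary so the requested bytes land in the preferred slot, and then
   converted to the destination mode -- roughly lqd/lqx, rotqby, convert
   for an unaligned scalar load.  Return nonzero if the access was
   expanded here; modes of a quadword or more are left to the normal
   move patterns.  */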
4679 int
4680 spu_split_load (rtx * ops)
4681 {
4682 enum machine_mode mode = GET_MODE (ops[0]);
4683 rtx addr, load, rot;
4684 int rot_amt;
4685
4686 if (GET_MODE_SIZE (mode) >= 16)
4687 return 0;
4688
4689 addr = XEXP (ops[1], 0);
4690 gcc_assert (GET_CODE (addr) != AND);
4691
4692 if (!address_needs_split (ops[1]))
4693 {
4694 ops[1] = change_address (ops[1], TImode, addr);
4695 load = gen_reg_rtx (TImode);
4696 emit_insn (gen__movti (load, ops[1]));
4697 spu_convert_move (ops[0], load);
4698 return 1;
4699 }
4700
4701 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4702
4703 load = gen_reg_rtx (TImode);
4704 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4705
4706 if (rot)
4707 emit_insn (gen_rotqby_ti (load, load, rot));
4708
4709 spu_convert_move (ops[0], load);
4710 return 1;
4711 }
4712
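/* Split a store of a value narrower than a quadword.  Since quadword
   stores always write all 16 bytes, a narrow store is normally a
   read-modify-write: load the containing quadword, build an insertion
   mask with cpat (the cbd/chd/cwd/cdd family), merge the new value in
   with shufb, and store the quadword back.  When store_with_one_insn_p
   says the access covers a whole aligned, padded object, the value is
   just converted and stored directly.  Return nonzero if the access
   was expanded here.  */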
4713 int
4714 spu_split_store (rtx * ops)
4715 {
4716 enum machine_mode mode = GET_MODE (ops[0]);
4717 rtx reg;
4718 rtx addr, p0, p1, p1_lo, smem;
4719 int aform;
4720 int scalar;
4721
4722 if (GET_MODE_SIZE (mode) >= 16)
4723 return 0;
4724
4725 addr = XEXP (ops[0], 0);
4726 gcc_assert (GET_CODE (addr) != AND);
4727
4728 if (!address_needs_split (ops[0]))
4729 {
4730 reg = gen_reg_rtx (TImode);
4731 emit_insn (gen_spu_convert (reg, ops[1]));
4732 ops[0] = change_address (ops[0], TImode, addr);
4733 emit_move_insn (ops[0], reg);
4734 return 1;
4735 }
4736
4737 if (GET_CODE (addr) == PLUS)
4738 {
4739 /* 8 cases:
4740 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4741 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4742 aligned reg + aligned const => lqd, c?d, shuf, stqx
4743 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4744 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4745 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4746 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4747 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4748 */
4749 aform = 0;
4750 p0 = XEXP (addr, 0);
4751 p1 = p1_lo = XEXP (addr, 1);
4752 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4753 {
4754 p1_lo = GEN_INT (INTVAL (p1) & 15);
4755 if (reg_aligned_for_addr (p0))
4756 {
4757 p1 = GEN_INT (INTVAL (p1) & -16);
4758 if (p1 == const0_rtx)
4759 addr = p0;
4760 else
4761 addr = gen_rtx_PLUS (SImode, p0, p1);
4762 }
4763 else
4764 {
4765 rtx x = gen_reg_rtx (SImode);
4766 emit_move_insn (x, p1);
4767 addr = gen_rtx_PLUS (SImode, p0, x);
4768 }
4769 }
4770 }
4771 else if (REG_P (addr))
4772 {
4773 aform = 0;
4774 p0 = addr;
4775 p1 = p1_lo = const0_rtx;
4776 }
4777 else
4778 {
4779 aform = 1;
4780 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4781 p1 = 0; /* aform doesn't use p1 */
4782 p1_lo = addr;
4783 if (ALIGNED_SYMBOL_REF_P (addr))
4784 p1_lo = const0_rtx;
4785 else if (GET_CODE (addr) == CONST
4786 && GET_CODE (XEXP (addr, 0)) == PLUS
4787 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4788 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4789 {
4790 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4791 if ((v & -16) != 0)
4792 addr = gen_rtx_CONST (Pmode,
4793 gen_rtx_PLUS (Pmode,
4794 XEXP (XEXP (addr, 0), 0),
4795 GEN_INT (v & -16)));
4796 else
4797 addr = XEXP (XEXP (addr, 0), 0);
4798 p1_lo = GEN_INT (v & 15);
4799 }
4800 else if (GET_CODE (addr) == CONST_INT)
4801 {
4802 p1_lo = GEN_INT (INTVAL (addr) & 15);
4803 addr = GEN_INT (INTVAL (addr) & -16);
4804 }
4805 else
4806 {
4807 p1_lo = gen_reg_rtx (SImode);
4808 emit_move_insn (p1_lo, addr);
4809 }
4810 }
4811
4812 gcc_assert (aform == 0 || aform == 1);
4813 reg = gen_reg_rtx (TImode);
4814
4815 scalar = store_with_one_insn_p (ops[0]);
4816 if (!scalar)
4817 {
4818 /* We could copy the flags from the ops[0] MEM to the new MEM here.
4819 We don't, because we want this load to be optimized away if
4820 possible, and copying the flags would prevent that in certain
4821 cases, e.g. when the volatile flag is set. */
4822
4823 rtx pat = gen_reg_rtx (TImode);
4824 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4825 set_mem_alias_set (lmem, 0);
4826 emit_insn (gen_movti (reg, lmem));
4827
4828 if (!p0 || reg_aligned_for_addr (p0))
4829 p0 = stack_pointer_rtx;
4830 if (!p1_lo)
4831 p1_lo = const0_rtx;
4832
4833 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4834 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4835 }
4836 else
4837 {
4838 if (GET_CODE (ops[1]) == REG)
4839 emit_insn (gen_spu_convert (reg, ops[1]));
4840 else if (GET_CODE (ops[1]) == SUBREG)
4841 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4842 else
4843 abort ();
4844 }
4845
4846 if (GET_MODE_SIZE (mode) < 4 && scalar)
4847 emit_insn (gen_ashlti3
4848 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4849
4850 smem = change_address (ops[0], TImode, copy_rtx (addr));
4851 /* We can't use the previous alias set because the memory has changed
4852 size and can potentially overlap objects of other types. */
4853 set_mem_alias_set (smem, 0);
4854
4855 emit_insn (gen_movti (smem, reg));
4856 return 1;
4857 }
4858
4859 /* Return TRUE if X is MEM which is a struct member reference
4860 and the member can safely be loaded and stored with a single
4861 instruction because it is padded. */
4862 static int
4863 mem_is_padded_component_ref (rtx x)
4864 {
4865 tree t = MEM_EXPR (x);
4866 tree r;
4867 if (!t || TREE_CODE (t) != COMPONENT_REF)
4868 return 0;
4869 t = TREE_OPERAND (t, 1);
4870 if (!t || TREE_CODE (t) != FIELD_DECL
4871 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4872 return 0;
4873 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4874 r = DECL_FIELD_CONTEXT (t);
4875 if (!r || TREE_CODE (r) != RECORD_TYPE)
4876 return 0;
4877 /* Make sure they are the same mode. */
4878 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4879 return 0;
4880 /* If there are no following fields, then the field's alignment ensures
4881 the structure is padded to that alignment, which means this field is
4882 padded too. */
4883 if (TREE_CHAIN (t) == 0)
4884 return 1;
4885 /* If the following field is also aligned then this field will be
4886 padded. */
4887 t = TREE_CHAIN (t);
4888 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4889 return 1;
4890 return 0;
4891 }
4892
4893 /* Parse the -mfixed-range= option string. */
4894 static void
4895 fix_range (const char *const_str)
4896 {
4897 int i, first, last;
4898 char *str, *dash, *comma;
4899
4900 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4901 REG2 are either register names or register numbers. The effect
4902 of this option is to mark the registers in the range from REG1 to
4903 REG2 as ``fixed'' so they won't be used by the compiler. */
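/* For example, an option such as -mfixed-range=$80-$83 (using whatever
   register names decode_reg_name accepts for this port) would mark
   registers 80 through 83 as fixed and call-used, keeping them out of
   register allocation.  */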
4904
4905 i = strlen (const_str);
4906 str = (char *) alloca (i + 1);
4907 memcpy (str, const_str, i + 1);
4908
4909 while (1)
4910 {
4911 dash = strchr (str, '-');
4912 if (!dash)
4913 {
4914 warning (0, "value of -mfixed-range must have form REG1-REG2");
4915 return;
4916 }
4917 *dash = '\0';
4918 comma = strchr (dash + 1, ',');
4919 if (comma)
4920 *comma = '\0';
4921
4922 first = decode_reg_name (str);
4923 if (first < 0)
4924 {
4925 warning (0, "unknown register name: %s", str);
4926 return;
4927 }
4928
4929 last = decode_reg_name (dash + 1);
4930 if (last < 0)
4931 {
4932 warning (0, "unknown register name: %s", dash + 1);
4933 return;
4934 }
4935
4936 *dash = '-';
4937
4938 if (first > last)
4939 {
4940 warning (0, "%s-%s is an empty range", str, dash + 1);
4941 return;
4942 }
4943
4944 for (i = first; i <= last; ++i)
4945 fixed_regs[i] = call_used_regs[i] = 1;
4946
4947 if (!comma)
4948 break;
4949
4950 *comma = ',';
4951 str = comma + 1;
4952 }
4953 }
4954
4955 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4956 can be generated using the fsmbi instruction. */
4957 int
4958 fsmbi_const_p (rtx x)
4959 {
4960 if (CONSTANT_P (x))
4961 {
4962 /* We can always choose TImode for CONST_INT because the high bits
4963 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4964 enum immediate_class c = classify_immediate (x, TImode);
4965 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4966 }
4967 return 0;
4968 }
4969
4970 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4971 can be generated using the cbd, chd, cwd or cdd instruction. */
4972 int
4973 cpat_const_p (rtx x, enum machine_mode mode)
4974 {
4975 if (CONSTANT_P (x))
4976 {
4977 enum immediate_class c = classify_immediate (x, mode);
4978 return c == IC_CPAT;
4979 }
4980 return 0;
4981 }
4982
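/* When the base and offset are known at compile time, build the shuffle
   pattern that a cbd/chd/cwd/cdd instruction would produce, so the cpat
   insn can be replaced by an immediate constant.  Control bytes 16..31
   select the untouched bytes of the loaded quadword, and the ISIZE
   control bytes at OFFSET select the scalar from its preferred-slot
   position in the other shufb operand.  Return 0 if the operands are
   not constant enough or the base register is not known to be 16-byte
   aligned.  */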
4983 rtx
4984 gen_cpat_const (rtx * ops)
4985 {
4986 unsigned char dst[16];
4987 int i, offset, shift, isize;
4988 if (GET_CODE (ops[3]) != CONST_INT
4989 || GET_CODE (ops[2]) != CONST_INT
4990 || (GET_CODE (ops[1]) != CONST_INT
4991 && GET_CODE (ops[1]) != REG))
4992 return 0;
4993 if (GET_CODE (ops[1]) == REG
4994 && (!REG_POINTER (ops[1])
4995 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4996 return 0;
4997
4998 for (i = 0; i < 16; i++)
4999 dst[i] = i + 16;
5000 isize = INTVAL (ops[3]);
5001 if (isize == 1)
5002 shift = 3;
5003 else if (isize == 2)
5004 shift = 2;
5005 else
5006 shift = 0;
5007 offset = (INTVAL (ops[2]) +
5008 (GET_CODE (ops[1]) ==
5009 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5010 for (i = 0; i < isize; i++)
5011 dst[offset + i] = i + shift;
5012 return array_to_constant (TImode, dst);
5013 }
5014
5015 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5016 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5017 than 16 bytes, the value is repeated across the rest of the array. */
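/* For illustration: constant_to_array (SImode, GEN_INT (0x12345678), arr)
   fills arr with 0x12 0x34 0x56 0x78 repeated four times; elements are
   laid out big-endian and splatted across the rest of the quadword.  */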
5018 void
5019 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5020 {
5021 HOST_WIDE_INT val;
5022 int i, j, first;
5023
5024 memset (arr, 0, 16);
5025 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5026 if (GET_CODE (x) == CONST_INT
5027 || (GET_CODE (x) == CONST_DOUBLE
5028 && (mode == SFmode || mode == DFmode)))
5029 {
5030 gcc_assert (mode != VOIDmode && mode != BLKmode);
5031
5032 if (GET_CODE (x) == CONST_DOUBLE)
5033 val = const_double_to_hwint (x);
5034 else
5035 val = INTVAL (x);
5036 first = GET_MODE_SIZE (mode) - 1;
5037 for (i = first; i >= 0; i--)
5038 {
5039 arr[i] = val & 0xff;
5040 val >>= 8;
5041 }
5042 /* Splat the constant across the whole array. */
5043 for (j = 0, i = first + 1; i < 16; i++)
5044 {
5045 arr[i] = arr[j];
5046 j = (j == first) ? 0 : j + 1;
5047 }
5048 }
5049 else if (GET_CODE (x) == CONST_DOUBLE)
5050 {
5051 val = CONST_DOUBLE_LOW (x);
5052 for (i = 15; i >= 8; i--)
5053 {
5054 arr[i] = val & 0xff;
5055 val >>= 8;
5056 }
5057 val = CONST_DOUBLE_HIGH (x);
5058 for (i = 7; i >= 0; i--)
5059 {
5060 arr[i] = val & 0xff;
5061 val >>= 8;
5062 }
5063 }
5064 else if (GET_CODE (x) == CONST_VECTOR)
5065 {
5066 int units;
5067 rtx elt;
5068 mode = GET_MODE_INNER (mode);
5069 units = CONST_VECTOR_NUNITS (x);
5070 for (i = 0; i < units; i++)
5071 {
5072 elt = CONST_VECTOR_ELT (x, i);
5073 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5074 {
5075 if (GET_CODE (elt) == CONST_DOUBLE)
5076 val = const_double_to_hwint (elt);
5077 else
5078 val = INTVAL (elt);
5079 first = GET_MODE_SIZE (mode) - 1;
5080 if (first + i * GET_MODE_SIZE (mode) > 16)
5081 abort ();
5082 for (j = first; j >= 0; j--)
5083 {
5084 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5085 val >>= 8;
5086 }
5087 }
5088 }
5089 }
5090 else
5091 gcc_unreachable();
5092 }
5093
5094 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5095 smaller than 16 bytes, use the bytes that would represent that value
5096 in a register, e.g., for QImode return the value of arr[3]. */
5097 rtx
5098 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5099 {
5100 enum machine_mode inner_mode;
5101 rtvec v;
5102 int units, size, i, j, k;
5103 HOST_WIDE_INT val;
5104
5105 if (GET_MODE_CLASS (mode) == MODE_INT
5106 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5107 {
5108 j = GET_MODE_SIZE (mode);
5109 i = j < 4 ? 4 - j : 0;
5110 for (val = 0; i < j; i++)
5111 val = (val << 8) | arr[i];
5112 val = trunc_int_for_mode (val, mode);
5113 return GEN_INT (val);
5114 }
5115
5116 if (mode == TImode)
5117 {
5118 HOST_WIDE_INT high;
5119 for (i = high = 0; i < 8; i++)
5120 high = (high << 8) | arr[i];
5121 for (i = 8, val = 0; i < 16; i++)
5122 val = (val << 8) | arr[i];
5123 return immed_double_const (val, high, TImode);
5124 }
5125 if (mode == SFmode)
5126 {
5127 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5128 val = trunc_int_for_mode (val, SImode);
5129 return hwint_to_const_double (SFmode, val);
5130 }
5131 if (mode == DFmode)
5132 {
5133 for (i = 0, val = 0; i < 8; i++)
5134 val = (val << 8) | arr[i];
5135 return hwint_to_const_double (DFmode, val);
5136 }
5137
5138 if (!VECTOR_MODE_P (mode))
5139 abort ();
5140
5141 units = GET_MODE_NUNITS (mode);
5142 size = GET_MODE_UNIT_SIZE (mode);
5143 inner_mode = GET_MODE_INNER (mode);
5144 v = rtvec_alloc (units);
5145
5146 for (k = i = 0; i < units; ++i)
5147 {
5148 val = 0;
5149 for (j = 0; j < size; j++, k++)
5150 val = (val << 8) | arr[k];
5151
5152 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5153 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5154 else
5155 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5156 }
5157 if (k > 16)
5158 abort ();
5159
5160 return gen_rtx_CONST_VECTOR (mode, v);
5161 }
5162
5163 static void
5164 reloc_diagnostic (rtx x)
5165 {
5166 tree decl = 0;
5167 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5168 return;
5169
5170 if (GET_CODE (x) == SYMBOL_REF)
5171 decl = SYMBOL_REF_DECL (x);
5172 else if (GET_CODE (x) == CONST
5173 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5174 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5175
5176 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5177 if (decl && !DECL_P (decl))
5178 decl = 0;
5179
5180 /* The decl could be a string constant. */
5181 if (decl && DECL_P (decl))
5182 {
5183 location_t loc;
5184 /* We use last_assemble_variable_decl to get line information. It's
5185 not always going to be right and might not even be close, but will
5186 be right for the more common cases. */
5187 if (!last_assemble_variable_decl || in_section == ctors_section)
5188 loc = DECL_SOURCE_LOCATION (decl);
5189 else
5190 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5191
5192 if (TARGET_WARN_RELOC)
5193 warning_at (loc, 0,
5194 "creating run-time relocation for %qD", decl);
5195 else
5196 error_at (loc,
5197 "creating run-time relocation for %qD", decl);
5198 }
5199 else
5200 {
5201 if (TARGET_WARN_RELOC)
5202 warning_at (input_location, 0, "creating run-time relocation");
5203 else
5204 error_at (input_location, "creating run-time relocation");
5205 }
5206 }
5207
5208 /* Hook into assemble_integer so we can generate an error for run-time
5209 relocations. The SPU ABI disallows them. */
5210 static bool
5211 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5212 {
5213 /* By default run-time relocations aren't supported, but we allow them
5214 in case users support them in their own run-time loader, and we provide
5215 a warning for those users who don't. */
5216 if ((GET_CODE (x) == SYMBOL_REF)
5217 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5218 reloc_diagnostic (x);
5219
5220 return default_assemble_integer (x, size, aligned_p);
5221 }
5222
5223 static void
5224 spu_asm_globalize_label (FILE * file, const char *name)
5225 {
5226 fputs ("\t.global\t", file);
5227 assemble_name (file, name);
5228 fputs ("\n", file);
5229 }
5230
5231 static bool
5232 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5233 int opno ATTRIBUTE_UNUSED, int *total,
5234 bool speed ATTRIBUTE_UNUSED)
5235 {
5236 enum machine_mode mode = GET_MODE (x);
5237 int cost = COSTS_N_INSNS (2);
5238
5239 /* Folding to a CONST_VECTOR will use extra space but there might
5240 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5241 only if it allows us to fold away multiple insns. Changing the cost
5242 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5243 because this cost will only be compared against a single insn.
5244 if (code == CONST_VECTOR)
5245 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5246 */
5247
5248 /* Use defaults for float operations. Not accurate but good enough. */
5249 if (mode == DFmode)
5250 {
5251 *total = COSTS_N_INSNS (13);
5252 return true;
5253 }
5254 if (mode == SFmode)
5255 {
5256 *total = COSTS_N_INSNS (6);
5257 return true;
5258 }
5259 switch (code)
5260 {
5261 case CONST_INT:
5262 if (satisfies_constraint_K (x))
5263 *total = 0;
5264 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5265 *total = COSTS_N_INSNS (1);
5266 else
5267 *total = COSTS_N_INSNS (3);
5268 return true;
5269
5270 case CONST:
5271 *total = COSTS_N_INSNS (3);
5272 return true;
5273
5274 case LABEL_REF:
5275 case SYMBOL_REF:
5276 *total = COSTS_N_INSNS (0);
5277 return true;
5278
5279 case CONST_DOUBLE:
5280 *total = COSTS_N_INSNS (5);
5281 return true;
5282
5283 case FLOAT_EXTEND:
5284 case FLOAT_TRUNCATE:
5285 case FLOAT:
5286 case UNSIGNED_FLOAT:
5287 case FIX:
5288 case UNSIGNED_FIX:
5289 *total = COSTS_N_INSNS (7);
5290 return true;
5291
5292 case PLUS:
5293 if (mode == TImode)
5294 {
5295 *total = COSTS_N_INSNS (9);
5296 return true;
5297 }
5298 break;
5299
5300 case MULT:
5301 cost =
5302 GET_CODE (XEXP (x, 0)) ==
5303 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5304 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5305 {
5306 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5307 {
5308 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5309 cost = COSTS_N_INSNS (14);
5310 if ((val & 0xffff) == 0)
5311 cost = COSTS_N_INSNS (9);
5312 else if (val > 0 && val < 0x10000)
5313 cost = COSTS_N_INSNS (11);
5314 }
5315 }
5316 *total = cost;
5317 return true;
5318 case DIV:
5319 case UDIV:
5320 case MOD:
5321 case UMOD:
5322 *total = COSTS_N_INSNS (20);
5323 return true;
5324 case ROTATE:
5325 case ROTATERT:
5326 case ASHIFT:
5327 case ASHIFTRT:
5328 case LSHIFTRT:
5329 *total = COSTS_N_INSNS (4);
5330 return true;
5331 case UNSPEC:
5332 if (XINT (x, 1) == UNSPEC_CONVERT)
5333 *total = COSTS_N_INSNS (0);
5334 else
5335 *total = COSTS_N_INSNS (4);
5336 return true;
5337 }
5338 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5339 if (GET_MODE_CLASS (mode) == MODE_INT
5340 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5341 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5342 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5343 *total = cost;
5344 return true;
5345 }
5346
5347 static enum machine_mode
5348 spu_unwind_word_mode (void)
5349 {
5350 return SImode;
5351 }
5352
5353 /* Decide whether we can make a sibling call to a function. DECL is the
5354 declaration of the function being targeted by the call and EXP is the
5355 CALL_EXPR representing the call. */
5356 static bool
5357 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5358 {
5359 return decl && !TARGET_LARGE_MEM;
5360 }
5361
5362 /* We need to correctly update the back chain pointer and the Available
5363 Stack Size (which is in the second slot of the sp register.) */
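/* Sketch of the sequence emitted below: the requested byte count is
   splatted into all four word slots of a V4SImode temporary and
   subtracted from the whole $sp quadword at once, so slot 0 (the stack
   pointer) and slot 1 (the available stack size) stay consistent; the
   saved back chain is then stored at the new bottom of the stack.  */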
5364 void
5365 spu_allocate_stack (rtx op0, rtx op1)
5366 {
5367 HOST_WIDE_INT v;
5368 rtx chain = gen_reg_rtx (V4SImode);
5369 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5370 rtx sp = gen_reg_rtx (V4SImode);
5371 rtx splatted = gen_reg_rtx (V4SImode);
5372 rtx pat = gen_reg_rtx (TImode);
5373
5374 /* copy the back chain so we can save it back again. */
5375 emit_move_insn (chain, stack_bot);
5376
5377 op1 = force_reg (SImode, op1);
5378
5379 v = 0x1020300010203ll;
5380 emit_move_insn (pat, immed_double_const (v, v, TImode));
5381 emit_insn (gen_shufb (splatted, op1, op1, pat));
5382
5383 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5384 emit_insn (gen_subv4si3 (sp, sp, splatted));
5385
5386 if (flag_stack_check)
5387 {
5388 rtx avail = gen_reg_rtx(SImode);
5389 rtx result = gen_reg_rtx(SImode);
5390 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5391 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5392 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5393 }
5394
5395 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5396
5397 emit_move_insn (stack_bot, chain);
5398
5399 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5400 }
5401
5402 void
5403 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5404 {
5405 static unsigned char arr[16] =
5406 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5407 rtx temp = gen_reg_rtx (SImode);
5408 rtx temp2 = gen_reg_rtx (SImode);
5409 rtx temp3 = gen_reg_rtx (V4SImode);
5410 rtx temp4 = gen_reg_rtx (V4SImode);
5411 rtx pat = gen_reg_rtx (TImode);
5412 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5413
5414 /* Restore the backchain from the first word, sp from the second. */
5415 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5416 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5417
5418 emit_move_insn (pat, array_to_constant (TImode, arr));
5419
5420 /* Compute Available Stack Size for sp */
5421 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5422 emit_insn (gen_shufb (temp3, temp, temp, pat));
5423
5424 /* Compute Available Stack Size for back chain */
5425 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5426 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5427 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5428
5429 emit_insn (gen_addv4si3 (sp, sp, temp3));
5430 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5431 }
5432
5433 static void
5434 spu_init_libfuncs (void)
5435 {
5436 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5437 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5438 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5439 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5440 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5441 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5442 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5443 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5444 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5445 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5446 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5447 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5448
5449 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5450 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5451
5452 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5453 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5454 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5455 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5456 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5457 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5458 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5459 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5460 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5461 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5462 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5463 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5464
5465 set_optab_libfunc (smul_optab, TImode, "__multi3");
5466 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5467 set_optab_libfunc (smod_optab, TImode, "__modti3");
5468 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5469 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5470 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5471 }
5472
5473 /* Make a subreg, stripping any existing subreg. We could possibly just
5474 call simplify_subreg, but in this case we know what we want. */
5475 rtx
5476 spu_gen_subreg (enum machine_mode mode, rtx x)
5477 {
5478 if (GET_CODE (x) == SUBREG)
5479 x = SUBREG_REG (x);
5480 if (GET_MODE (x) == mode)
5481 return x;
5482 return gen_rtx_SUBREG (mode, x, 0);
5483 }
5484
5485 static bool
5486 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5487 {
5488 return (TYPE_MODE (type) == BLKmode
5489 && ((type) == 0
5490 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5491 || int_size_in_bytes (type) >
5492 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5493 }
5494 \f
5495 /* Create the built-in types and functions */
5496
5497 enum spu_function_code
5498 {
5499 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5500 #include "spu-builtins.def"
5501 #undef DEF_BUILTIN
5502 NUM_SPU_BUILTINS
5503 };
5504
5505 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5506
5507 struct spu_builtin_description spu_builtins[] = {
5508 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5509 {fcode, icode, name, type, params},
5510 #include "spu-builtins.def"
5511 #undef DEF_BUILTIN
5512 };
5513
5514 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5515
5516 /* Returns the spu builtin decl for CODE. */
5517
5518 static tree
5519 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5520 {
5521 if (code >= NUM_SPU_BUILTINS)
5522 return error_mark_node;
5523
5524 return spu_builtin_decls[code];
5525 }
5526
5527
5528 static void
5529 spu_init_builtins (void)
5530 {
5531 struct spu_builtin_description *d;
5532 unsigned int i;
5533
5534 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5535 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5536 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5537 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5538 V4SF_type_node = build_vector_type (float_type_node, 4);
5539 V2DF_type_node = build_vector_type (double_type_node, 2);
5540
5541 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5542 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5543 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5544 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5545
5546 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5547
5548 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5560
5561 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5562 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5563 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5564 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5565 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5566 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5567 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5569
5570 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5571 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5572
5573 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5574
5575 spu_builtin_types[SPU_BTI_PTR] =
5576 build_pointer_type (build_qualified_type
5577 (void_type_node,
5578 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5579
5580 /* For each builtin we build a new prototype. The tree code will make
5581 sure nodes are shared. */
5582 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5583 {
5584 tree p;
5585 char name[64]; /* add_builtin_function will make a copy. */
5586 int parm;
5587
5588 if (d->name == 0)
5589 continue;
5590
5591 /* Find last parm. */
5592 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5593 ;
5594
5595 p = void_list_node;
5596 while (parm > 1)
5597 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5598
5599 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5600
5601 sprintf (name, "__builtin_%s", d->name);
5602 spu_builtin_decls[i] =
5603 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5604 if (d->fcode == SPU_MASK_FOR_LOAD)
5605 TREE_READONLY (spu_builtin_decls[i]) = 1;
5606
5607 /* These builtins don't throw. */
5608 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5609 }
5610 }
5611
5612 void
5613 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5614 {
5615 static unsigned char arr[16] =
5616 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5617
5618 rtx temp = gen_reg_rtx (Pmode);
5619 rtx temp2 = gen_reg_rtx (V4SImode);
5620 rtx temp3 = gen_reg_rtx (V4SImode);
5621 rtx pat = gen_reg_rtx (TImode);
5622 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5623
5624 emit_move_insn (pat, array_to_constant (TImode, arr));
5625
5626 /* Restore the sp. */
5627 emit_move_insn (temp, op1);
5628 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5629
5630 /* Compute available stack size for sp. */
5631 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5632 emit_insn (gen_shufb (temp3, temp, temp, pat));
5633
5634 emit_insn (gen_addv4si3 (sp, sp, temp3));
5635 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5636 }
5637
5638 int
5639 spu_safe_dma (HOST_WIDE_INT channel)
5640 {
5641 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5642 }
5643
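/* Expand the spu_splats family of builtins: replicate scalar ops[1]
   into every element of vector ops[0].  A literal scalar simply becomes
   a CONST_VECTOR; otherwise a shufb with a mode-dependent control word
   broadcasts the preferred-slot bytes to all element positions.  */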
5644 void
5645 spu_builtin_splats (rtx ops[])
5646 {
5647 enum machine_mode mode = GET_MODE (ops[0]);
5648 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5649 {
5650 unsigned char arr[16];
5651 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5652 emit_move_insn (ops[0], array_to_constant (mode, arr));
5653 }
5654 else
5655 {
5656 rtx reg = gen_reg_rtx (TImode);
5657 rtx shuf;
5658 if (GET_CODE (ops[1]) != REG
5659 && GET_CODE (ops[1]) != SUBREG)
5660 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5661 switch (mode)
5662 {
5663 case V2DImode:
5664 case V2DFmode:
5665 shuf =
5666 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5667 TImode);
5668 break;
5669 case V4SImode:
5670 case V4SFmode:
5671 shuf =
5672 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5673 TImode);
5674 break;
5675 case V8HImode:
5676 shuf =
5677 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5678 TImode);
5679 break;
5680 case V16QImode:
5681 shuf =
5682 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5683 TImode);
5684 break;
5685 default:
5686 abort ();
5687 }
5688 emit_move_insn (reg, shuf);
5689 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5690 }
5691 }
5692
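/* Expand the spu_extract family of builtins: move element ops[2] of
   vector ops[1] into scalar ops[0].  A constant index maps directly to
   a vec_extract pattern; a variable index rotates the whole quadword
   (rotqby) so the selected element ends up in the preferred slot, then
   converts it to the scalar mode.  */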
5693 void
5694 spu_builtin_extract (rtx ops[])
5695 {
5696 enum machine_mode mode;
5697 rtx rot, from, tmp;
5698
5699 mode = GET_MODE (ops[1]);
5700
5701 if (GET_CODE (ops[2]) == CONST_INT)
5702 {
5703 switch (mode)
5704 {
5705 case V16QImode:
5706 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5707 break;
5708 case V8HImode:
5709 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5710 break;
5711 case V4SFmode:
5712 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5713 break;
5714 case V4SImode:
5715 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5716 break;
5717 case V2DImode:
5718 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5719 break;
5720 case V2DFmode:
5721 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5722 break;
5723 default:
5724 abort ();
5725 }
5726 return;
5727 }
5728
5729 from = spu_gen_subreg (TImode, ops[1]);
5730 rot = gen_reg_rtx (TImode);
5731 tmp = gen_reg_rtx (SImode);
5732
5733 switch (mode)
5734 {
5735 case V16QImode:
5736 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5737 break;
5738 case V8HImode:
5739 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5740 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5741 break;
5742 case V4SFmode:
5743 case V4SImode:
5744 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5745 break;
5746 case V2DImode:
5747 case V2DFmode:
5748 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5749 break;
5750 default:
5751 abort ();
5752 }
5753 emit_insn (gen_rotqby_ti (rot, from, tmp));
5754
5755 emit_insn (gen_spu_convert (ops[0], rot));
5756 }
5757
5758 void
5759 spu_builtin_insert (rtx ops[])
5760 {
5761 enum machine_mode mode = GET_MODE (ops[0]);
5762 enum machine_mode imode = GET_MODE_INNER (mode);
5763 rtx mask = gen_reg_rtx (TImode);
5764 rtx offset;
5765
5766 if (GET_CODE (ops[3]) == CONST_INT)
5767 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5768 else
5769 {
5770 offset = gen_reg_rtx (SImode);
5771 emit_insn (gen_mulsi3
5772 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5773 }
5774 emit_insn (gen_cpat
5775 (mask, stack_pointer_rtx, offset,
5776 GEN_INT (GET_MODE_SIZE (imode))));
5777 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5778 }
5779
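/* Descriptive comment (added): expand spu_promote.  Place the scalar
   OPS[1] into element OPS[2] of the result vector OPS[0] by rotating it
   into position; the remaining elements are left unspecified.  */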
5780 void
5781 spu_builtin_promote (rtx ops[])
5782 {
5783 enum machine_mode mode, imode;
5784 rtx rot, from, offset;
5785 HOST_WIDE_INT pos;
5786
5787 mode = GET_MODE (ops[0]);
5788 imode = GET_MODE_INNER (mode);
5789
5790 from = gen_reg_rtx (TImode);
5791 rot = spu_gen_subreg (TImode, ops[0]);
5792
5793 emit_insn (gen_spu_convert (from, ops[1]));
5794
5795 if (GET_CODE (ops[2]) == CONST_INT)
5796 {
5797 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5798 if (GET_MODE_SIZE (imode) < 4)
5799 pos += 4 - GET_MODE_SIZE (imode);
5800 offset = GEN_INT (pos & 15);
5801 }
5802 else
5803 {
5804 offset = gen_reg_rtx (SImode);
5805 switch (mode)
5806 {
5807 case V16QImode:
5808 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5809 break;
5810 case V8HImode:
5811 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5812 emit_insn (gen_addsi3 (offset, offset, offset));
5813 break;
5814 case V4SFmode:
5815 case V4SImode:
5816 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5817 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5818 break;
5819 case V2DImode:
5820 case V2DFmode:
5821 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5822 break;
5823 default:
5824 abort ();
5825 }
5826 }
5827 emit_insn (gen_rotqby_ti (rot, from, offset));
5828 }
5829
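/* Descriptive comment (added): implement TARGET_TRAMPOLINE_INIT.  Store
   into M_TRAMP the instruction words that load the static chain CXT and
   jump to FNDECL.  */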
5830 static void
5831 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5832 {
5833 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5834 rtx shuf = gen_reg_rtx (V4SImode);
5835 rtx insn = gen_reg_rtx (V4SImode);
5836 rtx shufc;
5837 rtx insnc;
5838 rtx mem;
5839
5840 fnaddr = force_reg (SImode, fnaddr);
5841 cxt = force_reg (SImode, cxt);
5842
5843 if (TARGET_LARGE_MEM)
5844 {
5845 rtx rotl = gen_reg_rtx (V4SImode);
5846 rtx mask = gen_reg_rtx (V4SImode);
5847 rtx bi = gen_reg_rtx (SImode);
5848 static unsigned char const shufa[16] = {
5849 2, 3, 0, 1, 18, 19, 16, 17,
5850 0, 1, 2, 3, 16, 17, 18, 19
5851 };
5852 static unsigned char const insna[16] = {
5853 0x41, 0, 0, 79,
5854 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5855 0x60, 0x80, 0, 79,
5856 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5857 };
5858
5859 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5860 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5861
5862 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5863 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5864 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5865 emit_insn (gen_selb (insn, insnc, rotl, mask));
5866
5867 mem = adjust_address (m_tramp, V4SImode, 0);
5868 emit_move_insn (mem, insn);
5869
5870 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5871 mem = adjust_address (m_tramp, Pmode, 16);
5872 emit_move_insn (mem, bi);
5873 }
5874 else
5875 {
5876 rtx scxt = gen_reg_rtx (SImode);
5877 rtx sfnaddr = gen_reg_rtx (SImode);
5878 static unsigned char const insna[16] = {
5879 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5880 0x30, 0, 0, 0,
5881 0, 0, 0, 0,
5882 0, 0, 0, 0
5883 };
5884
5885 shufc = gen_reg_rtx (TImode);
5886 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5887
5888 /* By or'ing all of cxt with the ila opcode, we assume cxt fits in
5889 18 bits and its last 4 bits are zero. This will be true if the
5890 stack pointer is initialized to 0x3fff0 at program start;
5891 otherwise the ila instruction will be garbage. */
5892
5893 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5894 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5895 emit_insn (gen_cpat
5896 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5897 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5898 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5899
5900 mem = adjust_address (m_tramp, V4SImode, 0);
5901 emit_move_insn (mem, insn);
5902 }
5903 emit_insn (gen_sync ());
5904 }
5905
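/* Descriptive comment (added): implement TARGET_WARN_FUNC_RETURN.  */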
5906 static bool
5907 spu_warn_func_return (tree decl)
5908 {
5909 /* Naked functions are implemented entirely in assembly, including the
5910 return sequence, so suppress warnings about this. */
5911 return !spu_naked_function_p (decl);
5912 }
5913
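/* Descriptive comment (added): expand a sign extension whose result
   OPS[0] is DImode or TImode.  A shufb pattern copies the low bytes of
   OPS[1] into place and fills the upper bytes from a register holding the
   replicated sign.  */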
5914 void
5915 spu_expand_sign_extend (rtx ops[])
5916 {
5917 unsigned char arr[16];
5918 rtx pat = gen_reg_rtx (TImode);
5919 rtx sign, c;
5920 int i, last;
5921 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5922 if (GET_MODE (ops[1]) == QImode)
5923 {
5924 sign = gen_reg_rtx (HImode);
5925 emit_insn (gen_extendqihi2 (sign, ops[1]));
5926 for (i = 0; i < 16; i++)
5927 arr[i] = 0x12;
5928 arr[last] = 0x13;
5929 }
5930 else
5931 {
5932 for (i = 0; i < 16; i++)
5933 arr[i] = 0x10;
5934 switch (GET_MODE (ops[1]))
5935 {
5936 case HImode:
5937 sign = gen_reg_rtx (SImode);
5938 emit_insn (gen_extendhisi2 (sign, ops[1]));
5939 arr[last] = 0x03;
5940 arr[last - 1] = 0x02;
5941 break;
5942 case SImode:
5943 sign = gen_reg_rtx (SImode);
5944 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5945 for (i = 0; i < 4; i++)
5946 arr[last - i] = 3 - i;
5947 break;
5948 case DImode:
5949 sign = gen_reg_rtx (SImode);
5950 c = gen_reg_rtx (SImode);
5951 emit_insn (gen_spu_convert (c, ops[1]));
5952 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5953 for (i = 0; i < 8; i++)
5954 arr[last - i] = 7 - i;
5955 break;
5956 default:
5957 abort ();
5958 }
5959 }
5960 emit_move_insn (pat, array_to_constant (TImode, arr));
5961 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5962 }
5963
5964 /* Expand vector initialization. If there are any constant parts,
5965 load the constant parts first, then load any non-constant parts. */
5966 void
5967 spu_expand_vector_init (rtx target, rtx vals)
5968 {
5969 enum machine_mode mode = GET_MODE (target);
5970 int n_elts = GET_MODE_NUNITS (mode);
5971 int n_var = 0;
5972 bool all_same = true;
5973 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5974 int i;
5975
5976 first = XVECEXP (vals, 0, 0);
5977 for (i = 0; i < n_elts; ++i)
5978 {
5979 x = XVECEXP (vals, 0, i);
5980 if (!(CONST_INT_P (x)
5981 || GET_CODE (x) == CONST_DOUBLE
5982 || GET_CODE (x) == CONST_FIXED))
5983 ++n_var;
5984 else
5985 {
5986 if (first_constant == NULL_RTX)
5987 first_constant = x;
5988 }
5989 if (i > 0 && !rtx_equal_p (x, first))
5990 all_same = false;
5991 }
5992
5993 /* If all elements are the same, use splats to replicate the element. */
5994 if (all_same)
5995 {
5996 if (!CONSTANT_P (first)
5997 && !register_operand (first, GET_MODE (x)))
5998 first = force_reg (GET_MODE (first), first);
5999 emit_insn (gen_spu_splats (target, first));
6000 return;
6001 }
6002
6003 /* Load the constant parts. */
6004 if (n_var != n_elts)
6005 {
6006 if (n_var == 0)
6007 {
6008 emit_move_insn (target,
6009 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6010 }
6011 else
6012 {
6013 rtx constant_parts_rtx = copy_rtx (vals);
6014
6015 gcc_assert (first_constant != NULL_RTX);
6016 /* Fill empty slots with the first constant; this increases
6017 our chance of using splats in the recursive call below. */
6018 for (i = 0; i < n_elts; ++i)
6019 {
6020 x = XVECEXP (constant_parts_rtx, 0, i);
6021 if (!(CONST_INT_P (x)
6022 || GET_CODE (x) == CONST_DOUBLE
6023 || GET_CODE (x) == CONST_FIXED))
6024 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6025 }
6026
6027 spu_expand_vector_init (target, constant_parts_rtx);
6028 }
6029 }
6030
6031 /* Load the variable parts. */
6032 if (n_var != 0)
6033 {
6034 rtx insert_operands[4];
6035
6036 insert_operands[0] = target;
6037 insert_operands[2] = target;
6038 for (i = 0; i < n_elts; ++i)
6039 {
6040 x = XVECEXP (vals, 0, i);
6041 if (!(CONST_INT_P (x)
6042 || GET_CODE (x) == CONST_DOUBLE
6043 || GET_CODE (x) == CONST_FIXED))
6044 {
6045 if (!register_operand (x, GET_MODE (x)))
6046 x = force_reg (GET_MODE (x), x);
6047 insert_operands[1] = x;
6048 insert_operands[3] = GEN_INT (i);
6049 spu_builtin_insert (insert_operands);
6050 }
6051 }
6052 }
6053 }
6054
6055 /* Return the insn code of the vector compare instruction for the given
6056 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6057
6058 static int
6059 get_vec_cmp_insn (enum rtx_code code,
6060 enum machine_mode dest_mode,
6061 enum machine_mode op_mode)
6062
6063 {
6064 switch (code)
6065 {
6066 case EQ:
6067 if (dest_mode == V16QImode && op_mode == V16QImode)
6068 return CODE_FOR_ceq_v16qi;
6069 if (dest_mode == V8HImode && op_mode == V8HImode)
6070 return CODE_FOR_ceq_v8hi;
6071 if (dest_mode == V4SImode && op_mode == V4SImode)
6072 return CODE_FOR_ceq_v4si;
6073 if (dest_mode == V4SImode && op_mode == V4SFmode)
6074 return CODE_FOR_ceq_v4sf;
6075 if (dest_mode == V2DImode && op_mode == V2DFmode)
6076 return CODE_FOR_ceq_v2df;
6077 break;
6078 case GT:
6079 if (dest_mode == V16QImode && op_mode == V16QImode)
6080 return CODE_FOR_cgt_v16qi;
6081 if (dest_mode == V8HImode && op_mode == V8HImode)
6082 return CODE_FOR_cgt_v8hi;
6083 if (dest_mode == V4SImode && op_mode == V4SImode)
6084 return CODE_FOR_cgt_v4si;
6085 if (dest_mode == V4SImode && op_mode == V4SFmode)
6086 return CODE_FOR_cgt_v4sf;
6087 if (dest_mode == V2DImode && op_mode == V2DFmode)
6088 return CODE_FOR_cgt_v2df;
6089 break;
6090 case GTU:
6091 if (dest_mode == V16QImode && op_mode == V16QImode)
6092 return CODE_FOR_clgt_v16qi;
6093 if (dest_mode == V8HImode && op_mode == V8HImode)
6094 return CODE_FOR_clgt_v8hi;
6095 if (dest_mode == V4SImode && op_mode == V4SImode)
6096 return CODE_FOR_clgt_v4si;
6097 break;
6098 default:
6099 break;
6100 }
6101 return -1;
6102 }
6103
6104 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6105 DMODE is the expected destination mode. This is a recursive function. */
6106
6107 static rtx
6108 spu_emit_vector_compare (enum rtx_code rcode,
6109 rtx op0, rtx op1,
6110 enum machine_mode dmode)
6111 {
6112 int vec_cmp_insn;
6113 rtx mask;
6114 enum machine_mode dest_mode;
6115 enum machine_mode op_mode = GET_MODE (op1);
6116
6117 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6118
6119 /* Single-precision floating point vector compare instructions use a
6120 V4SImode destination; double-precision ones use a V2DImode destination.
6121 Move the result to the appropriate mode later. */
6122 if (dmode == V4SFmode)
6123 dest_mode = V4SImode;
6124 else if (dmode == V2DFmode)
6125 dest_mode = V2DImode;
6126 else
6127 dest_mode = dmode;
6128
6129 mask = gen_reg_rtx (dest_mode);
6130 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6131
6132 if (vec_cmp_insn == -1)
6133 {
6134 bool swap_operands = false;
6135 bool try_again = false;
6136 switch (rcode)
6137 {
6138 case LT:
6139 rcode = GT;
6140 swap_operands = true;
6141 try_again = true;
6142 break;
6143 case LTU:
6144 rcode = GTU;
6145 swap_operands = true;
6146 try_again = true;
6147 break;
6148 case NE:
6149 case UNEQ:
6150 case UNLE:
6151 case UNLT:
6152 case UNGE:
6153 case UNGT:
6154 case UNORDERED:
6155 /* Treat A != B as ~(A==B). */
6156 {
6157 enum rtx_code rev_code;
6158 enum insn_code nor_code;
6159 rtx rev_mask;
6160
6161 rev_code = reverse_condition_maybe_unordered (rcode);
6162 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6163
6164 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6165 gcc_assert (nor_code != CODE_FOR_nothing);
6166 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6167 if (dmode != dest_mode)
6168 {
6169 rtx temp = gen_reg_rtx (dest_mode);
6170 convert_move (temp, mask, 0);
6171 return temp;
6172 }
6173 return mask;
6174 }
6175 break;
6176 case GE:
6177 case GEU:
6178 case LE:
6179 case LEU:
6180 /* Try GT/GTU/LT/LTU OR EQ */
6181 {
6182 rtx c_rtx, eq_rtx;
6183 enum insn_code ior_code;
6184 enum rtx_code new_code;
6185
6186 switch (rcode)
6187 {
6188 case GE: new_code = GT; break;
6189 case GEU: new_code = GTU; break;
6190 case LE: new_code = LT; break;
6191 case LEU: new_code = LTU; break;
6192 default:
6193 gcc_unreachable ();
6194 }
6195
6196 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6197 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6198
6199 ior_code = optab_handler (ior_optab, dest_mode);
6200 gcc_assert (ior_code != CODE_FOR_nothing);
6201 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6202 if (dmode != dest_mode)
6203 {
6204 rtx temp = gen_reg_rtx (dest_mode);
6205 convert_move (temp, mask, 0);
6206 return temp;
6207 }
6208 return mask;
6209 }
6210 break;
6211 case LTGT:
6212 /* Try LT OR GT */
6213 {
6214 rtx lt_rtx, gt_rtx;
6215 enum insn_code ior_code;
6216
6217 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6218 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6219
6220 ior_code = optab_handler (ior_optab, dest_mode);
6221 gcc_assert (ior_code != CODE_FOR_nothing);
6222 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6223 if (dmode != dest_mode)
6224 {
6225 rtx temp = gen_reg_rtx (dest_mode);
6226 convert_move (temp, mask, 0);
6227 return temp;
6228 }
6229 return mask;
6230 }
6231 break;
6232 case ORDERED:
6233 /* Implement as (A==A) & (B==B) */
6234 {
6235 rtx a_rtx, b_rtx;
6236 enum insn_code and_code;
6237
6238 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6239 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6240
6241 and_code = optab_handler (and_optab, dest_mode);
6242 gcc_assert (and_code != CODE_FOR_nothing);
6243 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6244 if (dmode != dest_mode)
6245 {
6246 rtx temp = gen_reg_rtx (dest_mode);
6247 convert_move (temp, mask, 0);
6248 return temp;
6249 }
6250 return mask;
6251 }
6252 break;
6253 default:
6254 gcc_unreachable ();
6255 }
6256
6257 /* You only get two chances. */
6258 if (try_again)
6259 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6260
6261 gcc_assert (vec_cmp_insn != -1);
6262
6263 if (swap_operands)
6264 {
6265 rtx tmp;
6266 tmp = op0;
6267 op0 = op1;
6268 op1 = tmp;
6269 }
6270 }
6271
6272 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6273 if (dmode != dest_mode)
6274 {
6275 rtx temp = gen_reg_rtx (dest_mode);
6276 convert_move (temp, mask, 0);
6277 return temp;
6278 }
6279 return mask;
6280 }
6281
6282
6283 /* Emit vector conditional expression.
6284 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
6285 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6286
6287 int
6288 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6289 rtx cond, rtx cc_op0, rtx cc_op1)
6290 {
6291 enum machine_mode dest_mode = GET_MODE (dest);
6292 enum rtx_code rcode = GET_CODE (cond);
6293 rtx mask;
6294
6295 /* Get the vector mask for the given relational operations. */
6296 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6297
6298 emit_insn(gen_selb (dest, op2, op1, mask));
6299
6300 return 1;
6301 }
6302
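/* Descriptive comment (added): force OP into a register of mode MODE for
   use as a builtin operand, converting VOIDmode/BLKmode constants first
   and using either a subreg (same size) or a spu_convert (different
   size).  */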
6303 static rtx
6304 spu_force_reg (enum machine_mode mode, rtx op)
6305 {
6306 rtx x, r;
6307 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6308 {
6309 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6310 || GET_MODE (op) == BLKmode)
6311 return force_reg (mode, convert_to_mode (mode, op, 0));
6312 abort ();
6313 }
6314
6315 r = force_reg (GET_MODE (op), op);
6316 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6317 {
6318 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6319 if (x)
6320 return x;
6321 }
6322
6323 x = gen_reg_rtx (mode);
6324 emit_insn (gen_spu_convert (x, r));
6325 return x;
6326 }
6327
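/* Descriptive comment (added): check operand OP of builtin D against the
   range and alignment requirements implied by the parameter type P,
   emitting errors or warnings for out-of-range or misaligned
   immediates.  */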
6328 static void
6329 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6330 {
6331 HOST_WIDE_INT v = 0;
6332 int lsbits;
6333 /* Check the range of immediate operands. */
6334 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6335 {
6336 int range = p - SPU_BTI_7;
6337
6338 if (!CONSTANT_P (op))
6339 error ("%s expects an integer literal in the range [%d, %d]",
6340 d->name,
6341 spu_builtin_range[range].low, spu_builtin_range[range].high);
6342
6343 if (GET_CODE (op) == CONST
6344 && (GET_CODE (XEXP (op, 0)) == PLUS
6345 || GET_CODE (XEXP (op, 0)) == MINUS))
6346 {
6347 v = INTVAL (XEXP (XEXP (op, 0), 1));
6348 op = XEXP (XEXP (op, 0), 0);
6349 }
6350 else if (GET_CODE (op) == CONST_INT)
6351 v = INTVAL (op);
6352 else if (GET_CODE (op) == CONST_VECTOR
6353 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6354 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6355
6356 /* The default for v is 0, which is valid in every range. */
6357 if (v < spu_builtin_range[range].low
6358 || v > spu_builtin_range[range].high)
6359 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6360 d->name,
6361 spu_builtin_range[range].low, spu_builtin_range[range].high,
6362 v);
6363
6364 switch (p)
6365 {
6366 case SPU_BTI_S10_4:
6367 lsbits = 4;
6368 break;
6369 case SPU_BTI_U16_2:
6370 /* This is only used by lqa and stqa. Even though the insns
6371 encode 16 bits of the address (all but the 2 least
6372 significant), only 14 bits are used because the address is
6373 masked to be 16-byte aligned. */
6374 lsbits = 4;
6375 break;
6376 case SPU_BTI_S16_2:
6377 /* This is used for lqr and stqr. */
6378 lsbits = 2;
6379 break;
6380 default:
6381 lsbits = 0;
6382 }
6383
6384 if (GET_CODE (op) == LABEL_REF
6385 || (GET_CODE (op) == SYMBOL_REF
6386 && SYMBOL_REF_FUNCTION_P (op))
6387 || (v & ((1 << lsbits) - 1)) != 0)
6388 warning (0, "%d least significant bits of %s are ignored", lsbits,
6389 d->name);
6390 }
6391 }
6392
6393
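/* Descriptive comment (added): expand the arguments of the call EXP to
   builtin D into OPS[], with TARGET first when the builtin produces a
   value.  Return the number of operands filled in.  */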
6394 static int
6395 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6396 rtx target, rtx ops[])
6397 {
6398 enum insn_code icode = (enum insn_code) d->icode;
6399 int i = 0, a;
6400
6401 /* Expand the arguments into rtl. */
6402
6403 if (d->parm[0] != SPU_BTI_VOID)
6404 ops[i++] = target;
6405
6406 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6407 {
6408 tree arg = CALL_EXPR_ARG (exp, a);
6409 if (arg == 0)
6410 abort ();
6411 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6412 }
6413
6414 gcc_assert (i == insn_data[icode].n_generator_args);
6415 return i;
6416 }
6417
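/* Descriptive comment (added): do the work of expanding a call EXP to the
   builtin described by D.  TARGET is a suggested place for the result.  */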
6418 static rtx
6419 spu_expand_builtin_1 (struct spu_builtin_description *d,
6420 tree exp, rtx target)
6421 {
6422 rtx pat;
6423 rtx ops[8];
6424 enum insn_code icode = (enum insn_code) d->icode;
6425 enum machine_mode mode, tmode;
6426 int i, p;
6427 int n_operands;
6428 tree return_type;
6429
6430 /* Set up ops[] with values from arglist. */
6431 n_operands = expand_builtin_args (d, exp, target, ops);
6432
6433 /* Handle the target operand which must be operand 0. */
6434 i = 0;
6435 if (d->parm[0] != SPU_BTI_VOID)
6436 {
6437
6438 /* We prefer the mode specified for the match_operand; otherwise
6439 use the mode from the builtin function prototype. */
6440 tmode = insn_data[d->icode].operand[0].mode;
6441 if (tmode == VOIDmode)
6442 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6443
6444 /* Try to use target, because not using it can lead to extra copies,
6445 and when all of the registers are in use those extra copies lead
6446 to extra spills. */
6447 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6448 ops[0] = target;
6449 else
6450 target = ops[0] = gen_reg_rtx (tmode);
6451
6452 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6453 abort ();
6454
6455 i++;
6456 }
6457
6458 if (d->fcode == SPU_MASK_FOR_LOAD)
6459 {
6460 enum machine_mode mode = insn_data[icode].operand[1].mode;
6461 tree arg;
6462 rtx addr, op, pat;
6463
6464 /* Get the address. */
6465 arg = CALL_EXPR_ARG (exp, 0);
6466 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6467 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6468 addr = memory_address (mode, op);
6469
6470 /* Negate the address. */
6471 op = gen_reg_rtx (GET_MODE (addr));
6472 emit_insn (gen_rtx_SET (VOIDmode, op,
6473 gen_rtx_NEG (GET_MODE (addr), addr)));
6474 op = gen_rtx_MEM (mode, op);
6475
6476 pat = GEN_FCN (icode) (target, op);
6477 if (!pat)
6478 return 0;
6479 emit_insn (pat);
6480 return target;
6481 }
6482
6483 /* Ignore align_hint, but still expand its args in case they have
6484 side effects. */
6485 if (icode == CODE_FOR_spu_align_hint)
6486 return 0;
6487
6488 /* Handle the rest of the operands. */
6489 for (p = 1; i < n_operands; i++, p++)
6490 {
6491 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6492 mode = insn_data[d->icode].operand[i].mode;
6493 else
6494 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6495
6496 /* MODE can be VOIDmode here for labels. */
6497
6498 /* For specific intrinsics with an immediate operand, e.g.,
6499 si_ai(), we sometimes need to convert the scalar argument to a
6500 vector argument by splatting the scalar. */
6501 if (VECTOR_MODE_P (mode)
6502 && (GET_CODE (ops[i]) == CONST_INT
6503 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6504 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6505 {
6506 if (GET_CODE (ops[i]) == CONST_INT)
6507 ops[i] = spu_const (mode, INTVAL (ops[i]));
6508 else
6509 {
6510 rtx reg = gen_reg_rtx (mode);
6511 enum machine_mode imode = GET_MODE_INNER (mode);
6512 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6513 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6514 if (imode != GET_MODE (ops[i]))
6515 ops[i] = convert_to_mode (imode, ops[i],
6516 TYPE_UNSIGNED (spu_builtin_types
6517 [d->parm[i]]));
6518 emit_insn (gen_spu_splats (reg, ops[i]));
6519 ops[i] = reg;
6520 }
6521 }
6522
6523 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6524
6525 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6526 ops[i] = spu_force_reg (mode, ops[i]);
6527 }
6528
6529 switch (n_operands)
6530 {
6531 case 0:
6532 pat = GEN_FCN (icode) (0);
6533 break;
6534 case 1:
6535 pat = GEN_FCN (icode) (ops[0]);
6536 break;
6537 case 2:
6538 pat = GEN_FCN (icode) (ops[0], ops[1]);
6539 break;
6540 case 3:
6541 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6542 break;
6543 case 4:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6545 break;
6546 case 5:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6548 break;
6549 case 6:
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6551 break;
6552 default:
6553 abort ();
6554 }
6555
6556 if (!pat)
6557 abort ();
6558
6559 if (d->type == B_CALL || d->type == B_BISLED)
6560 emit_call_insn (pat);
6561 else if (d->type == B_JUMP)
6562 {
6563 emit_jump_insn (pat);
6564 emit_barrier ();
6565 }
6566 else
6567 emit_insn (pat);
6568
6569 return_type = spu_builtin_types[d->parm[0]];
6570 if (d->parm[0] != SPU_BTI_VOID
6571 && GET_MODE (target) != TYPE_MODE (return_type))
6572 {
6573 /* TARGET is the return value. It should always have the mode of
6574 the builtin function prototype. */
6575 target = spu_force_reg (TYPE_MODE (return_type), target);
6576 }
6577
6578 return target;
6579 }
6580
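/* Descriptive comment (added): implement TARGET_EXPAND_BUILTIN.  */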
6581 rtx
6582 spu_expand_builtin (tree exp,
6583 rtx target,
6584 rtx subtarget ATTRIBUTE_UNUSED,
6585 enum machine_mode mode ATTRIBUTE_UNUSED,
6586 int ignore ATTRIBUTE_UNUSED)
6587 {
6588 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6589 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6590 struct spu_builtin_description *d;
6591
6592 if (fcode < NUM_SPU_BUILTINS)
6593 {
6594 d = &spu_builtins[fcode];
6595
6596 return spu_expand_builtin_1 (d, exp, target);
6597 }
6598 abort ();
6599 }
6600
6601 /* Implement targetm.vectorize.builtin_mask_for_load. */
6602 static tree
6603 spu_builtin_mask_for_load (void)
6604 {
6605 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6606 }
6607
6608 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6609 static int
6610 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6611 tree vectype,
6612 int misalign ATTRIBUTE_UNUSED)
6613 {
6614 unsigned elements;
6615
6616 switch (type_of_cost)
6617 {
6618 case scalar_stmt:
6619 case vector_stmt:
6620 case vector_load:
6621 case vector_store:
6622 case vec_to_scalar:
6623 case scalar_to_vec:
6624 case cond_branch_not_taken:
6625 case vec_perm:
6626 case vec_promote_demote:
6627 return 1;
6628
6629 case scalar_store:
6630 return 10;
6631
6632 case scalar_load:
6633 /* Load + rotate. */
6634 return 2;
6635
6636 case unaligned_load:
6637 return 2;
6638
6639 case cond_branch_taken:
6640 return 6;
6641
6642 case vec_construct:
6643 elements = TYPE_VECTOR_SUBPARTS (vectype);
6644 return elements / 2 + 1;
6645
6646 default:
6647 gcc_unreachable ();
6648 }
6649 }
6650
6651 /* Implement targetm.vectorize.init_cost. */
6652
6653 static void *
6654 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6655 {
6656 unsigned *cost = XNEWVEC (unsigned, 3);
6657 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6658 return cost;
6659 }
6660
6661 /* Implement targetm.vectorize.add_stmt_cost. */
6662
6663 static unsigned
6664 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6665 struct _stmt_vec_info *stmt_info, int misalign,
6666 enum vect_cost_model_location where)
6667 {
6668 unsigned *cost = (unsigned *) data;
6669 unsigned retval = 0;
6670
6671 if (flag_vect_cost_model)
6672 {
6673 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6674 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6675
6676 /* Statements in an inner loop relative to the loop being
6677 vectorized are weighted more heavily. The value here is
6678 arbitrary and could potentially be improved with analysis. */
6679 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6680 count *= 50; /* FIXME. */
6681
6682 retval = (unsigned) (count * stmt_cost);
6683 cost[where] += retval;
6684 }
6685
6686 return retval;
6687 }
6688
6689 /* Implement targetm.vectorize.finish_cost. */
6690
6691 static void
6692 spu_finish_cost (void *data, unsigned *prologue_cost,
6693 unsigned *body_cost, unsigned *epilogue_cost)
6694 {
6695 unsigned *cost = (unsigned *) data;
6696 *prologue_cost = cost[vect_prologue];
6697 *body_cost = cost[vect_body];
6698 *epilogue_cost = cost[vect_epilogue];
6699 }
6700
6701 /* Implement targetm.vectorize.destroy_cost_data. */
6702
6703 static void
6704 spu_destroy_cost_data (void *data)
6705 {
6706 free (data);
6707 }
6708
6709 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6710 after applying N iterations. This routine does not determine
6711 how many iterations are required to reach the desired alignment. */
6712
6713 static bool
6714 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6715 {
6716 if (is_packed)
6717 return false;
6718
6719 /* All other types are naturally aligned. */
6720 return true;
6721 }
6722
6723 /* Return the appropriate mode for a named address pointer. */
6724 static enum machine_mode
6725 spu_addr_space_pointer_mode (addr_space_t addrspace)
6726 {
6727 switch (addrspace)
6728 {
6729 case ADDR_SPACE_GENERIC:
6730 return ptr_mode;
6731 case ADDR_SPACE_EA:
6732 return EAmode;
6733 default:
6734 gcc_unreachable ();
6735 }
6736 }
6737
6738 /* Return the appropriate mode for a named address address. */
6739 static enum machine_mode
6740 spu_addr_space_address_mode (addr_space_t addrspace)
6741 {
6742 switch (addrspace)
6743 {
6744 case ADDR_SPACE_GENERIC:
6745 return Pmode;
6746 case ADDR_SPACE_EA:
6747 return EAmode;
6748 default:
6749 gcc_unreachable ();
6750 }
6751 }
6752
6753 /* Determine if one named address space is a subset of another. */
6754
6755 static bool
6756 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6757 {
6758 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6759 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6760
6761 if (subset == superset)
6762 return true;
6763
6764 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6765 being subsets but instead as disjoint address spaces. */
6766 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6767 return false;
6768
6769 else
6770 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6771 }
6772
6773 /* Convert from one address space to another. */
6774 static rtx
6775 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6776 {
6777 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6778 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6779
6780 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6781 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6782
6783 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6784 {
6785 rtx result, ls;
6786
6787 ls = gen_const_mem (DImode,
6788 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6789 set_mem_align (ls, 128);
6790
6791 result = gen_reg_rtx (Pmode);
6792 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6793 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6794 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6795 ls, const0_rtx, Pmode, 1);
6796
6797 emit_insn (gen_subsi3 (result, op, ls));
6798
6799 return result;
6800 }
6801
6802 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6803 {
6804 rtx result, ls;
6805
6806 ls = gen_const_mem (DImode,
6807 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6808 set_mem_align (ls, 128);
6809
6810 result = gen_reg_rtx (EAmode);
6811 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6812 op = force_reg (Pmode, op);
6813 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6814 ls, const0_rtx, EAmode, 1);
6815 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6816
6817 if (EAmode == SImode)
6818 emit_insn (gen_addsi3 (result, op, ls));
6819 else
6820 emit_insn (gen_adddi3 (result, op, ls));
6821
6822 return result;
6823 }
6824
6825 else
6826 gcc_unreachable ();
6827 }
6828
6829
6830 /* Count the total number of instructions in each pipe and return the
6831 maximum, which is used as the Minimum Iteration Interval (MII)
6832 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6833 -2 marks instructions that can go in either pipe0 or pipe1. */
6834 static int
6835 spu_sms_res_mii (struct ddg *g)
6836 {
6837 int i;
6838 unsigned t[4] = {0, 0, 0, 0};
6839
6840 for (i = 0; i < g->num_nodes; i++)
6841 {
6842 rtx insn = g->nodes[i].insn;
6843 int p = get_pipe (insn) + 2;
6844
6845 gcc_assert (p >= 0);
6846 gcc_assert (p < 4);
6847
6848 t[p]++;
6849 if (dump_file && INSN_P (insn))
6850 fprintf (dump_file, "i%d %s %d %d\n",
6851 INSN_UID (insn),
6852 insn_data[INSN_CODE(insn)].name,
6853 p, t[p]);
6854 }
6855 if (dump_file)
6856 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6857
6858 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6859 }
6860
6861
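/* Descriptive comment (added): per-function expander setup.  Note that the
   hard frame pointer is only guaranteed 8-byte alignment, and pre-create
   two 128-bit-aligned pseudos just above LAST_VIRTUAL_REGISTER for the
   passes that expect them.  */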
6862 void
6863 spu_init_expanders (void)
6864 {
6865 if (cfun)
6866 {
6867 rtx r0, r1;
6868 /* The hard frame pointer register is only 128-bit aligned when
6869 frame_pointer_needed is true. We don't know that until we're
6870 expanding the prologue. */
6871 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6872
6873 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6874 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6875 to be treated as aligned, so generate them here. */
6876 r0 = gen_reg_rtx (SImode);
6877 r1 = gen_reg_rtx (SImode);
6878 mark_reg_pointer (r0, 128);
6879 mark_reg_pointer (r1, 128);
6880 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6881 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6882 }
6883 }
6884
6885 static enum machine_mode
6886 spu_libgcc_cmp_return_mode (void)
6887 {
6888
6889 /* For SPU, word_mode is TImode, so it is better to use SImode
6890 for compare returns. */
6891 return SImode;
6892 }
6893
6894 static enum machine_mode
6895 spu_libgcc_shift_count_mode (void)
6896 {
6897 /* For SPU, word_mode is TImode, so it is better to use SImode
6898 for shift counts. */
6899 return SImode;
6900 }
6901
6902 /* Implement targetm.section_type_flags. */
6903 static unsigned int
6904 spu_section_type_flags (tree decl, const char *name, int reloc)
6905 {
6906 /* .toe needs to have type @nobits. */
6907 if (strcmp (name, ".toe") == 0)
6908 return SECTION_BSS;
6909 /* Don't load _ea into the current address space. */
6910 if (strcmp (name, "._ea") == 0)
6911 return SECTION_WRITE | SECTION_DEBUG;
6912 return default_section_type_flags (decl, name, reloc);
6913 }
6914
6915 /* Implement targetm.select_section. */
6916 static section *
6917 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6918 {
6919 /* Variables and constants defined in the __ea address space
6920 go into a special section named "._ea". */
6921 if (TREE_TYPE (decl) != error_mark_node
6922 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6923 {
6924 /* We might get called with string constants, but get_named_section
6925 doesn't like them as they are not DECLs. Also, we need to set
6926 flags in that case. */
6927 if (!DECL_P (decl))
6928 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6929
6930 return get_named_section (decl, "._ea", reloc);
6931 }
6932
6933 return default_elf_select_section (decl, reloc, align);
6934 }
6935
6936 /* Implement targetm.unique_section. */
6937 static void
6938 spu_unique_section (tree decl, int reloc)
6939 {
6940 /* We don't support unique section names in the __ea address
6941 space for now. */
6942 if (TREE_TYPE (decl) != error_mark_node
6943 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6944 return;
6945
6946 default_unique_section (decl, reloc);
6947 }
6948
6949 /* Generate a constant or register which contains 2^SCALE. We assume
6950 the result is valid for MODE. Currently, MODE must be V4SFmode and
6951 SCALE must be SImode. */
6952 rtx
6953 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6954 {
6955 gcc_assert (mode == V4SFmode);
6956 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6957 if (GET_CODE (scale) != CONST_INT)
6958 {
6959 /* unsigned int exp = (127 + scale) << 23;
6960 __vector float m = (__vector float) spu_splats (exp); */
6961 rtx reg = force_reg (SImode, scale);
6962 rtx exp = gen_reg_rtx (SImode);
6963 rtx mul = gen_reg_rtx (mode);
6964 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6965 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6966 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6967 return mul;
6968 }
6969 else
6970 {
6971 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6972 unsigned char arr[16];
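/* Comment (added): build the big-endian byte image of the single-precision
   value 2**SCALE, whose bit pattern is (127 + SCALE) << 23.  Byte 0 of
   each word holds exp >> 1 and byte 1 holds the remaining exponent bit in
   its msb.  E.g. SCALE == 1 gives exp == 128, so each word becomes
   0x40000000, which is 2.0f.  */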
6973 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6974 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6975 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6976 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6977 return array_to_constant (mode, arr);
6978 }
6979 }
6980
6981 /* After reload, just change the convert into a move instruction
6982 or a dead instruction. */
6983 void
6984 spu_split_convert (rtx ops[])
6985 {
6986 if (REGNO (ops[0]) == REGNO (ops[1]))
6987 emit_note (NOTE_INSN_DELETED);
6988 else
6989 {
6990 /* Use TImode always as this might help hard reg copyprop. */
6991 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6992 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6993 emit_insn (gen_move_insn (op0, op1));
6994 }
6995 }
6996
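/* Descriptive comment (added): emit the profiling call "brsl $75, _mcount",
   which leaves the return address in register 75.  */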
6997 void
6998 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6999 {
7000 fprintf (file, "# profile\n");
7001 fprintf (file, "brsl $75, _mcount\n");
7002 }
7003
7004 /* Implement targetm.ref_may_alias_errno. */
7005 static bool
7006 spu_ref_may_alias_errno (ao_ref *ref)
7007 {
7008 tree base = ao_ref_base (ref);
7009
7010 /* With SPU newlib, errno is defined as something like
7011 _impure_data._errno
7012 The default implementation of this target macro does not
7013 recognize such expressions, so we special-case it here. */
7014
7015 if (TREE_CODE (base) == VAR_DECL
7016 && !TREE_STATIC (base)
7017 && DECL_EXTERNAL (base)
7018 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7019 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7020 "_impure_data") == 0
7021 /* _errno is the first member of _impure_data. */
7022 && ref->offset == 0)
7023 return true;
7024
7025 return default_ref_may_alias_errno (ref);
7026 }
7027
7028 /* Output thunk to FILE that implements a C++ virtual function call (with
7029 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7030 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7031 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7032 relative to the resulting this pointer. */
7033
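/* Illustrative note (added): when DELTA fits in the 10-bit "ai" immediate
   range and VCALL_OFFSET is zero, the whole thunk reduces to a single
   "ai" on the this pointer followed by "br FUNCTION".  */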
7034 static void
7035 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7036 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7037 tree function)
7038 {
7039 rtx op[8];
7040
7041 /* Make sure unwind info is emitted for the thunk if needed. */
7042 final_start_function (emit_barrier (), file, 1);
7043
7044 /* Operand 0 is the target function. */
7045 op[0] = XEXP (DECL_RTL (function), 0);
7046
7047 /* Operand 1 is the 'this' pointer. */
7048 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7049 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7050 else
7051 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7052
7053 /* Operands 2/3 are the low/high halfwords of delta. */
7054 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7055 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7056
7057 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7058 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7059 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7060
7061 /* Operands 6/7 are temporary registers. */
7062 op[6] = gen_rtx_REG (Pmode, 79);
7063 op[7] = gen_rtx_REG (Pmode, 78);
7064
7065 /* Add DELTA to this pointer. */
7066 if (delta)
7067 {
7068 if (delta >= -0x200 && delta < 0x200)
7069 output_asm_insn ("ai\t%1,%1,%2", op);
7070 else if (delta >= -0x8000 && delta < 0x8000)
7071 {
7072 output_asm_insn ("il\t%6,%2", op);
7073 output_asm_insn ("a\t%1,%1,%6", op);
7074 }
7075 else
7076 {
7077 output_asm_insn ("ilhu\t%6,%3", op);
7078 output_asm_insn ("iohl\t%6,%2", op);
7079 output_asm_insn ("a\t%1,%1,%6", op);
7080 }
7081 }
7082
7083 /* Perform vcall adjustment. */
7084 if (vcall_offset)
7085 {
7086 output_asm_insn ("lqd\t%7,0(%1)", op);
7087 output_asm_insn ("rotqby\t%7,%7,%1", op);
7088
7089 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7090 output_asm_insn ("ai\t%7,%7,%4", op);
7091 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7092 {
7093 output_asm_insn ("il\t%6,%4", op);
7094 output_asm_insn ("a\t%7,%7,%6", op);
7095 }
7096 else
7097 {
7098 output_asm_insn ("ilhu\t%6,%5", op);
7099 output_asm_insn ("iohl\t%6,%4", op);
7100 output_asm_insn ("a\t%7,%7,%6", op);
7101 }
7102
7103 output_asm_insn ("lqd\t%6,0(%7)", op);
7104 output_asm_insn ("rotqby\t%6,%6,%7", op);
7105 output_asm_insn ("a\t%1,%1,%6", op);
7106 }
7107
7108 /* Jump to target. */
7109 output_asm_insn ("br\t%0", op);
7110
7111 final_end_function ();
7112 }
7113
7114 /* Canonicalize a comparison from one we don't have to one we do have. */
7115 static void
7116 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7117 bool op0_preserve_value)
7118 {
7119 if (!op0_preserve_value
7120 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7121 {
7122 rtx tem = *op0;
7123 *op0 = *op1;
7124 *op1 = tem;
7125 *code = (int)swap_condition ((enum rtx_code)*code);
7126 }
7127 }
7128 \f
7129 /* Table of machine attributes. */
7130 static const struct attribute_spec spu_attribute_table[] =
7131 {
7132 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7133 affects_type_identity } */
7134 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7135 false },
7136 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7137 false },
7138 { NULL, 0, 0, false, false, false, NULL, false }
7139 };
7140
7141 /* TARGET overrides. */
7142
7143 #undef TARGET_ADDR_SPACE_POINTER_MODE
7144 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7145
7146 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7147 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7148
7149 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7150 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7151 spu_addr_space_legitimate_address_p
7152
7153 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7154 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7155
7156 #undef TARGET_ADDR_SPACE_SUBSET_P
7157 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7158
7159 #undef TARGET_ADDR_SPACE_CONVERT
7160 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7161
7162 #undef TARGET_INIT_BUILTINS
7163 #define TARGET_INIT_BUILTINS spu_init_builtins
7164 #undef TARGET_BUILTIN_DECL
7165 #define TARGET_BUILTIN_DECL spu_builtin_decl
7166
7167 #undef TARGET_EXPAND_BUILTIN
7168 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7169
7170 #undef TARGET_UNWIND_WORD_MODE
7171 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7172
7173 #undef TARGET_LEGITIMIZE_ADDRESS
7174 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7175
7176 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7177 and .quad for the debugger. Once the assembler is known to be fixed,
7178 these can be removed. */
7179 #undef TARGET_ASM_UNALIGNED_SI_OP
7180 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7181
7182 #undef TARGET_ASM_ALIGNED_DI_OP
7183 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7184
7185 /* The .8byte directive doesn't seem to work well for a 32 bit
7186 architecture. */
7187 #undef TARGET_ASM_UNALIGNED_DI_OP
7188 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7189
7190 #undef TARGET_RTX_COSTS
7191 #define TARGET_RTX_COSTS spu_rtx_costs
7192
7193 #undef TARGET_ADDRESS_COST
7194 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7195
7196 #undef TARGET_SCHED_ISSUE_RATE
7197 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7198
7199 #undef TARGET_SCHED_INIT_GLOBAL
7200 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7201
7202 #undef TARGET_SCHED_INIT
7203 #define TARGET_SCHED_INIT spu_sched_init
7204
7205 #undef TARGET_SCHED_VARIABLE_ISSUE
7206 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7207
7208 #undef TARGET_SCHED_REORDER
7209 #define TARGET_SCHED_REORDER spu_sched_reorder
7210
7211 #undef TARGET_SCHED_REORDER2
7212 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7213
7214 #undef TARGET_SCHED_ADJUST_COST
7215 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7216
7217 #undef TARGET_ATTRIBUTE_TABLE
7218 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7219
7220 #undef TARGET_ASM_INTEGER
7221 #define TARGET_ASM_INTEGER spu_assemble_integer
7222
7223 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7224 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7225
7226 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7227 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7228
7229 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7230 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7231
7232 #undef TARGET_ASM_GLOBALIZE_LABEL
7233 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7234
7235 #undef TARGET_PASS_BY_REFERENCE
7236 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7237
7238 #undef TARGET_FUNCTION_ARG
7239 #define TARGET_FUNCTION_ARG spu_function_arg
7240
7241 #undef TARGET_FUNCTION_ARG_ADVANCE
7242 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7243
7244 #undef TARGET_MUST_PASS_IN_STACK
7245 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7246
7247 #undef TARGET_BUILD_BUILTIN_VA_LIST
7248 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7249
7250 #undef TARGET_EXPAND_BUILTIN_VA_START
7251 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7252
7253 #undef TARGET_SETUP_INCOMING_VARARGS
7254 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7255
7256 #undef TARGET_MACHINE_DEPENDENT_REORG
7257 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7258
7259 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7260 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7261
7262 #undef TARGET_INIT_LIBFUNCS
7263 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7264
7265 #undef TARGET_RETURN_IN_MEMORY
7266 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7267
7268 #undef TARGET_ENCODE_SECTION_INFO
7269 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7270
7271 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7272 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7273
7274 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7275 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7276
7277 #undef TARGET_VECTORIZE_INIT_COST
7278 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7279
7280 #undef TARGET_VECTORIZE_ADD_STMT_COST
7281 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7282
7283 #undef TARGET_VECTORIZE_FINISH_COST
7284 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7285
7286 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7287 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7288
7289 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7290 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7291
7292 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7293 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7294
7295 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7296 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7297
7298 #undef TARGET_SCHED_SMS_RES_MII
7299 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7300
7301 #undef TARGET_SECTION_TYPE_FLAGS
7302 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7303
7304 #undef TARGET_ASM_SELECT_SECTION
7305 #define TARGET_ASM_SELECT_SECTION spu_select_section
7306
7307 #undef TARGET_ASM_UNIQUE_SECTION
7308 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7309
7310 #undef TARGET_LEGITIMATE_ADDRESS_P
7311 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7312
7313 #undef TARGET_LEGITIMATE_CONSTANT_P
7314 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7315
7316 #undef TARGET_TRAMPOLINE_INIT
7317 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7318
7319 #undef TARGET_WARN_FUNC_RETURN
7320 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7321
7322 #undef TARGET_OPTION_OVERRIDE
7323 #define TARGET_OPTION_OVERRIDE spu_option_override
7324
7325 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7326 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7327
7328 #undef TARGET_REF_MAY_ALIAS_ERRNO
7329 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7330
7331 #undef TARGET_ASM_OUTPUT_MI_THUNK
7332 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7333 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7334 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7335
7336 /* Variable tracking should be run after all optimizations which
7337 change order of insns. It also needs a valid CFG. */
7338 #undef TARGET_DELAY_VARTRACK
7339 #define TARGET_DELAY_VARTRACK true
7340
7341 #undef TARGET_CANONICALIZE_COMPARISON
7342 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7343
7344 #undef TARGET_CAN_USE_DOLOOP_P
7345 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7346
7347 struct gcc_target targetm = TARGET_INITIALIZER;
7348
7349 #include "gt-spu.h"