1 /* Copyright (C) 2006-2015 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "cfghooks.h"
22 #include "tree.h"
23 #include "gimple.h"
24 #include "rtl.h"
25 #include "df.h"
26 #include "regs.h"
27 #include "insn-config.h"
28 #include "conditions.h"
29 #include "insn-attr.h"
30 #include "flags.h"
31 #include "recog.h"
32 #include "alias.h"
33 #include "fold-const.h"
34 #include "stringpool.h"
35 #include "stor-layout.h"
36 #include "calls.h"
37 #include "varasm.h"
38 #include "expmed.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "emit-rtl.h"
42 #include "stmt.h"
43 #include "expr.h"
44 #include "insn-codes.h"
45 #include "optabs.h"
46 #include "except.h"
47 #include "output.h"
48 #include "cfgrtl.h"
49 #include "cfganal.h"
50 #include "lcm.h"
51 #include "cfgbuild.h"
52 #include "cfgcleanup.h"
53 #include "diagnostic-core.h"
54 #include "tm_p.h"
55 #include "target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "params.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimplify.h"
64 #include "tm-constrs.h"
65 #include "ddg.h"
66 #include "timevar.h"
67 #include "dumpfile.h"
68 #include "cfgloop.h"
69 #include "builtins.h"
70 #include "rtl-iter.h"
71
72 /* This file should be included last. */
73 #include "target-def.h"
74
75 /* Builtin types, data and prototypes. */
76
77 enum spu_builtin_type_index
78 {
79 SPU_BTI_END_OF_PARAMS,
80
81 /* We create new type nodes for these. */
82 SPU_BTI_V16QI,
83 SPU_BTI_V8HI,
84 SPU_BTI_V4SI,
85 SPU_BTI_V2DI,
86 SPU_BTI_V4SF,
87 SPU_BTI_V2DF,
88 SPU_BTI_UV16QI,
89 SPU_BTI_UV8HI,
90 SPU_BTI_UV4SI,
91 SPU_BTI_UV2DI,
92
93 /* A 16-byte type. (Implemented with V16QI_type_node) */
94 SPU_BTI_QUADWORD,
95
96 /* These all correspond to intSI_type_node */
97 SPU_BTI_7,
98 SPU_BTI_S7,
99 SPU_BTI_U7,
100 SPU_BTI_S10,
101 SPU_BTI_S10_4,
102 SPU_BTI_U14,
103 SPU_BTI_16,
104 SPU_BTI_S16,
105 SPU_BTI_S16_2,
106 SPU_BTI_U16,
107 SPU_BTI_U16_2,
108 SPU_BTI_U18,
109
110 /* These correspond to the standard types */
111 SPU_BTI_INTQI,
112 SPU_BTI_INTHI,
113 SPU_BTI_INTSI,
114 SPU_BTI_INTDI,
115
116 SPU_BTI_UINTQI,
117 SPU_BTI_UINTHI,
118 SPU_BTI_UINTSI,
119 SPU_BTI_UINTDI,
120
121 SPU_BTI_FLOAT,
122 SPU_BTI_DOUBLE,
123
124 SPU_BTI_VOID,
125 SPU_BTI_PTR,
126
127 SPU_BTI_MAX
128 };
129
130 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
131 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
132 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
133 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
134 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
135 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
136 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
137 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
138 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
139 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
140
141 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
142
143 struct spu_builtin_range
144 {
145 int low, high;
146 };
147
148 static struct spu_builtin_range spu_builtin_range[] = {
149 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
150 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
151 {0ll, 0x7fll}, /* SPU_BTI_U7 */
152 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
153 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
154 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
155 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
156 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
157 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
158 {0ll, 0xffffll}, /* SPU_BTI_U16 */
159 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
160 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
161 };
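/* Illustrative note (added; not part of the original source): these
   ranges bound the immediate operands accepted by the corresponding
   builtin argument kinds, e.g. an SPU_BTI_U7 argument must lie in
   [0, 0x7f] and an SPU_BTI_S10 argument in [-0x200, 0x1ff].  The code
   that performs the range check is outside this excerpt.  */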
162
163 \f
164 /* Target specific attribute specifications. */
165 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
166
167 /* Prototypes and external defs. */
168 static int get_pipe (rtx_insn *insn);
169 static int spu_naked_function_p (tree func);
170 static int mem_is_padded_component_ref (rtx x);
171 static void fix_range (const char *);
172 static rtx spu_expand_load (rtx, rtx, rtx, int);
173
174 /* Which instruction set architecture to use. */
175 int spu_arch;
176 /* Which cpu are we tuning for. */
177 int spu_tune;
178
179 /* The hardware requires 8 insns between a hint and the branch it
180 affects. This variable describes how many rtl instructions the
181 compiler needs to see before inserting a hint, and then the compiler
182 will insert enough nops to make it at least 8 insns. The default is
183 for the compiler to allow up to 2 nops to be emitted. The nops are
184 inserted in pairs, so we round down. */
185 int spu_hint_dist = (8*4) - (2*4);
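/* Worked example (added comment, not in the original source): with the
   default of 2 nops allowed, the hint only needs to be seen
   (8 - 2) * 4 == 24 bytes ahead of the branch, which matches the
   (8*4) - (2*4) initializer above; spu_option_override later recomputes
   this as 8*4 - spu_max_nops*4.  */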
186
187 enum spu_immediate {
188 SPU_NONE,
189 SPU_IL,
190 SPU_ILA,
191 SPU_ILH,
192 SPU_ILHU,
193 SPU_ORI,
194 SPU_ORHI,
195 SPU_ORBI,
196 SPU_IOHL
197 };
198 enum immediate_class
199 {
200 IC_POOL, /* constant pool */
201 IC_IL1, /* one il* instruction */
202 IC_IL2, /* both ilhu and iohl instructions */
203 IC_IL1s, /* one il* instruction */
204 IC_IL2s, /* both ilhu and iohl instructions */
205 IC_FSMBI, /* the fsmbi instruction */
206 IC_CPAT, /* one of the c*d instructions */
207 IC_FSMBI2 /* fsmbi plus 1 other instruction */
208 };
209
210 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
211 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
212 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
213 static enum immediate_class classify_immediate (rtx op,
214 machine_mode mode);
215
216 /* Pointer mode for __ea references. */
217 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
218
219 \f
220 /* Define the structure for the machine field in struct function. */
221 struct GTY(()) machine_function
222 {
223 /* Register to use for PIC accesses. */
224 rtx pic_reg;
225 };
226
227 /* How to allocate a 'struct machine_function'. */
228 static struct machine_function *
229 spu_init_machine_status (void)
230 {
231 return ggc_cleared_alloc<machine_function> ();
232 }
233
234 /* Implement TARGET_OPTION_OVERRIDE. */
235 static void
236 spu_option_override (void)
237 {
238 /* Set up function hooks. */
239 init_machine_status = spu_init_machine_status;
240
241 /* Small loops will be completely peeled at -O3. For SPU it is more important
242 to keep code small by default. */
243 if (!flag_unroll_loops && !flag_peel_loops)
244 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
245 global_options.x_param_values,
246 global_options_set.x_param_values);
247
248 flag_omit_frame_pointer = 1;
249
250 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
251 if (align_functions < 8)
252 align_functions = 8;
253
254 spu_hint_dist = 8*4 - spu_max_nops*4;
255 if (spu_hint_dist < 0)
256 spu_hint_dist = 0;
257
258 if (spu_fixed_range_string)
259 fix_range (spu_fixed_range_string);
260
261 /* Determine processor architectural level. */
262 if (spu_arch_string)
263 {
264 if (strcmp (&spu_arch_string[0], "cell") == 0)
265 spu_arch = PROCESSOR_CELL;
266 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
267 spu_arch = PROCESSOR_CELLEDP;
268 else
269 error ("bad value (%s) for -march= switch", spu_arch_string);
270 }
271
272 /* Determine processor to tune for. */
273 if (spu_tune_string)
274 {
275 if (strcmp (&spu_tune_string[0], "cell") == 0)
276 spu_tune = PROCESSOR_CELL;
277 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
278 spu_tune = PROCESSOR_CELLEDP;
279 else
280 error ("bad value (%s) for -mtune= switch", spu_tune_string);
281 }
282
283 /* Change defaults according to the processor architecture. */
284 if (spu_arch == PROCESSOR_CELLEDP)
285 {
286 /* If no command line option has been otherwise specified, change
287 the default to -mno-safe-hints on celledp -- only the original
288 Cell/B.E. processors require this workaround. */
289 if (!(target_flags_explicit & MASK_SAFE_HINTS))
290 target_flags &= ~MASK_SAFE_HINTS;
291 }
292
293 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
294 }
295 \f
296 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
297 struct attribute_spec.handler. */
298
299 /* True if MODE is valid for the target. By "valid", we mean able to
300 be manipulated in non-trivial ways. In particular, this means all
301 the arithmetic is supported. */
302 static bool
303 spu_scalar_mode_supported_p (machine_mode mode)
304 {
305 switch (mode)
306 {
307 case QImode:
308 case HImode:
309 case SImode:
310 case SFmode:
311 case DImode:
312 case TImode:
313 case DFmode:
314 return true;
315
316 default:
317 return false;
318 }
319 }
320
321 /* Similarly for vector modes. "Supported" here is less strict. At
322 least some operations are supported; need to check optabs or builtins
323 for further details. */
324 static bool
325 spu_vector_mode_supported_p (machine_mode mode)
326 {
327 switch (mode)
328 {
329 case V16QImode:
330 case V8HImode:
331 case V4SImode:
332 case V2DImode:
333 case V4SFmode:
334 case V2DFmode:
335 return true;
336
337 default:
338 return false;
339 }
340 }
341
342 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
343 least significant bytes of the outer mode. This function returns
344 TRUE for the SUBREG's where this is correct. */
345 int
346 valid_subreg (rtx op)
347 {
348 machine_mode om = GET_MODE (op);
349 machine_mode im = GET_MODE (SUBREG_REG (op));
350 return om != VOIDmode && im != VOIDmode
351 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
352 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
353 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
354 }
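/* Illustrative note (added; not from the original source): under the
   test above, (subreg:SI (reg:QI)) and (subreg:HI (reg:SI)) are valid
   because both modes fit in 4 bytes, and (subreg:V4SI (reg:TI)) is
   valid because both are 16 bytes, while (subreg:DI (reg:SI)) is
   rejected since the sizes differ and fall in neither group.  */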
355
356 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
357 and adjust the start offset. */
358 static rtx
359 adjust_operand (rtx op, HOST_WIDE_INT * start)
360 {
361 machine_mode mode;
362 int op_size;
363 /* Strip any paradoxical SUBREG. */
364 if (GET_CODE (op) == SUBREG
365 && (GET_MODE_BITSIZE (GET_MODE (op))
366 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
367 {
368 if (start)
369 *start -=
370 GET_MODE_BITSIZE (GET_MODE (op)) -
371 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
372 op = SUBREG_REG (op);
373 }
374 /* If it is smaller than SI, ensure a SUBREG. */
375 op_size = GET_MODE_BITSIZE (GET_MODE (op));
376 if (op_size < 32)
377 {
378 if (start)
379 *start += 32 - op_size;
380 op_size = 32;
381 }
382 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
383 mode = mode_for_size (op_size, MODE_INT, 0);
384 if (mode != GET_MODE (op))
385 op = gen_rtx_SUBREG (mode, op, 0);
386 return op;
387 }
388
389 void
390 spu_expand_extv (rtx ops[], int unsignedp)
391 {
392 rtx dst = ops[0], src = ops[1];
393 HOST_WIDE_INT width = INTVAL (ops[2]);
394 HOST_WIDE_INT start = INTVAL (ops[3]);
395 HOST_WIDE_INT align_mask;
396 rtx s0, s1, mask, r0;
397
398 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
399
400 if (MEM_P (src))
401 {
402 /* First, determine if we need 1 TImode load or 2. We need only 1
403 if the bits being extracted do not cross the alignment boundary
404 as determined by the MEM and its address. */
405
406 align_mask = -MEM_ALIGN (src);
407 if ((start & align_mask) == ((start + width - 1) & align_mask))
408 {
409 /* Alignment is sufficient for 1 load. */
410 s0 = gen_reg_rtx (TImode);
411 r0 = spu_expand_load (s0, 0, src, start / 8);
412 start &= 7;
413 if (r0)
414 emit_insn (gen_rotqby_ti (s0, s0, r0));
415 }
416 else
417 {
418 /* Need 2 loads. */
419 s0 = gen_reg_rtx (TImode);
420 s1 = gen_reg_rtx (TImode);
421 r0 = spu_expand_load (s0, s1, src, start / 8);
422 start &= 7;
423
424 gcc_assert (start + width <= 128);
425 if (r0)
426 {
427 rtx r1 = gen_reg_rtx (SImode);
428 mask = gen_reg_rtx (TImode);
429 emit_move_insn (mask, GEN_INT (-1));
430 emit_insn (gen_rotqby_ti (s0, s0, r0));
431 emit_insn (gen_rotqby_ti (s1, s1, r0));
432 if (GET_CODE (r0) == CONST_INT)
433 r1 = GEN_INT (INTVAL (r0) & 15);
434 else
435 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
436 emit_insn (gen_shlqby_ti (mask, mask, r1));
437 emit_insn (gen_selb (s0, s1, s0, mask));
438 }
439 }
440
441 }
442 else if (GET_CODE (src) == SUBREG)
443 {
444 rtx r = SUBREG_REG (src);
445 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
446 s0 = gen_reg_rtx (TImode);
447 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
448 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
449 else
450 emit_move_insn (s0, src);
451 }
452 else
453 {
454 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
455 s0 = gen_reg_rtx (TImode);
456 emit_move_insn (s0, src);
457 }
458
459 /* Now s0 is TImode and contains the bits to extract at start. */
460
461 if (start)
462 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
463
464 if (128 - width)
465 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
466
467 emit_move_insn (dst, s0);
468 }
469
470 void
471 spu_expand_insv (rtx ops[])
472 {
473 HOST_WIDE_INT width = INTVAL (ops[1]);
474 HOST_WIDE_INT start = INTVAL (ops[2]);
475 unsigned HOST_WIDE_INT maskbits;
476 machine_mode dst_mode;
477 rtx dst = ops[0], src = ops[3];
478 int dst_size;
479 rtx mask;
480 rtx shift_reg;
481 int shift;
482
483
484 if (GET_CODE (ops[0]) == MEM)
485 dst = gen_reg_rtx (TImode);
486 else
487 dst = adjust_operand (dst, &start);
488 dst_mode = GET_MODE (dst);
489 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
490
491 if (CONSTANT_P (src))
492 {
493 machine_mode m =
494 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
495 src = force_reg (m, convert_to_mode (m, src, 0));
496 }
497 src = adjust_operand (src, 0);
498
499 mask = gen_reg_rtx (dst_mode);
500 shift_reg = gen_reg_rtx (dst_mode);
501 shift = dst_size - start - width;
502
503 /* It's not safe to use subreg here because the compiler assumes
504 that the SUBREG_REG is right justified in the SUBREG. */
505 convert_move (shift_reg, src, 1);
506
507 if (shift > 0)
508 {
509 switch (dst_mode)
510 {
511 case SImode:
512 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
513 break;
514 case DImode:
515 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
516 break;
517 case TImode:
518 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
519 break;
520 default:
521 abort ();
522 }
523 }
524 else if (shift < 0)
525 abort ();
526
527 switch (dst_size)
528 {
529 case 32:
530 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
531 if (start)
532 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
533 emit_move_insn (mask, GEN_INT (maskbits));
534 break;
535 case 64:
536 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
537 if (start)
538 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
539 emit_move_insn (mask, GEN_INT (maskbits));
540 break;
541 case 128:
542 {
543 unsigned char arr[16];
544 int i = start / 8;
545 memset (arr, 0, sizeof (arr));
546 arr[i] = 0xff >> (start & 7);
547 for (i++; i <= (start + width - 1) / 8; i++)
548 arr[i] = 0xff;
549 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
550 emit_move_insn (mask, array_to_constant (TImode, arr));
551 }
552 break;
553 default:
554 abort ();
555 }
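/* Worked example (added comment, not in the original source): for
   dst_size == 32, start == 4 and width == 8 the case above computes
   maskbits = ~0 << 20 and then adds 1 << 28; the carry clears all
   bits above bit 27, leaving 0x0ff00000 -- an 8-bit field starting
   4 bits below the most significant bit of the SImode word.  */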
556 if (GET_CODE (ops[0]) == MEM)
557 {
558 rtx low = gen_reg_rtx (SImode);
559 rtx rotl = gen_reg_rtx (SImode);
560 rtx mask0 = gen_reg_rtx (TImode);
561 rtx addr;
562 rtx addr0;
563 rtx addr1;
564 rtx mem;
565
566 addr = force_reg (Pmode, XEXP (ops[0], 0));
567 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
568 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
569 emit_insn (gen_negsi2 (rotl, low));
570 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
571 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
572 mem = change_address (ops[0], TImode, addr0);
573 set_mem_alias_set (mem, 0);
574 emit_move_insn (dst, mem);
575 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
576 if (start + width > MEM_ALIGN (ops[0]))
577 {
578 rtx shl = gen_reg_rtx (SImode);
579 rtx mask1 = gen_reg_rtx (TImode);
580 rtx dst1 = gen_reg_rtx (TImode);
581 rtx mem1;
582 addr1 = plus_constant (Pmode, addr, 16);
583 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
584 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
585 emit_insn (gen_shlqby_ti (mask1, mask, shl));
586 mem1 = change_address (ops[0], TImode, addr1);
587 set_mem_alias_set (mem1, 0);
588 emit_move_insn (dst1, mem1);
589 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
590 emit_move_insn (mem1, dst1);
591 }
592 emit_move_insn (mem, dst);
593 }
594 else
595 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
596 }
597
598
599 int
600 spu_expand_block_move (rtx ops[])
601 {
602 HOST_WIDE_INT bytes, align, offset;
603 rtx src, dst, sreg, dreg, target;
604 int i;
605 if (GET_CODE (ops[2]) != CONST_INT
606 || GET_CODE (ops[3]) != CONST_INT
607 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
608 return 0;
609
610 bytes = INTVAL (ops[2]);
611 align = INTVAL (ops[3]);
612
613 if (bytes <= 0)
614 return 1;
615
616 dst = ops[0];
617 src = ops[1];
618
619 if (align == 16)
620 {
621 for (offset = 0; offset + 16 <= bytes; offset += 16)
622 {
623 dst = adjust_address (ops[0], V16QImode, offset);
624 src = adjust_address (ops[1], V16QImode, offset);
625 emit_move_insn (dst, src);
626 }
627 if (offset < bytes)
628 {
629 rtx mask;
630 unsigned char arr[16] = { 0 };
631 for (i = 0; i < bytes - offset; i++)
632 arr[i] = 0xff;
633 dst = adjust_address (ops[0], V16QImode, offset);
634 src = adjust_address (ops[1], V16QImode, offset);
635 mask = gen_reg_rtx (V16QImode);
636 sreg = gen_reg_rtx (V16QImode);
637 dreg = gen_reg_rtx (V16QImode);
638 target = gen_reg_rtx (V16QImode);
639 emit_move_insn (mask, array_to_constant (V16QImode, arr));
640 emit_move_insn (dreg, dst);
641 emit_move_insn (sreg, src);
642 emit_insn (gen_selb (target, dreg, sreg, mask));
643 emit_move_insn (dst, target);
644 }
645 return 1;
646 }
647 return 0;
648 }
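/* Illustrative note (added; not from the original source): for a
   20-byte copy with 16-byte alignment, the loop above emits one
   V16QImode move for the first 16 bytes, and the tail code merges the
   remaining 4 bytes via selb with a mask whose first four bytes are
   0xff and the rest zero, so only those bytes of the destination
   quadword are overwritten.  */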
649
650 enum spu_comp_code
651 { SPU_EQ, SPU_GT, SPU_GTU };
652
653 int spu_comp_icode[12][3] = {
654 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
655 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
656 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
657 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
658 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
659 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
660 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
661 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
662 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
663 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
664 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
665 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
666 };
667
668 /* Generate a compare for CODE, then emit the branch or set that uses
669 the result. GCC can figure this out too if we don't provide all
670 variations of compares, but since GCC always wants to use
671 WORD_MODE, we can generate better code in most cases if we do it
672 ourselves. */
673 void
674 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
675 {
676 int reverse_compare = 0;
677 int reverse_test = 0;
678 rtx compare_result, eq_result;
679 rtx comp_rtx, eq_rtx;
680 machine_mode comp_mode;
681 machine_mode op_mode;
682 enum spu_comp_code scode, eq_code;
683 enum insn_code ior_code;
684 enum rtx_code code = GET_CODE (cmp);
685 rtx op0 = XEXP (cmp, 0);
686 rtx op1 = XEXP (cmp, 1);
687 int index;
688 int eq_test = 0;
689
690 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
691 and so on, to keep the constant in operand 1. */
692 if (GET_CODE (op1) == CONST_INT)
693 {
694 HOST_WIDE_INT val = INTVAL (op1) - 1;
695 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
696 switch (code)
697 {
698 case GE:
699 op1 = GEN_INT (val);
700 code = GT;
701 break;
702 case LT:
703 op1 = GEN_INT (val);
704 code = LE;
705 break;
706 case GEU:
707 op1 = GEN_INT (val);
708 code = GTU;
709 break;
710 case LTU:
711 op1 = GEN_INT (val);
712 code = LEU;
713 break;
714 default:
715 break;
716 }
717 }
718
719 /* However, if we generate an integer result, performing a reverse test
720 would require an extra negation, so avoid that where possible. */
721 if (GET_CODE (op1) == CONST_INT && is_set == 1)
722 {
723 HOST_WIDE_INT val = INTVAL (op1) + 1;
724 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
725 switch (code)
726 {
727 case LE:
728 op1 = GEN_INT (val);
729 code = LT;
730 break;
731 case LEU:
732 op1 = GEN_INT (val);
733 code = LTU;
734 break;
735 default:
736 break;
737 }
738 }
739
740 comp_mode = SImode;
741 op_mode = GET_MODE (op0);
742
743 switch (code)
744 {
745 case GE:
746 scode = SPU_GT;
747 if (HONOR_NANS (op_mode))
748 {
749 reverse_compare = 0;
750 reverse_test = 0;
751 eq_test = 1;
752 eq_code = SPU_EQ;
753 }
754 else
755 {
756 reverse_compare = 1;
757 reverse_test = 1;
758 }
759 break;
760 case LE:
761 scode = SPU_GT;
762 if (HONOR_NANS (op_mode))
763 {
764 reverse_compare = 1;
765 reverse_test = 0;
766 eq_test = 1;
767 eq_code = SPU_EQ;
768 }
769 else
770 {
771 reverse_compare = 0;
772 reverse_test = 1;
773 }
774 break;
775 case LT:
776 reverse_compare = 1;
777 reverse_test = 0;
778 scode = SPU_GT;
779 break;
780 case GEU:
781 reverse_compare = 1;
782 reverse_test = 1;
783 scode = SPU_GTU;
784 break;
785 case LEU:
786 reverse_compare = 0;
787 reverse_test = 1;
788 scode = SPU_GTU;
789 break;
790 case LTU:
791 reverse_compare = 1;
792 reverse_test = 0;
793 scode = SPU_GTU;
794 break;
795 case NE:
796 reverse_compare = 0;
797 reverse_test = 1;
798 scode = SPU_EQ;
799 break;
800
801 case EQ:
802 scode = SPU_EQ;
803 break;
804 case GT:
805 scode = SPU_GT;
806 break;
807 case GTU:
808 scode = SPU_GTU;
809 break;
810 default:
811 scode = SPU_EQ;
812 break;
813 }
814
815 switch (op_mode)
816 {
817 case QImode:
818 index = 0;
819 comp_mode = QImode;
820 break;
821 case HImode:
822 index = 1;
823 comp_mode = HImode;
824 break;
825 case SImode:
826 index = 2;
827 break;
828 case DImode:
829 index = 3;
830 break;
831 case TImode:
832 index = 4;
833 break;
834 case SFmode:
835 index = 5;
836 break;
837 case DFmode:
838 index = 6;
839 break;
840 case V16QImode:
841 index = 7;
842 comp_mode = op_mode;
843 break;
844 case V8HImode:
845 index = 8;
846 comp_mode = op_mode;
847 break;
848 case V4SImode:
849 index = 9;
850 comp_mode = op_mode;
851 break;
852 case V4SFmode:
853 index = 10;
854 comp_mode = V4SImode;
855 break;
856 case V2DFmode:
857 index = 11;
858 comp_mode = V2DImode;
859 break;
860 case V2DImode:
861 default:
862 abort ();
863 }
864
865 if (GET_MODE (op1) == DFmode
866 && (scode != SPU_GT && scode != SPU_EQ))
867 abort ();
868
869 if (is_set == 0 && op1 == const0_rtx
870 && (GET_MODE (op0) == SImode
871 || GET_MODE (op0) == HImode
872 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
873 {
874 /* Don't need to set a register with the result when we are
875 comparing against zero and branching. */
876 reverse_test = !reverse_test;
877 compare_result = op0;
878 }
879 else
880 {
881 compare_result = gen_reg_rtx (comp_mode);
882
883 if (reverse_compare)
884 {
885 rtx t = op1;
886 op1 = op0;
887 op0 = t;
888 }
889
890 if (spu_comp_icode[index][scode] == 0)
891 abort ();
892
893 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
894 (op0, op_mode))
895 op0 = force_reg (op_mode, op0);
896 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
897 (op1, op_mode))
898 op1 = force_reg (op_mode, op1);
899 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
900 op0, op1);
901 if (comp_rtx == 0)
902 abort ();
903 emit_insn (comp_rtx);
904
905 if (eq_test)
906 {
907 eq_result = gen_reg_rtx (comp_mode);
908 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
909 op0, op1);
910 if (eq_rtx == 0)
911 abort ();
912 emit_insn (eq_rtx);
913 ior_code = optab_handler (ior_optab, comp_mode);
914 gcc_assert (ior_code != CODE_FOR_nothing);
915 emit_insn (GEN_FCN (ior_code)
916 (compare_result, compare_result, eq_result));
917 }
918 }
919
920 if (is_set == 0)
921 {
922 rtx bcomp;
923 rtx loc_ref;
924
925 /* We don't have branch on QI compare insns, so we convert the
926 QI compare result to a HI result. */
927 if (comp_mode == QImode)
928 {
929 rtx old_res = compare_result;
930 compare_result = gen_reg_rtx (HImode);
931 comp_mode = HImode;
932 emit_insn (gen_extendqihi2 (compare_result, old_res));
933 }
934
935 if (reverse_test)
936 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
937 else
938 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
939
940 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
941 emit_jump_insn (gen_rtx_SET (pc_rtx,
942 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
943 loc_ref, pc_rtx)));
944 }
945 else if (is_set == 2)
946 {
947 rtx target = operands[0];
948 int compare_size = GET_MODE_BITSIZE (comp_mode);
949 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
950 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
951 rtx select_mask;
952 rtx op_t = operands[2];
953 rtx op_f = operands[3];
954
955 /* The result of the comparison can be SI, HI or QI mode. Create a
956 mask based on that result. */
957 if (target_size > compare_size)
958 {
959 select_mask = gen_reg_rtx (mode);
960 emit_insn (gen_extend_compare (select_mask, compare_result));
961 }
962 else if (target_size < compare_size)
963 select_mask =
964 gen_rtx_SUBREG (mode, compare_result,
965 (compare_size - target_size) / BITS_PER_UNIT);
966 else if (comp_mode != mode)
967 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
968 else
969 select_mask = compare_result;
970
971 if (GET_MODE (target) != GET_MODE (op_t)
972 || GET_MODE (target) != GET_MODE (op_f))
973 abort ();
974
975 if (reverse_test)
976 emit_insn (gen_selb (target, op_t, op_f, select_mask));
977 else
978 emit_insn (gen_selb (target, op_f, op_t, select_mask));
979 }
980 else
981 {
982 rtx target = operands[0];
983 if (reverse_test)
984 emit_insn (gen_rtx_SET (compare_result,
985 gen_rtx_NOT (comp_mode, compare_result)));
986 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
987 emit_insn (gen_extendhisi2 (target, compare_result));
988 else if (GET_MODE (target) == SImode
989 && GET_MODE (compare_result) == QImode)
990 emit_insn (gen_extend_compare (target, compare_result));
991 else
992 emit_move_insn (target, compare_result);
993 }
994 }
995
996 HOST_WIDE_INT
997 const_double_to_hwint (rtx x)
998 {
999 HOST_WIDE_INT val;
1000 REAL_VALUE_TYPE rv;
1001 if (GET_MODE (x) == SFmode)
1002 {
1003 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1004 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1005 }
1006 else if (GET_MODE (x) == DFmode)
1007 {
1008 long l[2];
1009 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1010 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1011 val = l[0];
1012 val = (val << 32) | (l[1] & 0xffffffff);
1013 }
1014 else
1015 abort ();
1016 return val;
1017 }
1018
1019 rtx
1020 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1021 {
1022 long tv[2];
1023 REAL_VALUE_TYPE rv;
1024 gcc_assert (mode == SFmode || mode == DFmode);
1025
1026 if (mode == SFmode)
1027 tv[0] = (v << 32) >> 32;
1028 else if (mode == DFmode)
1029 {
1030 tv[1] = (v << 32) >> 32;
1031 tv[0] = v >> 32;
1032 }
1033 real_from_target (&rv, tv, mode);
1034 return const_double_from_real_value (rv, mode);
1035 }
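/* Illustrative round trip (added comment, not in the original source):
   hwint_to_const_double (SFmode, 0x3f800000) yields the CONST_DOUBLE
   for 1.0f, and const_double_to_hwint maps that CONST_DOUBLE back to
   0x3f800000, the IEEE single-precision encoding of 1.0.  */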
1036
1037 void
1038 print_operand_address (FILE * file, register rtx addr)
1039 {
1040 rtx reg;
1041 rtx offset;
1042
1043 if (GET_CODE (addr) == AND
1044 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1045 && INTVAL (XEXP (addr, 1)) == -16)
1046 addr = XEXP (addr, 0);
1047
1048 switch (GET_CODE (addr))
1049 {
1050 case REG:
1051 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1052 break;
1053
1054 case PLUS:
1055 reg = XEXP (addr, 0);
1056 offset = XEXP (addr, 1);
1057 if (GET_CODE (offset) == REG)
1058 {
1059 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1060 reg_names[REGNO (offset)]);
1061 }
1062 else if (GET_CODE (offset) == CONST_INT)
1063 {
1064 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1065 INTVAL (offset), reg_names[REGNO (reg)]);
1066 }
1067 else
1068 abort ();
1069 break;
1070
1071 case CONST:
1072 case LABEL_REF:
1073 case SYMBOL_REF:
1074 case CONST_INT:
1075 output_addr_const (file, addr);
1076 break;
1077
1078 default:
1079 debug_rtx (addr);
1080 abort ();
1081 }
1082 }
1083
1084 void
1085 print_operand (FILE * file, rtx x, int code)
1086 {
1087 machine_mode mode = GET_MODE (x);
1088 HOST_WIDE_INT val;
1089 unsigned char arr[16];
1090 int xcode = GET_CODE (x);
1091 int i, info;
1092 if (GET_MODE (x) == VOIDmode)
1093 switch (code)
1094 {
1095 case 'L': /* 128 bits, signed */
1096 case 'm': /* 128 bits, signed */
1097 case 'T': /* 128 bits, signed */
1098 case 't': /* 128 bits, signed */
1099 mode = TImode;
1100 break;
1101 case 'K': /* 64 bits, signed */
1102 case 'k': /* 64 bits, signed */
1103 case 'D': /* 64 bits, signed */
1104 case 'd': /* 64 bits, signed */
1105 mode = DImode;
1106 break;
1107 case 'J': /* 32 bits, signed */
1108 case 'j': /* 32 bits, signed */
1109 case 's': /* 32 bits, signed */
1110 case 'S': /* 32 bits, signed */
1111 mode = SImode;
1112 break;
1113 }
1114 switch (code)
1115 {
1116
1117 case 'j': /* 32 bits, signed */
1118 case 'k': /* 64 bits, signed */
1119 case 'm': /* 128 bits, signed */
1120 if (xcode == CONST_INT
1121 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1122 {
1123 gcc_assert (logical_immediate_p (x, mode));
1124 constant_to_array (mode, x, arr);
1125 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1126 val = trunc_int_for_mode (val, SImode);
1127 switch (which_logical_immediate (val))
1128 {
1129 case SPU_ORI:
1130 break;
1131 case SPU_ORHI:
1132 fprintf (file, "h");
1133 break;
1134 case SPU_ORBI:
1135 fprintf (file, "b");
1136 break;
1137 default:
1138 gcc_unreachable();
1139 }
1140 }
1141 else
1142 gcc_unreachable();
1143 return;
1144
1145 case 'J': /* 32 bits, signed */
1146 case 'K': /* 64 bits, signed */
1147 case 'L': /* 128 bits, signed */
1148 if (xcode == CONST_INT
1149 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1150 {
1151 gcc_assert (logical_immediate_p (x, mode)
1152 || iohl_immediate_p (x, mode));
1153 constant_to_array (mode, x, arr);
1154 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1155 val = trunc_int_for_mode (val, SImode);
1156 switch (which_logical_immediate (val))
1157 {
1158 case SPU_ORI:
1159 case SPU_IOHL:
1160 break;
1161 case SPU_ORHI:
1162 val = trunc_int_for_mode (val, HImode);
1163 break;
1164 case SPU_ORBI:
1165 val = trunc_int_for_mode (val, QImode);
1166 break;
1167 default:
1168 gcc_unreachable();
1169 }
1170 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1171 }
1172 else
1173 gcc_unreachable();
1174 return;
1175
1176 case 't': /* 128 bits, signed */
1177 case 'd': /* 64 bits, signed */
1178 case 's': /* 32 bits, signed */
1179 if (CONSTANT_P (x))
1180 {
1181 enum immediate_class c = classify_immediate (x, mode);
1182 switch (c)
1183 {
1184 case IC_IL1:
1185 constant_to_array (mode, x, arr);
1186 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1187 val = trunc_int_for_mode (val, SImode);
1188 switch (which_immediate_load (val))
1189 {
1190 case SPU_IL:
1191 break;
1192 case SPU_ILA:
1193 fprintf (file, "a");
1194 break;
1195 case SPU_ILH:
1196 fprintf (file, "h");
1197 break;
1198 case SPU_ILHU:
1199 fprintf (file, "hu");
1200 break;
1201 default:
1202 gcc_unreachable ();
1203 }
1204 break;
1205 case IC_CPAT:
1206 constant_to_array (mode, x, arr);
1207 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1208 if (info == 1)
1209 fprintf (file, "b");
1210 else if (info == 2)
1211 fprintf (file, "h");
1212 else if (info == 4)
1213 fprintf (file, "w");
1214 else if (info == 8)
1215 fprintf (file, "d");
1216 break;
1217 case IC_IL1s:
1218 if (xcode == CONST_VECTOR)
1219 {
1220 x = CONST_VECTOR_ELT (x, 0);
1221 xcode = GET_CODE (x);
1222 }
1223 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1224 fprintf (file, "a");
1225 else if (xcode == HIGH)
1226 fprintf (file, "hu");
1227 break;
1228 case IC_FSMBI:
1229 case IC_FSMBI2:
1230 case IC_IL2:
1231 case IC_IL2s:
1232 case IC_POOL:
1233 abort ();
1234 }
1235 }
1236 else
1237 gcc_unreachable ();
1238 return;
1239
1240 case 'T': /* 128 bits, signed */
1241 case 'D': /* 64 bits, signed */
1242 case 'S': /* 32 bits, signed */
1243 if (CONSTANT_P (x))
1244 {
1245 enum immediate_class c = classify_immediate (x, mode);
1246 switch (c)
1247 {
1248 case IC_IL1:
1249 constant_to_array (mode, x, arr);
1250 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1251 val = trunc_int_for_mode (val, SImode);
1252 switch (which_immediate_load (val))
1253 {
1254 case SPU_IL:
1255 case SPU_ILA:
1256 break;
1257 case SPU_ILH:
1258 case SPU_ILHU:
1259 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1260 break;
1261 default:
1262 gcc_unreachable ();
1263 }
1264 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1265 break;
1266 case IC_FSMBI:
1267 constant_to_array (mode, x, arr);
1268 val = 0;
1269 for (i = 0; i < 16; i++)
1270 {
1271 val <<= 1;
1272 val |= arr[i] & 1;
1273 }
1274 print_operand (file, GEN_INT (val), 0);
1275 break;
1276 case IC_CPAT:
1277 constant_to_array (mode, x, arr);
1278 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1279 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1280 break;
1281 case IC_IL1s:
1282 if (xcode == HIGH)
1283 x = XEXP (x, 0);
1284 if (GET_CODE (x) == CONST_VECTOR)
1285 x = CONST_VECTOR_ELT (x, 0);
1286 output_addr_const (file, x);
1287 if (xcode == HIGH)
1288 fprintf (file, "@h");
1289 break;
1290 case IC_IL2:
1291 case IC_IL2s:
1292 case IC_FSMBI2:
1293 case IC_POOL:
1294 abort ();
1295 }
1296 }
1297 else
1298 gcc_unreachable ();
1299 return;
1300
1301 case 'C':
1302 if (xcode == CONST_INT)
1303 {
1304 /* Only 4 least significant bits are relevant for generating
1305 control word instructions. */
1306 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1307 return;
1308 }
1309 break;
1310
1311 case 'M': /* print code for c*d */
1312 if (GET_CODE (x) == CONST_INT)
1313 switch (INTVAL (x))
1314 {
1315 case 1:
1316 fprintf (file, "b");
1317 break;
1318 case 2:
1319 fprintf (file, "h");
1320 break;
1321 case 4:
1322 fprintf (file, "w");
1323 break;
1324 case 8:
1325 fprintf (file, "d");
1326 break;
1327 default:
1328 gcc_unreachable();
1329 }
1330 else
1331 gcc_unreachable();
1332 return;
1333
1334 case 'N': /* Negate the operand */
1335 if (xcode == CONST_INT)
1336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1337 else if (xcode == CONST_VECTOR)
1338 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1339 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1340 return;
1341
1342 case 'I': /* enable/disable interrupts */
1343 if (xcode == CONST_INT)
1344 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1345 return;
1346
1347 case 'b': /* branch modifiers */
1348 if (xcode == REG)
1349 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1350 else if (COMPARISON_P (x))
1351 fprintf (file, "%s", xcode == NE ? "n" : "");
1352 return;
1353
1354 case 'i': /* indirect call */
1355 if (xcode == MEM)
1356 {
1357 if (GET_CODE (XEXP (x, 0)) == REG)
1358 /* Used in indirect function calls. */
1359 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1360 else
1361 output_address (XEXP (x, 0));
1362 }
1363 return;
1364
1365 case 'p': /* load/store */
1366 if (xcode == MEM)
1367 {
1368 x = XEXP (x, 0);
1369 xcode = GET_CODE (x);
1370 }
1371 if (xcode == AND)
1372 {
1373 x = XEXP (x, 0);
1374 xcode = GET_CODE (x);
1375 }
1376 if (xcode == REG)
1377 fprintf (file, "d");
1378 else if (xcode == CONST_INT)
1379 fprintf (file, "a");
1380 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1381 fprintf (file, "r");
1382 else if (xcode == PLUS || xcode == LO_SUM)
1383 {
1384 if (GET_CODE (XEXP (x, 1)) == REG)
1385 fprintf (file, "x");
1386 else
1387 fprintf (file, "d");
1388 }
1389 return;
1390
1391 case 'e':
1392 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1393 val &= 0x7;
1394 output_addr_const (file, GEN_INT (val));
1395 return;
1396
1397 case 'f':
1398 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1399 val &= 0x1f;
1400 output_addr_const (file, GEN_INT (val));
1401 return;
1402
1403 case 'g':
1404 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1405 val &= 0x3f;
1406 output_addr_const (file, GEN_INT (val));
1407 return;
1408
1409 case 'h':
1410 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1411 val = (val >> 3) & 0x1f;
1412 output_addr_const (file, GEN_INT (val));
1413 return;
1414
1415 case 'E':
1416 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1417 val = -val;
1418 val &= 0x7;
1419 output_addr_const (file, GEN_INT (val));
1420 return;
1421
1422 case 'F':
1423 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424 val = -val;
1425 val &= 0x1f;
1426 output_addr_const (file, GEN_INT (val));
1427 return;
1428
1429 case 'G':
1430 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431 val = -val;
1432 val &= 0x3f;
1433 output_addr_const (file, GEN_INT (val));
1434 return;
1435
1436 case 'H':
1437 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1438 val = -(val & -8ll);
1439 val = (val >> 3) & 0x1f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
1442
1443 case 'v':
1444 case 'w':
1445 constant_to_array (mode, x, arr);
1446 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1447 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1448 return;
1449
1450 case 0:
1451 if (xcode == REG)
1452 fprintf (file, "%s", reg_names[REGNO (x)]);
1453 else if (xcode == MEM)
1454 output_address (XEXP (x, 0));
1455 else if (xcode == CONST_VECTOR)
1456 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1457 else
1458 output_addr_const (file, x);
1459 return;
1460
1461 /* unused letters
1462 o qr u yz
1463 AB OPQR UVWXYZ */
1464 default:
1465 output_operand_lossage ("invalid %%xn code");
1466 }
1467 gcc_unreachable ();
1468 }
1469
1470 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1471 callee-saved register. For leaf functions it is more efficient to
1472 use a volatile register because we won't need to save and restore the
1473 pic register. This routine is only valid after register allocation
1474 is completed, so we can pick an unused register. */
1475 static rtx
1476 get_pic_reg (void)
1477 {
1478 if (!reload_completed && !reload_in_progress)
1479 abort ();
1480
1481 /* If we've already made the decision, we need to stick with it. Once we've
1482 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1483 return true since the register is now live; this should not cause us to
1484 "switch back" to using pic_offset_table_rtx. */
1485 if (!cfun->machine->pic_reg)
1486 {
1487 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1488 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1489 else
1490 cfun->machine->pic_reg = pic_offset_table_rtx;
1491 }
1492
1493 return cfun->machine->pic_reg;
1494 }
1495
1496 /* Split constant addresses to handle cases that are too large.
1497 Add in the pic register when in PIC mode.
1498 Split immediates that require more than 1 instruction. */
1499 int
1500 spu_split_immediate (rtx * ops)
1501 {
1502 machine_mode mode = GET_MODE (ops[0]);
1503 enum immediate_class c = classify_immediate (ops[1], mode);
1504
1505 switch (c)
1506 {
1507 case IC_IL2:
1508 {
1509 unsigned char arrhi[16];
1510 unsigned char arrlo[16];
1511 rtx to, temp, hi, lo;
1512 int i;
1513 machine_mode imode = mode;
1514 /* We need to do reals as ints because the constant used in the
1515 IOR might not be a legitimate real constant. */
1516 imode = int_mode_for_mode (mode);
1517 constant_to_array (mode, ops[1], arrhi);
1518 if (imode != mode)
1519 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1520 else
1521 to = ops[0];
1522 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1523 for (i = 0; i < 16; i += 4)
1524 {
1525 arrlo[i + 2] = arrhi[i + 2];
1526 arrlo[i + 3] = arrhi[i + 3];
1527 arrlo[i + 0] = arrlo[i + 1] = 0;
1528 arrhi[i + 2] = arrhi[i + 3] = 0;
1529 }
1530 hi = array_to_constant (imode, arrhi);
1531 lo = array_to_constant (imode, arrlo);
1532 emit_move_insn (temp, hi);
1533 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1534 return 1;
1535 }
1536 case IC_FSMBI2:
1537 {
1538 unsigned char arr_fsmbi[16];
1539 unsigned char arr_andbi[16];
1540 rtx to, reg_fsmbi, reg_and;
1541 int i;
1542 machine_mode imode = mode;
1543 /* We need to do reals as ints because the constant used in the
1544 * AND might not be a legitimate real constant. */
1545 imode = int_mode_for_mode (mode);
1546 constant_to_array (mode, ops[1], arr_fsmbi);
1547 if (imode != mode)
1548 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1549 else
1550 to = ops[0];
1551 for (i = 0; i < 16; i++)
1552 if (arr_fsmbi[i] != 0)
1553 {
1554 arr_andbi[0] = arr_fsmbi[i];
1555 arr_fsmbi[i] = 0xff;
1556 }
1557 for (i = 1; i < 16; i++)
1558 arr_andbi[i] = arr_andbi[0];
1559 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1560 reg_and = array_to_constant (imode, arr_andbi);
1561 emit_move_insn (to, reg_fsmbi);
1562 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1563 return 1;
1564 }
1565 case IC_POOL:
1566 if (reload_in_progress || reload_completed)
1567 {
1568 rtx mem = force_const_mem (mode, ops[1]);
1569 if (TARGET_LARGE_MEM)
1570 {
1571 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1572 emit_move_insn (addr, XEXP (mem, 0));
1573 mem = replace_equiv_address (mem, addr);
1574 }
1575 emit_move_insn (ops[0], mem);
1576 return 1;
1577 }
1578 break;
1579 case IC_IL1s:
1580 case IC_IL2s:
1581 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1582 {
1583 if (c == IC_IL2s)
1584 {
1585 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1586 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1587 }
1588 else if (flag_pic)
1589 emit_insn (gen_pic (ops[0], ops[1]));
1590 if (flag_pic)
1591 {
1592 rtx pic_reg = get_pic_reg ();
1593 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1594 }
1595 return flag_pic || c == IC_IL2s;
1596 }
1597 break;
1598 case IC_IL1:
1599 case IC_FSMBI:
1600 case IC_CPAT:
1601 break;
1602 }
1603 return 0;
1604 }
1605
1606 /* SAVING is TRUE when we are generating the actual load and store
1607 instructions for REGNO. When determining the size of the stack
1608 needed for saving registers we must allocate enough space for the
1609 worst case, because we don't always have the information early enough
1610 to not allocate it. But we can at least eliminate the actual loads
1611 and stores during the prologue/epilogue. */
1612 static int
1613 need_to_save_reg (int regno, int saving)
1614 {
1615 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1616 return 1;
1617 if (flag_pic
1618 && regno == PIC_OFFSET_TABLE_REGNUM
1619 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1620 return 1;
1621 return 0;
1622 }
1623
1624 /* This function is only correct starting with local register
1625 allocation */
1626 int
1627 spu_saved_regs_size (void)
1628 {
1629 int reg_save_size = 0;
1630 int regno;
1631
1632 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1633 if (need_to_save_reg (regno, 0))
1634 reg_save_size += 0x10;
1635 return reg_save_size;
1636 }
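/* (Added note, not in the original source:) each saved register above
   accounts for 0x10 == 16 bytes because SPU registers are 128 bits
   wide and are spilled as V4SImode quadwords by frame_emit_store.  */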
1637
1638 static rtx_insn *
1639 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1640 {
1641 rtx reg = gen_rtx_REG (V4SImode, regno);
1642 rtx mem =
1643 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1644 return emit_insn (gen_movv4si (mem, reg));
1645 }
1646
1647 static rtx_insn *
1648 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1649 {
1650 rtx reg = gen_rtx_REG (V4SImode, regno);
1651 rtx mem =
1652 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1653 return emit_insn (gen_movv4si (reg, mem));
1654 }
1655
1656 /* This happens after reload, so we need to expand it. */
1657 static rtx_insn *
1658 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1659 {
1660 rtx_insn *insn;
1661 if (satisfies_constraint_K (GEN_INT (imm)))
1662 {
1663 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1664 }
1665 else
1666 {
1667 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1668 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1669 if (REGNO (src) == REGNO (scratch))
1670 abort ();
1671 }
1672 return insn;
1673 }
1674
1675 /* Return nonzero if this function is known to have a null epilogue. */
1676
1677 int
1678 direct_return (void)
1679 {
1680 if (reload_completed)
1681 {
1682 if (cfun->static_chain_decl == 0
1683 && (spu_saved_regs_size ()
1684 + get_frame_size ()
1685 + crtl->outgoing_args_size
1686 + crtl->args.pretend_args_size == 0)
1687 && crtl->is_leaf)
1688 return 1;
1689 }
1690 return 0;
1691 }
1692
1693 /*
1694 The stack frame looks like this:
1695 +-------------+
1696 | incoming |
1697 | args |
1698 AP -> +-------------+
1699 | $lr save |
1700 +-------------+
1701 prev SP | back chain |
1702 +-------------+
1703 | var args |
1704 | reg save | crtl->args.pretend_args_size bytes
1705 +-------------+
1706 | ... |
1707 | saved regs | spu_saved_regs_size() bytes
1708 FP -> +-------------+
1709 | ... |
1710 | vars | get_frame_size() bytes
1711 HFP -> +-------------+
1712 | ... |
1713 | outgoing |
1714 | args | crtl->outgoing_args_size bytes
1715 +-------------+
1716 | $lr of next |
1717 | frame |
1718 +-------------+
1719 | back chain |
1720 SP -> +-------------+
1721
1722 */
1723 void
1724 spu_expand_prologue (void)
1725 {
1726 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1727 HOST_WIDE_INT total_size;
1728 HOST_WIDE_INT saved_regs_size;
1729 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1730 rtx scratch_reg_0, scratch_reg_1;
1731 rtx_insn *insn;
1732 rtx real;
1733
1734 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1735 cfun->machine->pic_reg = pic_offset_table_rtx;
1736
1737 if (spu_naked_function_p (current_function_decl))
1738 return;
1739
1740 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1741 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1742
1743 saved_regs_size = spu_saved_regs_size ();
1744 total_size = size + saved_regs_size
1745 + crtl->outgoing_args_size
1746 + crtl->args.pretend_args_size;
1747
1748 if (!crtl->is_leaf
1749 || cfun->calls_alloca || total_size > 0)
1750 total_size += STACK_POINTER_OFFSET;
1751
1752 /* Save this first because code after this might use the link
1753 register as a scratch register. */
1754 if (!crtl->is_leaf)
1755 {
1756 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1757 RTX_FRAME_RELATED_P (insn) = 1;
1758 }
1759
1760 if (total_size > 0)
1761 {
1762 offset = -crtl->args.pretend_args_size;
1763 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1764 if (need_to_save_reg (regno, 1))
1765 {
1766 offset -= 16;
1767 insn = frame_emit_store (regno, sp_reg, offset);
1768 RTX_FRAME_RELATED_P (insn) = 1;
1769 }
1770 }
1771
1772 if (flag_pic && cfun->machine->pic_reg)
1773 {
1774 rtx pic_reg = cfun->machine->pic_reg;
1775 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1776 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1777 }
1778
1779 if (total_size > 0)
1780 {
1781 if (flag_stack_check)
1782 {
1783 /* We compare against total_size-1 because
1784 ($sp >= total_size) <=> ($sp > total_size-1) */
1785 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1786 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1787 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1788 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1789 {
1790 emit_move_insn (scratch_v4si, size_v4si);
1791 size_v4si = scratch_v4si;
1792 }
1793 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1794 emit_insn (gen_vec_extractv4si
1795 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1796 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1797 }
1798
1799 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1800 the value of the previous $sp because we save it as the back
1801 chain. */
1802 if (total_size <= 2000)
1803 {
1804 /* In this case we save the back chain first. */
1805 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1806 insn =
1807 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1808 }
1809 else
1810 {
1811 insn = emit_move_insn (scratch_reg_0, sp_reg);
1812 insn =
1813 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1814 }
1815 RTX_FRAME_RELATED_P (insn) = 1;
1816 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1817 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1818
1819 if (total_size > 2000)
1820 {
1821 /* Save the back chain ptr */
1822 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1823 }
1824
1825 if (frame_pointer_needed)
1826 {
1827 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1828 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1829 + crtl->outgoing_args_size;
1830 /* Set the new frame_pointer */
1831 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1832 RTX_FRAME_RELATED_P (insn) = 1;
1833 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1834 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1835 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1836 }
1837 }
1838
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = total_size;
1841 }
1842
1843 void
1844 spu_expand_epilogue (bool sibcall_p)
1845 {
1846 int size = get_frame_size (), offset, regno;
1847 HOST_WIDE_INT saved_regs_size, total_size;
1848 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1849 rtx scratch_reg_0;
1850
1851 if (spu_naked_function_p (current_function_decl))
1852 return;
1853
1854 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1855
1856 saved_regs_size = spu_saved_regs_size ();
1857 total_size = size + saved_regs_size
1858 + crtl->outgoing_args_size
1859 + crtl->args.pretend_args_size;
1860
1861 if (!crtl->is_leaf
1862 || cfun->calls_alloca || total_size > 0)
1863 total_size += STACK_POINTER_OFFSET;
1864
1865 if (total_size > 0)
1866 {
1867 if (cfun->calls_alloca)
1868 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1869 else
1870 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1871
1872
1873 if (saved_regs_size > 0)
1874 {
1875 offset = -crtl->args.pretend_args_size;
1876 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1877 if (need_to_save_reg (regno, 1))
1878 {
1879 offset -= 0x10;
1880 frame_emit_load (regno, sp_reg, offset);
1881 }
1882 }
1883 }
1884
1885 if (!crtl->is_leaf)
1886 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1887
1888 if (!sibcall_p)
1889 {
1890 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1891 emit_jump_insn (gen__return ());
1892 }
1893 }
1894
1895 rtx
1896 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1897 {
1898 if (count != 0)
1899 return 0;
1900 /* This is inefficient because it ends up copying to a save-register
1901 which then gets saved even though $lr has already been saved. But
1902 it does generate better code for leaf functions and we don't need
1903 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1904 used for __builtin_return_address anyway, so maybe we don't care if
1905 it's inefficient. */
1906 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1907 }
1908 \f
1909
1910 /* Given VAL, generate a constant appropriate for MODE.
1911 If MODE is a vector mode, every element will be VAL.
1912 For TImode, VAL will be zero extended to 128 bits. */
1913 rtx
1914 spu_const (machine_mode mode, HOST_WIDE_INT val)
1915 {
1916 rtx inner;
1917 rtvec v;
1918 int units, i;
1919
1920 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1921 || GET_MODE_CLASS (mode) == MODE_FLOAT
1922 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1923 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1924
1925 if (GET_MODE_CLASS (mode) == MODE_INT)
1926 return immed_double_const (val, 0, mode);
1927
1928 /* val is the bit representation of the float */
1929 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1930 return hwint_to_const_double (mode, val);
1931
1932 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1933 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1934 else
1935 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1936
1937 units = GET_MODE_NUNITS (mode);
1938
1939 v = rtvec_alloc (units);
1940
1941 for (i = 0; i < units; ++i)
1942 RTVEC_ELT (v, i) = inner;
1943
1944 return gen_rtx_CONST_VECTOR (mode, v);
1945 }
1946
1947 /* Create a MODE vector constant from 4 ints. */
1948 rtx
1949 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1950 {
1951 unsigned char arr[16];
1952 arr[0] = (a >> 24) & 0xff;
1953 arr[1] = (a >> 16) & 0xff;
1954 arr[2] = (a >> 8) & 0xff;
1955 arr[3] = (a >> 0) & 0xff;
1956 arr[4] = (b >> 24) & 0xff;
1957 arr[5] = (b >> 16) & 0xff;
1958 arr[6] = (b >> 8) & 0xff;
1959 arr[7] = (b >> 0) & 0xff;
1960 arr[8] = (c >> 24) & 0xff;
1961 arr[9] = (c >> 16) & 0xff;
1962 arr[10] = (c >> 8) & 0xff;
1963 arr[11] = (c >> 0) & 0xff;
1964 arr[12] = (d >> 24) & 0xff;
1965 arr[13] = (d >> 16) & 0xff;
1966 arr[14] = (d >> 8) & 0xff;
1967 arr[15] = (d >> 0) & 0xff;
1968 return array_to_constant(mode, arr);
1969 }
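/* Illustrative usage (added comment, not in the original source):

     spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f);

   would return a CONST_VECTOR whose 16 bytes are 0x00 .. 0x0f in
   order, since each int is laid out most-significant byte first.  */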
1970 \f
1971 /* branch hint stuff */
1972
1973 /* An array of these is used to propagate hints to predecessor blocks. */
1974 struct spu_bb_info
1975 {
1976 rtx_insn *prop_jump; /* propagated from another block */
1977 int bb_index; /* the original block. */
1978 };
1979 static struct spu_bb_info *spu_bb_info;
1980
1981 #define STOP_HINT_P(INSN) \
1982 (CALL_P(INSN) \
1983 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1984 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1985
1986 /* 1 when RTX is a hinted branch or its target. We keep track of
1987 what has been hinted so the safe-hint code can test it easily. */
1988 #define HINTED_P(RTX) \
1989 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1990
1991 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1992 #define SCHED_ON_EVEN_P(RTX) \
1993 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1994
1995 /* Emit a nop for INSN such that the two will dual issue. This assumes
1996 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1997 We check for TImode to handle a MULTI1 insn which has dual issued its
1998 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1999 static void
2000 emit_nop_for_insn (rtx_insn *insn)
2001 {
2002 int p;
2003 rtx_insn *new_insn;
2004
2005 /* We need to handle JUMP_TABLE_DATA separately. */
2006 if (JUMP_TABLE_DATA_P (insn))
2007 {
2008 new_insn = emit_insn_after (gen_lnop(), insn);
2009 recog_memoized (new_insn);
2010 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2011 return;
2012 }
2013
2014 p = get_pipe (insn);
2015 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2016 new_insn = emit_insn_after (gen_lnop (), insn);
2017 else if (p == 1 && GET_MODE (insn) == TImode)
2018 {
2019 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2020 PUT_MODE (new_insn, TImode);
2021 PUT_MODE (insn, VOIDmode);
2022 }
2023 else
2024 new_insn = emit_insn_after (gen_lnop (), insn);
2025 recog_memoized (new_insn);
2026 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2027 }
2028
2029 /* Insert nops in basic blocks to meet dual issue alignment
2030 requirements. Also make sure hbrp and hint instructions are at least
2031 one cycle apart, possibly inserting a nop. */
2032 static void
2033 pad_bb(void)
2034 {
2035 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2036 int length;
2037 int addr;
2038
2039 /* This sets up INSN_ADDRESSES. */
2040 shorten_branches (get_insns ());
2041
2042 /* Keep track of length added by nops. */
2043 length = 0;
2044
2045 prev_insn = 0;
2046 insn = get_insns ();
2047 if (!active_insn_p (insn))
2048 insn = next_active_insn (insn);
2049 for (; insn; insn = next_insn)
2050 {
2051 next_insn = next_active_insn (insn);
2052 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2053 || INSN_CODE (insn) == CODE_FOR_hbr)
2054 {
2055 if (hbr_insn)
2056 {
2057 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2058 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2059 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2060 || (a1 - a0 == 4))
2061 {
2062 prev_insn = emit_insn_before (gen_lnop (), insn);
2063 PUT_MODE (prev_insn, GET_MODE (insn));
2064 PUT_MODE (insn, TImode);
2065 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2066 length += 4;
2067 }
2068 }
2069 hbr_insn = insn;
2070 }
2071 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2072 {
2073 if (GET_MODE (insn) == TImode)
2074 PUT_MODE (next_insn, TImode);
2075 insn = next_insn;
2076 next_insn = next_active_insn (insn);
2077 }
2078 addr = INSN_ADDRESSES (INSN_UID (insn));
2079 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2080 {
2081 if (((addr + length) & 7) != 0)
2082 {
2083 emit_nop_for_insn (prev_insn);
2084 length += 4;
2085 }
2086 }
2087 else if (GET_MODE (insn) == TImode
2088 && ((next_insn && GET_MODE (next_insn) != TImode)
2089 || get_attr_type (insn) == TYPE_MULTI0)
2090 && ((addr + length) & 7) != 0)
2091 {
2092 /* prev_insn will always be set because the first insn is
2093 always 8-byte aligned. */
2094 emit_nop_for_insn (prev_insn);
2095 length += 4;
2096 }
2097 prev_insn = insn;
2098 }
2099 }
2100
2101 \f
2102 /* Routines for branch hints. */
2103
2104 static void
2105 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2106 int distance, sbitmap blocks)
2107 {
2108 rtx branch_label = 0;
2109 rtx_insn *hint;
2110 rtx_insn *insn;
2111 rtx_jump_table_data *table;
2112
2113 if (before == 0 || branch == 0 || target == 0)
2114 return;
2115
2116 /* While scheduling we require hints to be no further than 600, so
2117 we need to enforce that here too. */
2118 if (distance > 600)
2119 return;
2120
2121 /* If BEFORE is a basic block note, emit the hint after the note. */
2122 if (NOTE_INSN_BASIC_BLOCK_P (before))
2123 before = NEXT_INSN (before);
2124
2125 branch_label = gen_label_rtx ();
2126 LABEL_NUSES (branch_label)++;
2127 LABEL_PRESERVE_P (branch_label) = 1;
2128 insn = emit_label_before (branch_label, branch);
2129 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2130 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2131
2132 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2133 recog_memoized (hint);
2134 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2135 HINTED_P (branch) = 1;
2136
2137 if (GET_CODE (target) == LABEL_REF)
2138 HINTED_P (XEXP (target, 0)) = 1;
2139 else if (tablejump_p (branch, 0, &table))
2140 {
2141 rtvec vec;
2142 int j;
2143 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2144 vec = XVEC (PATTERN (table), 0);
2145 else
2146 vec = XVEC (PATTERN (table), 1);
2147 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2148 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2149 }
2150
2151 if (distance >= 588)
2152 {
2153 /* Make sure the hint isn't scheduled any earlier than this point,
2154 which could make it too far for the branch offset to fit. */
2155 insn = emit_insn_before (gen_blockage (), hint);
2156 recog_memoized (insn);
2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2158 }
2159 else if (distance <= 8 * 4)
2160 {
2161 /* To guarantee at least 8 insns between the hint and branch we
2162 insert nops. */
2163 int d;
2164 for (d = distance; d < 8 * 4; d += 4)
2165 {
2166 insn =
2167 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2168 recog_memoized (insn);
2169 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2170 }
2171
2172 /* Make sure any nops inserted aren't scheduled before the hint. */
2173 insn = emit_insn_after (gen_blockage (), hint);
2174 recog_memoized (insn);
2175 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2176
2177 /* Make sure any nops inserted aren't scheduled after the call. */
2178 if (CALL_P (branch) && distance < 8 * 4)
2179 {
2180 insn = emit_insn_before (gen_blockage (), branch);
2181 recog_memoized (insn);
2182 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2183 }
2184 }
2185 }
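/* Editor's note: a small, self-contained sketch (not part of this file) of
   the nop padding in the distance <= 8 * 4 branch of spu_emit_branch_hint
   above, assuming DISTANCE is a multiple of 4 since SPU insns are 4 bytes.
   The helper name nops_needed and the standalone main are hypothetical.

     #include <assert.h>

     static int
     nops_needed (int distance)
     {
       int d, n = 0;
       for (d = distance; d < 8 * 4; d += 4)
         n++;
       return n;
     }

     int
     main (void)
     {
       assert (nops_needed (20) == 3);
       assert (nops_needed (32) == 0);
       return 0;
     }

   With a distance of 20 bytes there are only 5 insns between the hint and
   the branch, so 3 nops are added to reach the required 8.  */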
2186
2187 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2188 the rtx for the branch target. */
2189 static rtx
2190 get_branch_target (rtx_insn *branch)
2191 {
2192 if (JUMP_P (branch))
2193 {
2194 rtx set, src;
2195
2196 /* Return statements */
2197 if (GET_CODE (PATTERN (branch)) == RETURN)
2198 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2199
2200 /* ASM GOTOs. */
2201 if (extract_asm_operands (PATTERN (branch)) != NULL)
2202 return NULL;
2203
2204 set = single_set (branch);
2205 src = SET_SRC (set);
2206 if (GET_CODE (SET_DEST (set)) != PC)
2207 abort ();
2208
2209 if (GET_CODE (src) == IF_THEN_ELSE)
2210 {
2211 rtx lab = 0;
2212 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2213 if (note)
2214 {
2215 /* If the more probable case is not a fall through, then
2216 try a branch hint. */
2217 int prob = XINT (note, 0);
2218 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2219 && GET_CODE (XEXP (src, 1)) != PC)
2220 lab = XEXP (src, 1);
2221 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2222 && GET_CODE (XEXP (src, 2)) != PC)
2223 lab = XEXP (src, 2);
2224 }
2225 if (lab)
2226 {
2227 if (GET_CODE (lab) == RETURN)
2228 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2229 return lab;
2230 }
2231 return 0;
2232 }
2233
2234 return src;
2235 }
2236 else if (CALL_P (branch))
2237 {
2238 rtx call;
2239 /* All of our call patterns are in a PARALLEL and the CALL is
2240 the first pattern in the PARALLEL. */
2241 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2242 abort ();
2243 call = XVECEXP (PATTERN (branch), 0, 0);
2244 if (GET_CODE (call) == SET)
2245 call = SET_SRC (call);
2246 if (GET_CODE (call) != CALL)
2247 abort ();
2248 return XEXP (XEXP (call, 0), 0);
2249 }
2250 return 0;
2251 }
2252
2253 /* The special $hbr register is used to prevent the insn scheduler from
2254 moving hbr insns across instructions which invalidate them. It
2255 should only be used in a clobber, and this function searches for
2256 insns which clobber it. */
2257 static bool
2258 insn_clobbers_hbr (rtx_insn *insn)
2259 {
2260 if (INSN_P (insn)
2261 && GET_CODE (PATTERN (insn)) == PARALLEL)
2262 {
2263 rtx parallel = PATTERN (insn);
2264 rtx clobber;
2265 int j;
2266 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2267 {
2268 clobber = XVECEXP (parallel, 0, j);
2269 if (GET_CODE (clobber) == CLOBBER
2270 && GET_CODE (XEXP (clobber, 0)) == REG
2271 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2272 return 1;
2273 }
2274 }
2275 return 0;
2276 }
2277
2278 /* Search up to 32 insns starting at FIRST:
2279 - at any kind of hinted branch, just return
2280 - at any unconditional branch in the first 15 insns, just return
2281 - at a call or indirect branch, after the first 15 insns, force it to
2282 an even address and return
2283 - at any unconditional branch, after the first 15 insns, force it to
2284 an even address.
2285 At the end of the search, insert an hbrp within 4 insns of FIRST,
2286 and an hbrp within 16 instructions of FIRST.
2287 */
2288 static void
2289 insert_hbrp_for_ilb_runout (rtx_insn *first)
2290 {
2291 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2292 int addr = 0, length, first_addr = -1;
2293 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2294 int insert_lnop_after = 0;
2295 for (insn = first; insn; insn = NEXT_INSN (insn))
2296 if (INSN_P (insn))
2297 {
2298 if (first_addr == -1)
2299 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2300 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2301 length = get_attr_length (insn);
2302
2303 if (before_4 == 0 && addr + length >= 4 * 4)
2304 before_4 = insn;
2305 /* We test for 14 instructions because the first hbrp will add
2306 up to 2 instructions. */
2307 if (before_16 == 0 && addr + length >= 14 * 4)
2308 before_16 = insn;
2309
2310 if (INSN_CODE (insn) == CODE_FOR_hbr)
2311 {
2312 /* Make sure an hbrp is at least 2 cycles away from a hint.
2313 Insert an lnop after the hbrp when necessary. */
2314 if (before_4 == 0 && addr > 0)
2315 {
2316 before_4 = insn;
2317 insert_lnop_after |= 1;
2318 }
2319 else if (before_4 && addr <= 4 * 4)
2320 insert_lnop_after |= 1;
2321 if (before_16 == 0 && addr > 10 * 4)
2322 {
2323 before_16 = insn;
2324 insert_lnop_after |= 2;
2325 }
2326 else if (before_16 && addr <= 14 * 4)
2327 insert_lnop_after |= 2;
2328 }
2329
2330 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2331 {
2332 if (addr < hbrp_addr0)
2333 hbrp_addr0 = addr;
2334 else if (addr < hbrp_addr1)
2335 hbrp_addr1 = addr;
2336 }
2337
2338 if (CALL_P (insn) || JUMP_P (insn))
2339 {
2340 if (HINTED_P (insn))
2341 return;
2342
2343 /* Any branch after the first 15 insns should be on an even
2344 address to avoid a special case branch. There might be
2345 some nops and/or hbrps inserted, so we test after 10
2346 insns. */
2347 if (addr > 10 * 4)
2348 SCHED_ON_EVEN_P (insn) = 1;
2349 }
2350
2351 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2352 return;
2353
2354
2355 if (addr + length >= 32 * 4)
2356 {
2357 gcc_assert (before_4 && before_16);
2358 if (hbrp_addr0 > 4 * 4)
2359 {
2360 insn =
2361 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2362 recog_memoized (insn);
2363 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2364 INSN_ADDRESSES_NEW (insn,
2365 INSN_ADDRESSES (INSN_UID (before_4)));
2366 PUT_MODE (insn, GET_MODE (before_4));
2367 PUT_MODE (before_4, TImode);
2368 if (insert_lnop_after & 1)
2369 {
2370 insn = emit_insn_before (gen_lnop (), before_4);
2371 recog_memoized (insn);
2372 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID (before_4)));
2375 PUT_MODE (insn, TImode);
2376 }
2377 }
2378 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2379 && hbrp_addr1 > 16 * 4)
2380 {
2381 insn =
2382 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2383 recog_memoized (insn);
2384 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2385 INSN_ADDRESSES_NEW (insn,
2386 INSN_ADDRESSES (INSN_UID (before_16)));
2387 PUT_MODE (insn, GET_MODE (before_16));
2388 PUT_MODE (before_16, TImode);
2389 if (insert_lnop_after & 2)
2390 {
2391 insn = emit_insn_before (gen_lnop (), before_16);
2392 recog_memoized (insn);
2393 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2394 INSN_ADDRESSES_NEW (insn,
2395 INSN_ADDRESSES (INSN_UID
2396 (before_16)));
2397 PUT_MODE (insn, TImode);
2398 }
2399 }
2400 return;
2401 }
2402 }
2403 else if (BARRIER_P (insn))
2404 return;
2405
2406 }
2407
2408 /* The SPU might hang when it executes 48 inline instructions after a
2409 hinted branch jumps to its hinted target. The beginning of a
2410 function and the return from a call might have been hinted, and
2411 must be handled as well. To prevent a hang we insert 2 hbrps. The
2412 first should be within 6 insns of the branch target. The second
2413 should be within 22 insns of the branch target. When determining
2414 if hbrps are necessary, we look for only 32 inline instructions,
2415 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2416 when inserting new hbrps, we insert them within 4 and 16 insns of
2417 the target. */
2418 static void
2419 insert_hbrp (void)
2420 {
2421 rtx_insn *insn;
2422 if (TARGET_SAFE_HINTS)
2423 {
2424 shorten_branches (get_insns ());
2425 /* Insert hbrp at beginning of function */
2426 insn = next_active_insn (get_insns ());
2427 if (insn)
2428 insert_hbrp_for_ilb_runout (insn);
2429 /* Insert hbrp after hinted targets. */
2430 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2431 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2432 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2433 }
2434 }
2435
2436 static int in_spu_reorg;
2437
2438 static void
2439 spu_var_tracking (void)
2440 {
2441 if (flag_var_tracking)
2442 {
2443 df_analyze ();
2444 timevar_push (TV_VAR_TRACKING);
2445 variable_tracking_main ();
2446 timevar_pop (TV_VAR_TRACKING);
2447 df_finish_pass (false);
2448 }
2449 }
2450
2451 /* Insert branch hints. There are no branch optimizations after this
2452 pass, so it's safe to set our branch hints now. */
2453 static void
2454 spu_machine_dependent_reorg (void)
2455 {
2456 sbitmap blocks;
2457 basic_block bb;
2458 rtx_insn *branch, *insn;
2459 rtx branch_target = 0;
2460 int branch_addr = 0, insn_addr, required_dist = 0;
2461 int i;
2462 unsigned int j;
2463
2464 if (!TARGET_BRANCH_HINTS || optimize == 0)
2465 {
2466 /* We still do it for unoptimized code because an external
2467 function might have hinted a call or return. */
2468 compute_bb_for_insn ();
2469 insert_hbrp ();
2470 pad_bb ();
2471 spu_var_tracking ();
2472 free_bb_for_insn ();
2473 return;
2474 }
2475
2476 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2477 bitmap_clear (blocks);
2478
2479 in_spu_reorg = 1;
2480 compute_bb_for_insn ();
2481
2482 /* (Re-)discover loops so that bb->loop_father can be used
2483 in the analysis below. */
2484 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2485
2486 compact_blocks ();
2487
2488 spu_bb_info =
2489 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2490 sizeof (struct spu_bb_info));
2491
2492 /* We need exact insn addresses and lengths. */
2493 shorten_branches (get_insns ());
2494
2495 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2496 {
2497 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2498 branch = 0;
2499 if (spu_bb_info[i].prop_jump)
2500 {
2501 branch = spu_bb_info[i].prop_jump;
2502 branch_target = get_branch_target (branch);
2503 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2504 required_dist = spu_hint_dist;
2505 }
2506 /* Search from end of a block to beginning. In this loop, find
2507 branches which need a hint and emit the hint only when:
2508 - it's an indirect branch and we're at the insn which sets
2509 the register
2510 - we're at an insn that will invalidate the hint. e.g., a
2511 call, another hint insn, inline asm that clobbers $hbr, and
2512 some inlined operations (divmodsi4). Don't consider jumps
2513 because they are only at the end of a block and are
2514 considered when we are deciding whether to propagate
2515 - we're getting too far away from the branch. The hbr insns
2516 only have a signed 10 bit offset
2517 We go back as far as possible so the branch will be considered
2518 for propagation when we get to the beginning of the block. */
2519 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2520 {
2521 if (INSN_P (insn))
2522 {
2523 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2524 if (branch
2525 && ((GET_CODE (branch_target) == REG
2526 && set_of (branch_target, insn) != NULL_RTX)
2527 || insn_clobbers_hbr (insn)
2528 || branch_addr - insn_addr > 600))
2529 {
2530 rtx_insn *next = NEXT_INSN (insn);
2531 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2532 if (insn != BB_END (bb)
2533 && branch_addr - next_addr >= required_dist)
2534 {
2535 if (dump_file)
2536 fprintf (dump_file,
2537 "hint for %i in block %i before %i\n",
2538 INSN_UID (branch), bb->index,
2539 INSN_UID (next));
2540 spu_emit_branch_hint (next, branch, branch_target,
2541 branch_addr - next_addr, blocks);
2542 }
2543 branch = 0;
2544 }
2545
2546 /* JUMP_P will only be true at the end of a block. When
2547 branch is already set it means we've previously decided
2548 to propagate a hint for that branch into this block. */
2549 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2550 {
2551 branch = 0;
2552 if ((branch_target = get_branch_target (insn)))
2553 {
2554 branch = insn;
2555 branch_addr = insn_addr;
2556 required_dist = spu_hint_dist;
2557 }
2558 }
2559 }
2560 if (insn == BB_HEAD (bb))
2561 break;
2562 }
2563
2564 if (branch)
2565 {
2566 /* If we haven't emitted a hint for this branch yet, it might
2567 be profitable to emit it in one of the predecessor blocks,
2568 especially for loops. */
2569 rtx_insn *bbend;
2570 basic_block prev = 0, prop = 0, prev2 = 0;
2571 int loop_exit = 0, simple_loop = 0;
2572 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2573
2574 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2575 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2576 prev = EDGE_PRED (bb, j)->src;
2577 else
2578 prev2 = EDGE_PRED (bb, j)->src;
2579
2580 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2581 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2582 loop_exit = 1;
2583 else if (EDGE_SUCC (bb, j)->dest == bb)
2584 simple_loop = 1;
2585
2586 /* If this branch is a loop exit then propagate to previous
2587 fallthru block. This catches the cases when it is a simple
2588 loop or when there is an initial branch into the loop. */
2589 if (prev && (loop_exit || simple_loop)
2590 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2591 prop = prev;
2592
2593 /* If there is only one adjacent predecessor, don't propagate
2594 outside this loop. */
2595 else if (prev && single_pred_p (bb)
2596 && prev->loop_father == bb->loop_father)
2597 prop = prev;
2598
2599 /* If this is the JOIN block of a simple IF-THEN then
2600 propagate the hint to the HEADER block. */
2601 else if (prev && prev2
2602 && EDGE_COUNT (bb->preds) == 2
2603 && EDGE_COUNT (prev->preds) == 1
2604 && EDGE_PRED (prev, 0)->src == prev2
2605 && prev2->loop_father == bb->loop_father
2606 && GET_CODE (branch_target) != REG)
2607 prop = prev;
2608
2609 /* Don't propagate when:
2610 - this is a simple loop and the hint would be too far
2611 - this is not a simple loop and there are 16 insns in
2612 this block already
2613 - the predecessor block ends in a branch that will be
2614 hinted
2615 - the predecessor block ends in an insn that invalidates
2616 the hint */
2617 if (prop
2618 && prop->index >= 0
2619 && (bbend = BB_END (prop))
2620 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2621 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2622 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2623 {
2624 if (dump_file)
2625 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2626 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2627 bb->index, prop->index, bb_loop_depth (bb),
2628 INSN_UID (branch), loop_exit, simple_loop,
2629 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2630
2631 spu_bb_info[prop->index].prop_jump = branch;
2632 spu_bb_info[prop->index].bb_index = i;
2633 }
2634 else if (branch_addr - next_addr >= required_dist)
2635 {
2636 if (dump_file)
2637 fprintf (dump_file, "hint for %i in block %i before %i\n",
2638 INSN_UID (branch), bb->index,
2639 INSN_UID (NEXT_INSN (insn)));
2640 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2641 branch_addr - next_addr, blocks);
2642 }
2643 branch = 0;
2644 }
2645 }
2646 free (spu_bb_info);
2647
2648 if (!bitmap_empty_p (blocks))
2649 find_many_sub_basic_blocks (blocks);
2650
2651 /* We have to schedule to make sure alignment is ok. */
2652 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2653
2654 /* The hints need to be scheduled, so call it again. */
2655 schedule_insns ();
2656 df_finish_pass (true);
2657
2658 insert_hbrp ();
2659
2660 pad_bb ();
2661
2662 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2663 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2664 {
2665 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2666 between its branch label and the branch. We don't move the
2667 label because GCC expects it at the beginning of the block. */
2668 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2669 rtx label_ref = XVECEXP (unspec, 0, 0);
2670 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2671 rtx_insn *branch;
2672 int offset = 0;
2673 for (branch = NEXT_INSN (label);
2674 !JUMP_P (branch) && !CALL_P (branch);
2675 branch = NEXT_INSN (branch))
2676 if (NONJUMP_INSN_P (branch))
2677 offset += get_attr_length (branch);
2678 if (offset > 0)
2679 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2680 }
2681
2682 spu_var_tracking ();
2683
2684 loop_optimizer_finalize ();
2685
2686 free_bb_for_insn ();
2687
2688 in_spu_reorg = 0;
2689 }
2690 \f
2691
2692 /* Insn scheduling routines, primarily for dual issue. */
2693 static int
2694 spu_sched_issue_rate (void)
2695 {
2696 return 2;
2697 }
2698
2699 static int
2700 uses_ls_unit (rtx_insn *insn)
2701 {
2702 rtx set = single_set (insn);
2703 if (set != 0
2704 && (GET_CODE (SET_DEST (set)) == MEM
2705 || GET_CODE (SET_SRC (set)) == MEM))
2706 return 1;
2707 return 0;
2708 }
2709
2710 static int
2711 get_pipe (rtx_insn *insn)
2712 {
2713 enum attr_type t;
2714 /* Handle inline asm */
2715 if (INSN_CODE (insn) == -1)
2716 return -1;
2717 t = get_attr_type (insn);
2718 switch (t)
2719 {
2720 case TYPE_CONVERT:
2721 return -2;
2722 case TYPE_MULTI0:
2723 return -1;
2724
2725 case TYPE_FX2:
2726 case TYPE_FX3:
2727 case TYPE_SPR:
2728 case TYPE_NOP:
2729 case TYPE_FXB:
2730 case TYPE_FPD:
2731 case TYPE_FP6:
2732 case TYPE_FP7:
2733 return 0;
2734
2735 case TYPE_LNOP:
2736 case TYPE_SHUF:
2737 case TYPE_LOAD:
2738 case TYPE_STORE:
2739 case TYPE_BR:
2740 case TYPE_MULTI1:
2741 case TYPE_HBR:
2742 case TYPE_IPREFETCH:
2743 return 1;
2744 default:
2745 abort ();
2746 }
2747 }
2748
2749
2750 /* haifa-sched.c has a static variable that keeps track of the current
2751 cycle. It is passed to spu_sched_reorder, and we record it here for
2752 use by spu_sched_variable_issue. It won't be accurate if the
2753 scheduler updates its clock_var between the two calls. */
2754 static int clock_var;
2755
2756 /* This is used to keep track of insn alignment. Set to 0 at the
2757 beginning of each block and increased by the "length" attr of each
2758 insn scheduled. */
2759 static int spu_sched_length;
2760
2761 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2762 ready list appropriately in spu_sched_reorder(). */
2763 static int pipe0_clock;
2764 static int pipe1_clock;
2765
2766 static int prev_clock_var;
2767
2768 static int prev_priority;
2769
2770 /* The SPU needs to load the next ilb sometime during the execution of
2771 the previous ilb. There is a potential conflict if every cycle has a
2772 load or store. To avoid the conflict we make sure the load/store
2773 unit is free for at least one cycle during the execution of insns in
2774 the previous ilb. */
2775 static int spu_ls_first;
2776 static int prev_ls_clock;
2777
2778 static void
2779 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2780 int max_ready ATTRIBUTE_UNUSED)
2781 {
2782 spu_sched_length = 0;
2783 }
2784
2785 static void
2786 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2787 int max_ready ATTRIBUTE_UNUSED)
2788 {
2789 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2790 {
2791 /* When any block might be at least 8-byte aligned, assume they
2792 will all be at least 8-byte aligned to make sure dual issue
2793 works out correctly. */
2794 spu_sched_length = 0;
2795 }
2796 spu_ls_first = INT_MAX;
2797 clock_var = -1;
2798 prev_ls_clock = -1;
2799 pipe0_clock = -1;
2800 pipe1_clock = -1;
2801 prev_clock_var = -1;
2802 prev_priority = -1;
2803 }
2804
2805 static int
2806 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2807 int verbose ATTRIBUTE_UNUSED,
2808 rtx_insn *insn, int more)
2809 {
2810 int len;
2811 int p;
2812 if (GET_CODE (PATTERN (insn)) == USE
2813 || GET_CODE (PATTERN (insn)) == CLOBBER
2814 || (len = get_attr_length (insn)) == 0)
2815 return more;
2816
2817 spu_sched_length += len;
2818
2819 /* Reset on inline asm */
2820 if (INSN_CODE (insn) == -1)
2821 {
2822 spu_ls_first = INT_MAX;
2823 pipe0_clock = -1;
2824 pipe1_clock = -1;
2825 return 0;
2826 }
2827 p = get_pipe (insn);
2828 if (p == 0)
2829 pipe0_clock = clock_var;
2830 else
2831 pipe1_clock = clock_var;
2832
2833 if (in_spu_reorg)
2834 {
2835 if (clock_var - prev_ls_clock > 1
2836 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2837 spu_ls_first = INT_MAX;
2838 if (uses_ls_unit (insn))
2839 {
2840 if (spu_ls_first == INT_MAX)
2841 spu_ls_first = spu_sched_length;
2842 prev_ls_clock = clock_var;
2843 }
2844
2845 /* The scheduler hasn't inserted the nop, but we will later on.
2846 Include those nops in spu_sched_length. */
2847 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2848 spu_sched_length += 4;
2849 prev_clock_var = clock_var;
2850
2851 /* more is -1 when called from spu_sched_reorder for new insns
2852 that don't have INSN_PRIORITY */
2853 if (more >= 0)
2854 prev_priority = INSN_PRIORITY (insn);
2855 }
2856
2857 /* Always try issuing more insns. spu_sched_reorder will decide
2858 when the cycle should be advanced. */
2859 return 1;
2860 }
2861
2862 /* This function is called for both TARGET_SCHED_REORDER and
2863 TARGET_SCHED_REORDER2. */
2864 static int
2865 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2866 rtx_insn **ready, int *nreadyp, int clock)
2867 {
2868 int i, nready = *nreadyp;
2869 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2870 rtx_insn *insn;
2871
2872 clock_var = clock;
2873
2874 if (nready <= 0 || pipe1_clock >= clock)
2875 return 0;
2876
2877 /* Find any rtl insns that don't generate assembly insns and schedule
2878 them first. */
2879 for (i = nready - 1; i >= 0; i--)
2880 {
2881 insn = ready[i];
2882 if (INSN_CODE (insn) == -1
2883 || INSN_CODE (insn) == CODE_FOR_blockage
2884 || (INSN_P (insn) && get_attr_length (insn) == 0))
2885 {
2886 ready[i] = ready[nready - 1];
2887 ready[nready - 1] = insn;
2888 return 1;
2889 }
2890 }
2891
2892 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2893 for (i = 0; i < nready; i++)
2894 if (INSN_CODE (ready[i]) != -1)
2895 {
2896 insn = ready[i];
2897 switch (get_attr_type (insn))
2898 {
2899 default:
2900 case TYPE_MULTI0:
2901 case TYPE_CONVERT:
2902 case TYPE_FX2:
2903 case TYPE_FX3:
2904 case TYPE_SPR:
2905 case TYPE_NOP:
2906 case TYPE_FXB:
2907 case TYPE_FPD:
2908 case TYPE_FP6:
2909 case TYPE_FP7:
2910 pipe_0 = i;
2911 break;
2912 case TYPE_LOAD:
2913 case TYPE_STORE:
2914 pipe_ls = i;
2915 case TYPE_LNOP:
2916 case TYPE_SHUF:
2917 case TYPE_BR:
2918 case TYPE_MULTI1:
2919 case TYPE_HBR:
2920 pipe_1 = i;
2921 break;
2922 case TYPE_IPREFETCH:
2923 pipe_hbrp = i;
2924 break;
2925 }
2926 }
2927
2928 /* In the first scheduling phase, schedule loads and stores together
2929 to increase the chance they will get merged during postreload CSE. */
2930 if (!reload_completed && pipe_ls >= 0)
2931 {
2932 insn = ready[pipe_ls];
2933 ready[pipe_ls] = ready[nready - 1];
2934 ready[nready - 1] = insn;
2935 return 1;
2936 }
2937
2938 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2939 if (pipe_hbrp >= 0)
2940 pipe_1 = pipe_hbrp;
2941
2942 /* When we have loads/stores in every cycle of the last 15 insns and
2943 we are about to schedule another load/store, emit an hbrp insn
2944 instead. */
2945 if (in_spu_reorg
2946 && spu_sched_length - spu_ls_first >= 4 * 15
2947 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2948 {
2949 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2950 recog_memoized (insn);
2951 if (pipe0_clock < clock)
2952 PUT_MODE (insn, TImode);
2953 spu_sched_variable_issue (file, verbose, insn, -1);
2954 return 0;
2955 }
2956
2957 /* In general, we want to emit nops to increase dual issue, but dual
2958 issue isn't faster when one of the insns could be scheduled later
2959 without affecting the critical path. We look at INSN_PRIORITY to
2960 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2961 used to affect it. */
2962 if (in_spu_reorg && spu_dual_nops < 10)
2963 {
2964 /* When we are at an even address and we are not issuing nops to
2965 improve scheduling then we need to advance the cycle. */
2966 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2967 && (spu_dual_nops == 0
2968 || (pipe_1 != -1
2969 && prev_priority >
2970 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2971 return 0;
2972
2973 /* When at an odd address, schedule the highest priority insn
2974 without considering pipeline. */
2975 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2976 && (spu_dual_nops == 0
2977 || (prev_priority >
2978 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2979 return 1;
2980 }
2981
2982
2983 /* If we haven't issued a pipe0 insn yet this cycle and there is a
2984 pipe0 insn in the ready list, schedule it. */
2985 if (pipe0_clock < clock && pipe_0 >= 0)
2986 schedule_i = pipe_0;
2987
2988 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2989 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2990 else
2991 schedule_i = pipe_1;
2992
2993 if (schedule_i > -1)
2994 {
2995 insn = ready[schedule_i];
2996 ready[schedule_i] = ready[nready - 1];
2997 ready[nready - 1] = insn;
2998 return 1;
2999 }
3000 return 0;
3001 }
3002
3003 /* INSN is dependent on DEP_INSN. */
3004 static int
3005 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3006 {
3007 rtx set;
3008
3009 /* The blockage pattern is used to prevent instructions from being
3010 moved across it and has no cost. */
3011 if (INSN_CODE (insn) == CODE_FOR_blockage
3012 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3013 return 0;
3014
3015 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3016 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3017 return 0;
3018
3019 /* Make sure hbrps are spread out. */
3020 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3021 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3022 return 8;
3023
3024 /* Make sure hints and hbrps are 2 cycles apart. */
3025 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3026 || INSN_CODE (insn) == CODE_FOR_hbr)
3027 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3028 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3029 return 2;
3030
3031 /* An hbrp has no real dependency on other insns. */
3032 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3033 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3034 return 0;
3035
3036 /* Assuming that it is unlikely an argument register will be used in
3037 the first cycle of the called function, we reduce the cost for
3038 slightly better scheduling of dep_insn. When not hinted, the
3039 mispredicted branch would hide the cost as well. */
3040 if (CALL_P (insn))
3041 {
3042 rtx target = get_branch_target (insn);
3043 if (GET_CODE (target) != REG || !set_of (target, insn))
3044 return cost - 2;
3045 return cost;
3046 }
3047
3048 /* And when returning from a function, let's assume the return values
3049 are completed sooner too. */
3050 if (CALL_P (dep_insn))
3051 return cost - 2;
3052
3053 /* Make sure an instruction that loads from the back chain is scheduled
3054 away from the return instruction so a hint is more likely to get
3055 issued. */
3056 if (INSN_CODE (insn) == CODE_FOR__return
3057 && (set = single_set (dep_insn))
3058 && GET_CODE (SET_DEST (set)) == REG
3059 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3060 return 20;
3061
3062 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3063 scheduler makes every insn in a block anti-dependent on the final
3064 jump_insn. We adjust here so higher cost insns will get scheduled
3065 earlier. */
3066 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3067 return insn_cost (dep_insn) - 3;
3068
3069 return cost;
3070 }
3071 \f
3072 /* Create a CONST_DOUBLE from a string. */
3073 rtx
3074 spu_float_const (const char *string, machine_mode mode)
3075 {
3076 REAL_VALUE_TYPE value;
3077 value = REAL_VALUE_ATOF (string, mode);
3078 return const_double_from_real_value (value, mode);
3079 }
3080
3081 int
3082 spu_constant_address_p (rtx x)
3083 {
3084 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3085 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3086 || GET_CODE (x) == HIGH);
3087 }
3088
3089 static enum spu_immediate
3090 which_immediate_load (HOST_WIDE_INT val)
3091 {
3092 gcc_assert (val == trunc_int_for_mode (val, SImode));
3093
3094 if (val >= -0x8000 && val <= 0x7fff)
3095 return SPU_IL;
3096 if (val >= 0 && val <= 0x3ffff)
3097 return SPU_ILA;
3098 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3099 return SPU_ILH;
3100 if ((val & 0xffff) == 0)
3101 return SPU_ILHU;
3102
3103 return SPU_NONE;
3104 }
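/* Editor's note: some worked examples for which_immediate_load above
   (illustrative only):

     which_immediate_load (0x1234)     == SPU_IL    fits the signed 16-bit range
     which_immediate_load (0x30000)    == SPU_ILA   fits the unsigned 18-bit range
     which_immediate_load (0x00050005) == SPU_ILH   both halfwords are equal
     which_immediate_load (0x12340000) == SPU_ILHU  the low halfword is zero  */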
3105
3106 /* Return true when OP can be loaded by one of the il instructions, or
3107 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3108 int
3109 immediate_load_p (rtx op, machine_mode mode)
3110 {
3111 if (CONSTANT_P (op))
3112 {
3113 enum immediate_class c = classify_immediate (op, mode);
3114 return c == IC_IL1 || c == IC_IL1s
3115 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3116 }
3117 return 0;
3118 }
3119
3120 /* Return true if the first SIZE bytes of ARR form a constant that can be
3121 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3122 represent the size and offset of the instruction to use. */
3123 static int
3124 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
3125 {
3126 int cpat, run, i, start;
3127 cpat = 1;
3128 run = 0;
3129 start = -1;
3130 for (i = 0; i < size && cpat; i++)
3131 if (arr[i] != i+16)
3132 {
3133 if (!run)
3134 {
3135 start = i;
3136 if (arr[i] == 3)
3137 run = 1;
3138 else if (arr[i] == 2 && arr[i+1] == 3)
3139 run = 2;
3140 else if (arr[i] == 0)
3141 {
3142 while (arr[i+run] == run && i+run < 16)
3143 run++;
3144 if (run != 4 && run != 8)
3145 cpat = 0;
3146 }
3147 else
3148 cpat = 0;
3149 if ((i & (run-1)) != 0)
3150 cpat = 0;
3151 i += run;
3152 }
3153 else
3154 cpat = 0;
3155 }
3156 if (cpat && (run || size < 16))
3157 {
3158 if (run == 0)
3159 run = 1;
3160 if (prun)
3161 *prun = run;
3162 if (pstart)
3163 *pstart = start == -1 ? 16-run : start;
3164 return 1;
3165 }
3166 return 0;
3167 }
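/* Editor's note: illustrative 16-byte inputs for cpat_info above.  Bytes
   equal to i + 16 (0x10 .. 0x1f) are the unmodified part of the pattern;
   one naturally aligned run of 03, 02 03, 00..03 or 00..07 corresponds to
   a 1-, 2-, 4- or 8-byte insertion (cbd, chd, cwd or cdd):

     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
       accepted, run = 4 at offset 4 (a cwd-style pattern)

     10 11 12 03 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
       accepted, run = 1 at offset 3 (a cbd-style pattern)

     10 11 00 01 02 03 16 17 18 19 1a 1b 1c 1d 1e 1f
       rejected, the 4-byte run starts at offset 2, which is not
       4-byte aligned  */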
3168
3169 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3170 it into a register. MODE is only valid when OP is a CONST_INT. */
3171 static enum immediate_class
3172 classify_immediate (rtx op, machine_mode mode)
3173 {
3174 HOST_WIDE_INT val;
3175 unsigned char arr[16];
3176 int i, j, repeated, fsmbi, repeat;
3177
3178 gcc_assert (CONSTANT_P (op));
3179
3180 if (GET_MODE (op) != VOIDmode)
3181 mode = GET_MODE (op);
3182
3183 /* A V4SI const_vector with all identical symbols is ok. */
3184 if (!flag_pic
3185 && mode == V4SImode
3186 && GET_CODE (op) == CONST_VECTOR
3187 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3188 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3189 op = unwrap_const_vec_duplicate (op);
3190
3191 switch (GET_CODE (op))
3192 {
3193 case SYMBOL_REF:
3194 case LABEL_REF:
3195 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3196
3197 case CONST:
3198 /* We can never know if the resulting address fits in 18 bits and can be
3199 loaded with ila. For now, assume the address will not overflow if
3200 the displacement is "small" (fits 'K' constraint). */
3201 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3202 {
3203 rtx sym = XEXP (XEXP (op, 0), 0);
3204 rtx cst = XEXP (XEXP (op, 0), 1);
3205
3206 if (GET_CODE (sym) == SYMBOL_REF
3207 && GET_CODE (cst) == CONST_INT
3208 && satisfies_constraint_K (cst))
3209 return IC_IL1s;
3210 }
3211 return IC_IL2s;
3212
3213 case HIGH:
3214 return IC_IL1s;
3215
3216 case CONST_VECTOR:
3217 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3218 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3219 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3220 return IC_POOL;
3221 /* Fall through. */
3222
3223 case CONST_INT:
3224 case CONST_DOUBLE:
3225 constant_to_array (mode, op, arr);
3226
3227 /* Check that each 4-byte slot is identical. */
3228 repeated = 1;
3229 for (i = 4; i < 16; i += 4)
3230 for (j = 0; j < 4; j++)
3231 if (arr[j] != arr[i + j])
3232 repeated = 0;
3233
3234 if (repeated)
3235 {
3236 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3237 val = trunc_int_for_mode (val, SImode);
3238
3239 if (which_immediate_load (val) != SPU_NONE)
3240 return IC_IL1;
3241 }
3242
3243 /* Any mode of 2 bytes or smaller can be loaded with an il
3244 instruction. */
3245 gcc_assert (GET_MODE_SIZE (mode) > 2);
3246
3247 fsmbi = 1;
3248 repeat = 0;
3249 for (i = 0; i < 16 && fsmbi; i++)
3250 if (arr[i] != 0 && repeat == 0)
3251 repeat = arr[i];
3252 else if (arr[i] != 0 && arr[i] != repeat)
3253 fsmbi = 0;
3254 if (fsmbi)
3255 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3256
3257 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3258 return IC_CPAT;
3259
3260 if (repeated)
3261 return IC_IL2;
3262
3263 return IC_POOL;
3264 default:
3265 break;
3266 }
3267 gcc_unreachable ();
3268 }
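/* Editor's note: illustrative classifications from classify_immediate
   above, for a few repeated SImode (or splatted V4SI) values:

     0x00010001  -> IC_IL1    a single ila works (the value fits in 18 bits)
     0xff0000ff  -> IC_FSMBI  every nonzero byte is 0xff
     0x12345678  -> IC_IL2    needs an ilhu/iohl pair  */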
3269
3270 static enum spu_immediate
3271 which_logical_immediate (HOST_WIDE_INT val)
3272 {
3273 gcc_assert (val == trunc_int_for_mode (val, SImode));
3274
3275 if (val >= -0x200 && val <= 0x1ff)
3276 return SPU_ORI;
3277 if (val >= 0 && val <= 0xffff)
3278 return SPU_IOHL;
3279 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3280 {
3281 val = trunc_int_for_mode (val, HImode);
3282 if (val >= -0x200 && val <= 0x1ff)
3283 return SPU_ORHI;
3284 if ((val & 0xff) == ((val >> 8) & 0xff))
3285 {
3286 val = trunc_int_for_mode (val, QImode);
3287 if (val >= -0x200 && val <= 0x1ff)
3288 return SPU_ORBI;
3289 }
3290 }
3291 return SPU_NONE;
3292 }
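/* Editor's note: worked examples for which_logical_immediate above
   (illustrative only):

     which_logical_immediate (0x100)       == SPU_ORI   signed 10-bit word immediate
     which_logical_immediate (0x1234)      == SPU_IOHL  only the low halfword is set
     which_logical_immediate (0x01000100)  == SPU_ORHI  equal halfwords, 10-bit value
     which_logical_immediate (0x05050505)  == SPU_ORBI  equal bytes, 10-bit value  */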
3293
3294 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3295 CONST_DOUBLEs. */
3296 static int
3297 const_vector_immediate_p (rtx x)
3298 {
3299 int i;
3300 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3301 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3302 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3303 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3304 return 0;
3305 return 1;
3306 }
3307
3308 int
3309 logical_immediate_p (rtx op, machine_mode mode)
3310 {
3311 HOST_WIDE_INT val;
3312 unsigned char arr[16];
3313 int i, j;
3314
3315 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3316 || GET_CODE (op) == CONST_VECTOR);
3317
3318 if (GET_CODE (op) == CONST_VECTOR
3319 && !const_vector_immediate_p (op))
3320 return 0;
3321
3322 if (GET_MODE (op) != VOIDmode)
3323 mode = GET_MODE (op);
3324
3325 constant_to_array (mode, op, arr);
3326
3327 /* Check that bytes are repeated. */
3328 for (i = 4; i < 16; i += 4)
3329 for (j = 0; j < 4; j++)
3330 if (arr[j] != arr[i + j])
3331 return 0;
3332
3333 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3334 val = trunc_int_for_mode (val, SImode);
3335
3336 i = which_logical_immediate (val);
3337 return i != SPU_NONE && i != SPU_IOHL;
3338 }
3339
3340 int
3341 iohl_immediate_p (rtx op, machine_mode mode)
3342 {
3343 HOST_WIDE_INT val;
3344 unsigned char arr[16];
3345 int i, j;
3346
3347 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3348 || GET_CODE (op) == CONST_VECTOR);
3349
3350 if (GET_CODE (op) == CONST_VECTOR
3351 && !const_vector_immediate_p (op))
3352 return 0;
3353
3354 if (GET_MODE (op) != VOIDmode)
3355 mode = GET_MODE (op);
3356
3357 constant_to_array (mode, op, arr);
3358
3359 /* Check that bytes are repeated. */
3360 for (i = 4; i < 16; i += 4)
3361 for (j = 0; j < 4; j++)
3362 if (arr[j] != arr[i + j])
3363 return 0;
3364
3365 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3366 val = trunc_int_for_mode (val, SImode);
3367
3368 return val >= 0 && val <= 0xffff;
3369 }
3370
3371 int
3372 arith_immediate_p (rtx op, machine_mode mode,
3373 HOST_WIDE_INT low, HOST_WIDE_INT high)
3374 {
3375 HOST_WIDE_INT val;
3376 unsigned char arr[16];
3377 int bytes, i, j;
3378
3379 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3380 || GET_CODE (op) == CONST_VECTOR);
3381
3382 if (GET_CODE (op) == CONST_VECTOR
3383 && !const_vector_immediate_p (op))
3384 return 0;
3385
3386 if (GET_MODE (op) != VOIDmode)
3387 mode = GET_MODE (op);
3388
3389 constant_to_array (mode, op, arr);
3390
3391 bytes = GET_MODE_UNIT_SIZE (mode);
3392 mode = mode_for_size (GET_MODE_UNIT_BITSIZE (mode), MODE_INT, 0);
3393
3394 /* Check that bytes are repeated. */
3395 for (i = bytes; i < 16; i += bytes)
3396 for (j = 0; j < bytes; j++)
3397 if (arr[j] != arr[i + j])
3398 return 0;
3399
3400 val = arr[0];
3401 for (j = 1; j < bytes; j++)
3402 val = (val << 8) | arr[j];
3403
3404 val = trunc_int_for_mode (val, mode);
3405
3406 return val >= low && val <= high;
3407 }
3408
3409 /* TRUE when OP is an immediate and an exact power of 2, and given that
3410 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3411 all entries must be the same. */
3412 bool
3413 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3414 {
3415 machine_mode int_mode;
3416 HOST_WIDE_INT val;
3417 unsigned char arr[16];
3418 int bytes, i, j;
3419
3420 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3421 || GET_CODE (op) == CONST_VECTOR);
3422
3423 if (GET_CODE (op) == CONST_VECTOR
3424 && !const_vector_immediate_p (op))
3425 return 0;
3426
3427 if (GET_MODE (op) != VOIDmode)
3428 mode = GET_MODE (op);
3429
3430 constant_to_array (mode, op, arr);
3431
3432 mode = GET_MODE_INNER (mode);
3433
3434 bytes = GET_MODE_SIZE (mode);
3435 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3436
3437 /* Check that bytes are repeated. */
3438 for (i = bytes; i < 16; i += bytes)
3439 for (j = 0; j < bytes; j++)
3440 if (arr[j] != arr[i + j])
3441 return 0;
3442
3443 val = arr[0];
3444 for (j = 1; j < bytes; j++)
3445 val = (val << 8) | arr[j];
3446
3447 val = trunc_int_for_mode (val, int_mode);
3448
3449 /* Currently, we only handle SFmode. */
3450 gcc_assert (mode == SFmode);
3451 if (mode == SFmode)
3452 {
3453 int exp = (val >> 23) - 127;
3454 return val > 0 && (val & 0x007fffff) == 0
3455 && exp >= low && exp <= high;
3456 }
3457 return FALSE;
3458 }
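/* Editor's note: a worked SFmode example for exp2_immediate_p above.  The
   single-precision bit pattern of 2.0f is 0x40000000: the mantissa bits
   (val & 0x007fffff) are zero and the unbiased exponent is
   (0x40000000 >> 23) - 127 = 128 - 127 = 1, so 2.0f is accepted whenever
   LOW <= 1 and 1 <= HIGH.  */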
3459
3460 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3461
3462 static bool
3463 ea_symbol_ref_p (const_rtx x)
3464 {
3465 tree decl;
3466
3467 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3468 {
3469 rtx plus = XEXP (x, 0);
3470 rtx op0 = XEXP (plus, 0);
3471 rtx op1 = XEXP (plus, 1);
3472 if (GET_CODE (op1) == CONST_INT)
3473 x = op0;
3474 }
3475
3476 return (GET_CODE (x) == SYMBOL_REF
3477 && (decl = SYMBOL_REF_DECL (x)) != 0
3478 && TREE_CODE (decl) == VAR_DECL
3479 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3480 }
3481
3482 /* We accept:
3483 - any 32-bit constant (SImode, SFmode)
3484 - any constant that can be generated with fsmbi (any mode)
3485 - a 64-bit constant where the high and low bits are identical
3486 (DImode, DFmode)
3487 - a 128-bit constant where the four 32-bit words match. */
3488 bool
3489 spu_legitimate_constant_p (machine_mode mode, rtx x)
3490 {
3491 subrtx_iterator::array_type array;
3492 if (GET_CODE (x) == HIGH)
3493 x = XEXP (x, 0);
3494
3495 /* Reject any __ea qualified reference. These can't appear in
3496 instructions but must be forced to the constant pool. */
3497 FOR_EACH_SUBRTX (iter, array, x, ALL)
3498 if (ea_symbol_ref_p (*iter))
3499 return 0;
3500
3501 /* V4SI with all identical symbols is valid. */
3502 if (!flag_pic
3503 && mode == V4SImode
3504 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3505 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3506 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3507 return const_vec_duplicate_p (x);
3508
3509 if (GET_CODE (x) == CONST_VECTOR
3510 && !const_vector_immediate_p (x))
3511 return 0;
3512 return 1;
3513 }
3514
3515 /* Valid addresses are:
3516 - symbol_ref, label_ref, const
3517 - reg
3518 - reg + const_int, where const_int is 16 byte aligned
3519 - reg + reg, alignment doesn't matter
3520 The alignment matters in the reg+const case because lqd and stqd
3521 ignore the 4 least significant bits of the const. We only care about
3522 16 byte modes because the expand phase will change all smaller MEM
3523 references to TImode. */
3524 static bool
3525 spu_legitimate_address_p (machine_mode mode,
3526 rtx x, bool reg_ok_strict)
3527 {
3528 int aligned = GET_MODE_SIZE (mode) >= 16;
3529 if (aligned
3530 && GET_CODE (x) == AND
3531 && GET_CODE (XEXP (x, 1)) == CONST_INT
3532 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3533 x = XEXP (x, 0);
3534 switch (GET_CODE (x))
3535 {
3536 case LABEL_REF:
3537 return !TARGET_LARGE_MEM;
3538
3539 case SYMBOL_REF:
3540 case CONST:
3541 /* Keep __ea references until reload so that spu_expand_mov can see them
3542 in MEMs. */
3543 if (ea_symbol_ref_p (x))
3544 return !reload_in_progress && !reload_completed;
3545 return !TARGET_LARGE_MEM;
3546
3547 case CONST_INT:
3548 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3549
3550 case SUBREG:
3551 x = XEXP (x, 0);
3552 if (REG_P (x))
3553 return 0;
3554
3555 case REG:
3556 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3557
3558 case PLUS:
3559 case LO_SUM:
3560 {
3561 rtx op0 = XEXP (x, 0);
3562 rtx op1 = XEXP (x, 1);
3563 if (GET_CODE (op0) == SUBREG)
3564 op0 = XEXP (op0, 0);
3565 if (GET_CODE (op1) == SUBREG)
3566 op1 = XEXP (op1, 0);
3567 if (GET_CODE (op0) == REG
3568 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3569 && GET_CODE (op1) == CONST_INT
3570 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3571 /* If virtual registers are involved, the displacement will
3572 change later on anyway, so checking would be premature.
3573 Reload will make sure the final displacement after
3574 register elimination is OK. */
3575 || op0 == arg_pointer_rtx
3576 || op0 == frame_pointer_rtx
3577 || op0 == virtual_stack_vars_rtx)
3578 && (!aligned || (INTVAL (op1) & 15) == 0))
3579 return TRUE;
3580 if (GET_CODE (op0) == REG
3581 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3582 && GET_CODE (op1) == REG
3583 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3584 return TRUE;
3585 }
3586 break;
3587
3588 default:
3589 break;
3590 }
3591 return FALSE;
3592 }
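/* Editor's note: illustrative address forms for a 16-byte (TImode) access
   under the rules above; the register numbers are arbitrary:

     (reg 3)                          valid
     (plus (reg 3) (const_int 32))    valid, the offset is 16-byte aligned
     (plus (reg 3) (const_int 20))    rejected, lqd/stqd would ignore the
                                      low 4 bits of the offset
     (plus (reg 3) (reg 4))           valid, alignment does not matter  */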
3593
3594 /* Like spu_legitimate_address_p, except with named addresses. */
3595 static bool
3596 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3597 bool reg_ok_strict, addr_space_t as)
3598 {
3599 if (as == ADDR_SPACE_EA)
3600 return (REG_P (x) && (GET_MODE (x) == EAmode));
3601
3602 else if (as != ADDR_SPACE_GENERIC)
3603 gcc_unreachable ();
3604
3605 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3606 }
3607
3608 /* When the address is reg + const_int, force the const_int into a
3609 register. */
3610 static rtx
3611 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3612 machine_mode mode ATTRIBUTE_UNUSED)
3613 {
3614 rtx op0, op1;
3615 /* Make sure both operands are registers. */
3616 if (GET_CODE (x) == PLUS)
3617 {
3618 op0 = XEXP (x, 0);
3619 op1 = XEXP (x, 1);
3620 if (ALIGNED_SYMBOL_REF_P (op0))
3621 {
3622 op0 = force_reg (Pmode, op0);
3623 mark_reg_pointer (op0, 128);
3624 }
3625 else if (GET_CODE (op0) != REG)
3626 op0 = force_reg (Pmode, op0);
3627 if (ALIGNED_SYMBOL_REF_P (op1))
3628 {
3629 op1 = force_reg (Pmode, op1);
3630 mark_reg_pointer (op1, 128);
3631 }
3632 else if (GET_CODE (op1) != REG)
3633 op1 = force_reg (Pmode, op1);
3634 x = gen_rtx_PLUS (Pmode, op0, op1);
3635 }
3636 return x;
3637 }
3638
3639 /* Like spu_legitimize_address, except with named address support. */
3640 static rtx
3641 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3642 addr_space_t as)
3643 {
3644 if (as != ADDR_SPACE_GENERIC)
3645 return x;
3646
3647 return spu_legitimize_address (x, oldx, mode);
3648 }
3649
3650 /* Reload reg + const_int for out-of-range displacements. */
3651 rtx
3652 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3653 int opnum, int type)
3654 {
3655 bool removed_and = false;
3656
3657 if (GET_CODE (ad) == AND
3658 && CONST_INT_P (XEXP (ad, 1))
3659 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3660 {
3661 ad = XEXP (ad, 0);
3662 removed_and = true;
3663 }
3664
3665 if (GET_CODE (ad) == PLUS
3666 && REG_P (XEXP (ad, 0))
3667 && CONST_INT_P (XEXP (ad, 1))
3668 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3669 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3670 {
3671 /* Unshare the sum. */
3672 ad = copy_rtx (ad);
3673
3674 /* Reload the displacement. */
3675 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3676 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3677 opnum, (enum reload_type) type);
3678
3679 /* Add back AND for alignment if we stripped it. */
3680 if (removed_and)
3681 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3682
3683 return ad;
3684 }
3685
3686 return NULL_RTX;
3687 }
3688
3689 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3690 struct attribute_spec.handler. */
3691 static tree
3692 spu_handle_fndecl_attribute (tree * node,
3693 tree name,
3694 tree args ATTRIBUTE_UNUSED,
3695 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3696 {
3697 if (TREE_CODE (*node) != FUNCTION_DECL)
3698 {
3699 warning (0, "%qE attribute only applies to functions",
3700 name);
3701 *no_add_attrs = true;
3702 }
3703
3704 return NULL_TREE;
3705 }
3706
3707 /* Handle the "vector" attribute. */
3708 static tree
3709 spu_handle_vector_attribute (tree * node, tree name,
3710 tree args ATTRIBUTE_UNUSED,
3711 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3712 {
3713 tree type = *node, result = NULL_TREE;
3714 machine_mode mode;
3715 int unsigned_p;
3716
3717 while (POINTER_TYPE_P (type)
3718 || TREE_CODE (type) == FUNCTION_TYPE
3719 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3720 type = TREE_TYPE (type);
3721
3722 mode = TYPE_MODE (type);
3723
3724 unsigned_p = TYPE_UNSIGNED (type);
3725 switch (mode)
3726 {
3727 case DImode:
3728 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3729 break;
3730 case SImode:
3731 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3732 break;
3733 case HImode:
3734 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3735 break;
3736 case QImode:
3737 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3738 break;
3739 case SFmode:
3740 result = V4SF_type_node;
3741 break;
3742 case DFmode:
3743 result = V2DF_type_node;
3744 break;
3745 default:
3746 break;
3747 }
3748
3749 /* Propagate qualifiers attached to the element type
3750 onto the vector type. */
3751 if (result && result != type && TYPE_QUALS (type))
3752 result = build_qualified_type (result, TYPE_QUALS (type));
3753
3754 *no_add_attrs = true; /* No need to hang on to the attribute. */
3755
3756 if (!result)
3757 warning (0, "%qE attribute ignored", name);
3758 else
3759 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3760
3761 return NULL_TREE;
3762 }
3763
3764 /* Return nonzero if FUNC is a naked function. */
3765 static int
3766 spu_naked_function_p (tree func)
3767 {
3768 tree a;
3769
3770 if (TREE_CODE (func) != FUNCTION_DECL)
3771 abort ();
3772
3773 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3774 return a != NULL_TREE;
3775 }
3776
3777 int
3778 spu_initial_elimination_offset (int from, int to)
3779 {
3780 int saved_regs_size = spu_saved_regs_size ();
3781 int sp_offset = 0;
3782 if (!crtl->is_leaf || crtl->outgoing_args_size
3783 || get_frame_size () || saved_regs_size)
3784 sp_offset = STACK_POINTER_OFFSET;
3785 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3786 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3787 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3788 return get_frame_size ();
3789 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3790 return sp_offset + crtl->outgoing_args_size
3791 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3792 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3793 return get_frame_size () + saved_regs_size + sp_offset;
3794 else
3795 gcc_unreachable ();
3796 }
3797
3798 rtx
3799 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3800 {
3801 machine_mode mode = TYPE_MODE (type);
3802 int byte_size = ((mode == BLKmode)
3803 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3804
3805 /* Make sure small structs are left justified in a register. */
3806 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3807 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3808 {
3809 machine_mode smode;
3810 rtvec v;
3811 int i;
3812 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3813 int n = byte_size / UNITS_PER_WORD;
3814 v = rtvec_alloc (nregs);
3815 for (i = 0; i < n; i++)
3816 {
3817 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3818 gen_rtx_REG (TImode,
3819 FIRST_RETURN_REGNUM
3820 + i),
3821 GEN_INT (UNITS_PER_WORD * i));
3822 byte_size -= UNITS_PER_WORD;
3823 }
3824
3825 if (n < nregs)
3826 {
3827 if (byte_size < 4)
3828 byte_size = 4;
3829 smode =
3830 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3831 RTVEC_ELT (v, n) =
3832 gen_rtx_EXPR_LIST (VOIDmode,
3833 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3834 GEN_INT (UNITS_PER_WORD * n));
3835 }
3836 return gen_rtx_PARALLEL (mode, v);
3837 }
3838 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3839 }
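/* Editor's note: a worked example for spu_function_value above.  For a
   20-byte aggregate (assuming it is within the register-return limit, and
   noting that the TImode pieces imply UNITS_PER_WORD is 16 here), nregs
   is 2 and n is 1, so the result is a PARALLEL holding one full TImode
   register at byte offset 0 and one SImode register (the smallest integer
   mode covering the remaining 4 bytes) at byte offset 16, counted up from
   FIRST_RETURN_REGNUM.  */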
3840
3841 static rtx
3842 spu_function_arg (cumulative_args_t cum_v,
3843 machine_mode mode,
3844 const_tree type, bool named ATTRIBUTE_UNUSED)
3845 {
3846 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3847 int byte_size;
3848
3849 if (*cum >= MAX_REGISTER_ARGS)
3850 return 0;
3851
3852 byte_size = ((mode == BLKmode)
3853 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3854
3855 /* The ABI does not allow parameters to be passed partially in
3856 reg and partially in stack. */
3857 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3858 return 0;
3859
3860 /* Make sure small structs are left justified in a register. */
3861 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3862 && byte_size < UNITS_PER_WORD && byte_size > 0)
3863 {
3864 machine_mode smode;
3865 rtx gr_reg;
3866 if (byte_size < 4)
3867 byte_size = 4;
3868 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3869 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3870 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3871 const0_rtx);
3872 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3873 }
3874 else
3875 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3876 }
3877
3878 static void
3879 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3880 const_tree type, bool named ATTRIBUTE_UNUSED)
3881 {
3882 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3883
3884 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3885 ? 1
3886 : mode == BLKmode
3887 ? ((int_size_in_bytes (type) + 15) / 16)
3888 : mode == VOIDmode
3889 ? 1
3890 : HARD_REGNO_NREGS (cum, mode));
3891 }
3892
3893 /* Variable sized types are passed by reference. */
3894 static bool
3895 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3896 machine_mode mode ATTRIBUTE_UNUSED,
3897 const_tree type, bool named ATTRIBUTE_UNUSED)
3898 {
3899 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3900 }
3901 \f
3902
3903 /* Var args. */
3904
3905 /* Create and return the va_list datatype.
3906
3907 On SPU, va_list is an array type equivalent to
3908
3909 typedef struct __va_list_tag
3910 {
3911 void *__args __attribute__((__aligned(16)));
3912 void *__skip __attribute__((__aligned(16)));
3913
3914 } va_list[1];
3915
3916 where __args points to the arg that will be returned by the next
3917 va_arg(), and __skip points to the previous stack frame such that
3918 when __args == __skip we should advance __args by 32 bytes. */
3919 static tree
3920 spu_build_builtin_va_list (void)
3921 {
3922 tree f_args, f_skip, record, type_decl;
3923 bool owp;
3924
3925 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3926
3927 type_decl =
3928 build_decl (BUILTINS_LOCATION,
3929 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3930
3931 f_args = build_decl (BUILTINS_LOCATION,
3932 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3933 f_skip = build_decl (BUILTINS_LOCATION,
3934 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3935
3936 DECL_FIELD_CONTEXT (f_args) = record;
3937 DECL_ALIGN (f_args) = 128;
3938 DECL_USER_ALIGN (f_args) = 1;
3939
3940 DECL_FIELD_CONTEXT (f_skip) = record;
3941 DECL_ALIGN (f_skip) = 128;
3942 DECL_USER_ALIGN (f_skip) = 1;
3943
3944 TYPE_STUB_DECL (record) = type_decl;
3945 TYPE_NAME (record) = type_decl;
3946 TYPE_FIELDS (record) = f_args;
3947 DECL_CHAIN (f_args) = f_skip;
3948
3949 /* We know this is being padded and we want it that way. It is an internal
3950 type so hide the warnings from the user. */
3951 owp = warn_padded;
3952 warn_padded = false;
3953
3954 layout_type (record);
3955
3956 warn_padded = owp;
3957
3958 /* The correct type is an array type of one element. */
3959 return build_array_type (record, build_index_type (size_zero_node));
3960 }
3961
3962 /* Implement va_start by filling the va_list structure VALIST.
3963 NEXTARG points to the first anonymous stack argument.
3964
3965 The following global variables are used to initialize
3966 the va_list structure:
3967
3968 crtl->args.info;
3969 the CUMULATIVE_ARGS for this function
3970
3971 crtl->args.arg_offset_rtx:
3972 holds the offset of the first anonymous stack argument
3973 (relative to the virtual arg pointer). */
3974
3975 static void
3976 spu_va_start (tree valist, rtx nextarg)
3977 {
3978 tree f_args, f_skip;
3979 tree args, skip, t;
3980
3981 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3982 f_skip = DECL_CHAIN (f_args);
3983
3984 valist = build_simple_mem_ref (valist);
3985 args =
3986 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3987 skip =
3988 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3989
3990 /* Find the __args area. */
3991 t = make_tree (TREE_TYPE (args), nextarg);
3992 if (crtl->args.pretend_args_size > 0)
3993 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3994 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3995 TREE_SIDE_EFFECTS (t) = 1;
3996 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3997
3998 /* Find the __skip area. */
3999 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4000 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4001 - STACK_POINTER_OFFSET));
4002 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4003 TREE_SIDE_EFFECTS (t) = 1;
4004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4005 }
4006
4007 /* Gimplify va_arg by updating the va_list structure
4008 VALIST as required to retrieve an argument of type
4009 TYPE, and returning that argument.
4010
4011 ret = va_arg(VALIST, TYPE);
4012
4013 generates code equivalent to:
4014
4015 paddedsize = (sizeof(TYPE) + 15) & -16;
4016 if (VALIST.__args + paddedsize > VALIST.__skip
4017 && VALIST.__args <= VALIST.__skip)
4018 addr = VALIST.__skip + 32;
4019 else
4020 addr = VALIST.__args;
4021 VALIST.__args = addr + paddedsize;
4022 ret = *(TYPE *)addr;
4023 */
4024 static tree
4025 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4026 gimple_seq * post_p ATTRIBUTE_UNUSED)
4027 {
4028 tree f_args, f_skip;
4029 tree args, skip;
4030 HOST_WIDE_INT size, rsize;
4031 tree addr, tmp;
4032 bool pass_by_reference_p;
4033
4034 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4035 f_skip = DECL_CHAIN (f_args);
4036
4037 args =
4038 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4039 skip =
4040 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4041
4042 addr = create_tmp_var (ptr_type_node, "va_arg");
4043
4044 /* If an object is dynamically sized, a pointer to it is passed
4045 instead of the object itself. */
4046 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4047 false);
4048 if (pass_by_reference_p)
4049 type = build_pointer_type (type);
4050 size = int_size_in_bytes (type);
4051 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4052
4053 /* build conditional expression to calculate addr. The expression
4054 will be gimplified later. */
4055 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4056 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4057 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4058 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4059 unshare_expr (skip)));
4060
4061 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4062 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4063 unshare_expr (args));
4064
4065 gimplify_assign (addr, tmp, pre_p);
4066
4067 /* update VALIST.__args */
4068 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4069 gimplify_assign (unshare_expr (args), tmp, pre_p);
4070
4071 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4072 addr);
4073
4074 if (pass_by_reference_p)
4075 addr = build_va_arg_indirect_ref (addr);
4076
4077 return build_va_arg_indirect_ref (addr);
4078 }
4079
4080 /* Save parameter registers starting with the register that corresponds
4081 to the first unnamed parameters. If the first unnamed parameter is
4082 in the stack then save no registers. Set pretend_args_size to the
4083 amount of space needed to save the registers. */
4084 static void
4085 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4086 tree type, int *pretend_size, int no_rtl)
4087 {
4088 if (!no_rtl)
4089 {
4090 rtx tmp;
4091 int regno;
4092 int offset;
4093 int ncum = *get_cumulative_args (cum);
4094
4095 /* cum currently points to the last named argument; we want to
4096 start at the next argument. */
4097 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4098
4099 offset = -STACK_POINTER_OFFSET;
4100 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4101 {
4102 tmp = gen_frame_mem (V4SImode,
4103 plus_constant (Pmode, virtual_incoming_args_rtx,
4104 offset));
4105 emit_move_insn (tmp,
4106 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4107 offset += 16;
4108 }
4109 *pretend_size = offset + STACK_POINTER_OFFSET;
4110 }
4111 }
4112 \f
4113 static void
4114 spu_conditional_register_usage (void)
4115 {
4116 if (flag_pic)
4117 {
4118 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4119 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4120 }
4121 }
4122
4123 /* This is called any time we inspect the alignment of a register for
4124 addresses. */
4125 static int
4126 reg_aligned_for_addr (rtx x)
4127 {
4128 int regno =
4129 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4130 return REGNO_POINTER_ALIGN (regno) >= 128;
4131 }
4132
4133 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4134 into its SYMBOL_REF_FLAGS. */
4135 static void
4136 spu_encode_section_info (tree decl, rtx rtl, int first)
4137 {
4138 default_encode_section_info (decl, rtl, first);
4139
4140 /* If a variable has a forced alignment to < 16 bytes, mark it with
4141 SYMBOL_FLAG_ALIGN1. */
4142 if (TREE_CODE (decl) == VAR_DECL
4143 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4144 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4145 }
4146
4147 /* Return TRUE if we are certain the mem refers to a complete object
4148 which is both 16-byte aligned and padded to a 16-byte boundary. This
4149 would make it safe to store with a single instruction.
4150 We guarantee the alignment and padding for static objects by aligning
4151 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4152 FIXME: We currently cannot guarantee this for objects on the stack
4153 because assign_parm_setup_stack calls assign_stack_local with the
4154 alignment of the parameter mode and in that case the alignment never
4155 gets adjusted by LOCAL_ALIGNMENT. */
4156 static int
4157 store_with_one_insn_p (rtx mem)
4158 {
4159 machine_mode mode = GET_MODE (mem);
4160 rtx addr = XEXP (mem, 0);
4161 if (mode == BLKmode)
4162 return 0;
4163 if (GET_MODE_SIZE (mode) >= 16)
4164 return 1;
4165 /* Only static objects. */
4166 if (GET_CODE (addr) == SYMBOL_REF)
4167 {
4168 /* We use the associated declaration to make sure the access is
4169 referring to the whole object.
4170 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4171 if it is necessary. Will there be cases where one exists, and
4172 the other does not? Will there be cases where both exist, but
4173 have different types? */
4174 tree decl = MEM_EXPR (mem);
4175 if (decl
4176 && TREE_CODE (decl) == VAR_DECL
4177 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4178 return 1;
4179 decl = SYMBOL_REF_DECL (addr);
4180 if (decl
4181 && TREE_CODE (decl) == VAR_DECL
4182 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4183 return 1;
4184 }
4185 return 0;
4186 }
4187
4188 /* Return 1 when the address is not valid for a simple load and store as
4189 required by the '_mov*' patterns. We could make this less strict
4190 for loads, but we prefer MEMs to look the same so they are more
4191 likely to be merged. */
4192 static int
4193 address_needs_split (rtx mem)
4194 {
4195 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4196 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4197 || !(store_with_one_insn_p (mem)
4198 || mem_is_padded_component_ref (mem))))
4199 return 1;
4200
4201 return 0;
4202 }
4203
4204 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4205 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4206 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4207
4208 /* MEM is known to be an __ea qualified memory access. Emit a call to
4209 fetch the PPU memory to local store, and return its address in local
4210 store. */
4211
4212 static void
4213 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4214 {
4215 if (is_store)
4216 {
4217 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4218 if (!cache_fetch_dirty)
4219 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4220 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4221 2, ea_addr, EAmode, ndirty, SImode);
4222 }
4223 else
4224 {
4225 if (!cache_fetch)
4226 cache_fetch = init_one_libfunc ("__cache_fetch");
4227 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4228 1, ea_addr, EAmode);
4229 }
4230 }
4231
4232 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4233 dirty bit marking, inline.
4234
4235 The cache control data structure is an array of
4236
4237 struct __cache_tag_array
4238 {
4239 unsigned int tag_lo[4];
4240 unsigned int tag_hi[4];
4241 void *data_pointer[4];
4242 int reserved[4];
4243 vector unsigned short dirty_bits[4];
4244 } */
4245
4246 static void
4247 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4248 {
4249 rtx ea_addr_si;
4250 HOST_WIDE_INT v;
4251 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4252 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4253 rtx index_mask = gen_reg_rtx (SImode);
4254 rtx tag_arr = gen_reg_rtx (Pmode);
4255 rtx splat_mask = gen_reg_rtx (TImode);
4256 rtx splat = gen_reg_rtx (V4SImode);
4257 rtx splat_hi = NULL_RTX;
4258 rtx tag_index = gen_reg_rtx (Pmode);
4259 rtx block_off = gen_reg_rtx (SImode);
4260 rtx tag_addr = gen_reg_rtx (Pmode);
4261 rtx tag = gen_reg_rtx (V4SImode);
4262 rtx cache_tag = gen_reg_rtx (V4SImode);
4263 rtx cache_tag_hi = NULL_RTX;
4264 rtx cache_ptrs = gen_reg_rtx (TImode);
4265 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4266 rtx tag_equal = gen_reg_rtx (V4SImode);
4267 rtx tag_equal_hi = NULL_RTX;
4268 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4269 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4270 rtx eq_index = gen_reg_rtx (SImode);
4271 rtx bcomp, hit_label, hit_ref, cont_label;
4272 rtx_insn *insn;
4273
4274 if (spu_ea_model != 32)
4275 {
4276 splat_hi = gen_reg_rtx (V4SImode);
4277 cache_tag_hi = gen_reg_rtx (V4SImode);
4278 tag_equal_hi = gen_reg_rtx (V4SImode);
4279 }
4280
4281 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4282 emit_move_insn (tag_arr, tag_arr_sym);
4283 v = 0x0001020300010203LL;
4284 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4285 ea_addr_si = ea_addr;
4286 if (spu_ea_model != 32)
4287 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4288
4289 /* tag_index = ea_addr & (tag_array_size - 128) */
4290 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4291
4292 /* splat ea_addr to all 4 slots. */
4293 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4294 /* Similarly for high 32 bits of ea_addr. */
4295 if (spu_ea_model != 32)
4296 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4297
4298 /* block_off = ea_addr & 127 */
4299 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4300
4301 /* tag_addr = tag_arr + tag_index */
4302 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4303
4304 /* Read cache tags. */
4305 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4306 if (spu_ea_model != 32)
4307 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4308 plus_constant (Pmode,
4309 tag_addr, 16)));
4310
4311 /* tag = ea_addr & -128 */
4312 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4313
4314 /* Read all four cache data pointers. */
4315 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4316 plus_constant (Pmode,
4317 tag_addr, 32)));
4318
4319 /* Compare tags. */
4320 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4321 if (spu_ea_model != 32)
4322 {
4323 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4324 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4325 }
4326
4327 /* At most one of the tags compares equal, so tag_equal has one
4328 32-bit slot set to all 1's, with the other slots all zero.
4329 gbb picks off low bit from each byte in the 128-bit registers,
4330 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4331 we have a hit. */
4332 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4333 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4334
4335 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4336 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4337
4338 /* This allows us to rotate the corresponding cache data pointer to slot 0
4339 (rotqby uses eq_index mod 16 bytes). */
4340 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4341 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4342
4343 /* Add block offset to form final data address. */
4344 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4345
4346 /* Check that we did hit. */
4347 hit_label = gen_label_rtx ();
4348 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4349 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4350 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4351 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4352 hit_ref, pc_rtx)));
4353 /* Say that this branch is very likely to happen. */
4354 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4355 add_int_reg_note (insn, REG_BR_PROB, v);
4356
4357 ea_load_store (mem, is_store, ea_addr, data_addr);
4358 cont_label = gen_label_rtx ();
4359 emit_jump_insn (gen_jump (cont_label));
4360 emit_barrier ();
4361
4362 emit_label (hit_label);
4363
4364 if (is_store)
4365 {
4366 HOST_WIDE_INT v_hi;
4367 rtx dirty_bits = gen_reg_rtx (TImode);
4368 rtx dirty_off = gen_reg_rtx (SImode);
4369 rtx dirty_128 = gen_reg_rtx (TImode);
4370 rtx neg_block_off = gen_reg_rtx (SImode);
4371
4372 /* Set up mask with one dirty bit per byte of the mem we are
4373 writing, starting from top bit. */
4374 v_hi = v = -1;
4375 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4376 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4377 {
4378 v_hi = v;
4379 v = 0;
4380 }
4381 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4382
4383 /* Form index into cache dirty_bits. eq_index is one of
4384 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4385 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4386 offset to each of the four dirty_bits elements. */
4387 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4388
4389 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4390
4391 /* Rotate bit mask to proper bit. */
4392 emit_insn (gen_negsi2 (neg_block_off, block_off));
4393 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4394 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4395
4396 /* Or in the new dirty bits. */
4397 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4398
4399 /* Store. */
4400 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4401 }
4402
4403 emit_label (cont_label);
4404 }
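
/* Worked example (illustrative only, not from the original comments):
   suppose the tag in slot 2 matches.  Then tag_equal is all 1s in its
   third word, gbb packs that into tag_eq_pack = 0x00f0, and
   clz (0x000000f0) gives eq_index = 24 (0x18).  rotqby rotates by
   24 mod 16 = 8 bytes, which moves data_pointer[2] into the preferred
   word of cache_ptrs, and eq_index << 2 = 0x60 is exactly the offset of
   dirty_bits[2] in struct __cache_tag_array (the 16 bytes each of
   tag_lo, tag_hi, data_pointer and reserved come first).  */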
4405
4406 static rtx
4407 expand_ea_mem (rtx mem, bool is_store)
4408 {
4409 rtx ea_addr;
4410 rtx data_addr = gen_reg_rtx (Pmode);
4411 rtx new_mem;
4412
4413 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4414 if (optimize_size || optimize == 0)
4415 ea_load_store (mem, is_store, ea_addr, data_addr);
4416 else
4417 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4418
4419 if (ea_alias_set == -1)
4420 ea_alias_set = new_alias_set ();
4421
4422 /* We generate a new MEM RTX to refer to the copy of the data
4423 in the cache. We do not copy memory attributes (except the
4424 alignment) from the original MEM, as they may no longer apply
4425 to the cache copy. */
4426 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4427 set_mem_alias_set (new_mem, ea_alias_set);
4428 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4429
4430 return new_mem;
4431 }
4432
4433 int
4434 spu_expand_mov (rtx * ops, machine_mode mode)
4435 {
4436 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4437 {
4438 /* Perform the move in the destination SUBREG's inner mode. */
4439 ops[0] = SUBREG_REG (ops[0]);
4440 mode = GET_MODE (ops[0]);
4441 ops[1] = gen_lowpart_common (mode, ops[1]);
4442 gcc_assert (ops[1]);
4443 }
4444
4445 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4446 {
4447 rtx from = SUBREG_REG (ops[1]);
4448 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4449
4450 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4451 && GET_MODE_CLASS (imode) == MODE_INT
4452 && subreg_lowpart_p (ops[1]));
4453
4454 if (GET_MODE_SIZE (imode) < 4)
4455 imode = SImode;
4456 if (imode != GET_MODE (from))
4457 from = gen_rtx_SUBREG (imode, from, 0);
4458
4459 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4460 {
4461 enum insn_code icode = convert_optab_handler (trunc_optab,
4462 mode, imode);
4463 emit_insn (GEN_FCN (icode) (ops[0], from));
4464 }
4465 else
4466 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4467 return 1;
4468 }
4469
4470 /* At least one of the operands needs to be a register. */
4471 if ((reload_in_progress | reload_completed) == 0
4472 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4473 {
4474 rtx temp = force_reg (mode, ops[1]);
4475 emit_move_insn (ops[0], temp);
4476 return 1;
4477 }
4478 if (reload_in_progress || reload_completed)
4479 {
4480 if (CONSTANT_P (ops[1]))
4481 return spu_split_immediate (ops);
4482 return 0;
4483 }
4484
4485 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4486 extend them. */
4487 if (GET_CODE (ops[1]) == CONST_INT)
4488 {
4489 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4490 if (val != INTVAL (ops[1]))
4491 {
4492 emit_move_insn (ops[0], GEN_INT (val));
4493 return 1;
4494 }
4495 }
4496 if (MEM_P (ops[0]))
4497 {
4498 if (MEM_ADDR_SPACE (ops[0]))
4499 ops[0] = expand_ea_mem (ops[0], true);
4500 return spu_split_store (ops);
4501 }
4502 if (MEM_P (ops[1]))
4503 {
4504 if (MEM_ADDR_SPACE (ops[1]))
4505 ops[1] = expand_ea_mem (ops[1], false);
4506 return spu_split_load (ops);
4507 }
4508
4509 return 0;
4510 }
4511
4512 static void
4513 spu_convert_move (rtx dst, rtx src)
4514 {
4515 machine_mode mode = GET_MODE (dst);
4516 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4517 rtx reg;
4518 gcc_assert (GET_MODE (src) == TImode);
4519 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4520 emit_insn (gen_rtx_SET (reg,
4521 gen_rtx_TRUNCATE (int_mode,
4522 gen_rtx_LSHIFTRT (TImode, src,
4523 GEN_INT (int_mode == DImode ? 64 : 96)))));
4524 if (int_mode != mode)
4525 {
4526 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4527 emit_move_insn (dst, reg);
4528 }
4529 }
4530
4531 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4532 the address from SRC and SRC+16. Return a REG or CONST_INT that
4533 specifies how many bytes to rotate the loaded registers, plus any
4534 extra from EXTRA_ROTBY. The address and rotate amounts are
4535 normalized to improve merging of loads and rotate computations. */
4536 static rtx
4537 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4538 {
4539 rtx addr = XEXP (src, 0);
4540 rtx p0, p1, rot, addr0, addr1;
4541 int rot_amt;
4542
4543 rot = 0;
4544 rot_amt = 0;
4545
4546 if (MEM_ALIGN (src) >= 128)
4547 /* Address is already aligned; simply perform a TImode load. */ ;
4548 else if (GET_CODE (addr) == PLUS)
4549 {
4550 /* 8 cases:
4551 aligned reg + aligned reg => lqx
4552 aligned reg + unaligned reg => lqx, rotqby
4553 aligned reg + aligned const => lqd
4554 aligned reg + unaligned const => lqd, rotqbyi
4555 unaligned reg + aligned reg => lqx, rotqby
4556 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4557 unaligned reg + aligned const => lqd, rotqby
4558 unaligned reg + unaligned const => not allowed by legitimate address
4559 */
4560 p0 = XEXP (addr, 0);
4561 p1 = XEXP (addr, 1);
4562 if (!reg_aligned_for_addr (p0))
4563 {
4564 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4565 {
4566 rot = gen_reg_rtx (SImode);
4567 emit_insn (gen_addsi3 (rot, p0, p1));
4568 }
4569 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4570 {
4571 if (INTVAL (p1) > 0
4572 && REG_POINTER (p0)
4573 && INTVAL (p1) * BITS_PER_UNIT
4574 < REGNO_POINTER_ALIGN (REGNO (p0)))
4575 {
4576 rot = gen_reg_rtx (SImode);
4577 emit_insn (gen_addsi3 (rot, p0, p1));
4578 addr = p0;
4579 }
4580 else
4581 {
4582 rtx x = gen_reg_rtx (SImode);
4583 emit_move_insn (x, p1);
4584 if (!spu_arith_operand (p1, SImode))
4585 p1 = x;
4586 rot = gen_reg_rtx (SImode);
4587 emit_insn (gen_addsi3 (rot, p0, p1));
4588 addr = gen_rtx_PLUS (Pmode, p0, x);
4589 }
4590 }
4591 else
4592 rot = p0;
4593 }
4594 else
4595 {
4596 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4597 {
4598 rot_amt = INTVAL (p1) & 15;
4599 if (INTVAL (p1) & -16)
4600 {
4601 p1 = GEN_INT (INTVAL (p1) & -16);
4602 addr = gen_rtx_PLUS (SImode, p0, p1);
4603 }
4604 else
4605 addr = p0;
4606 }
4607 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4608 rot = p1;
4609 }
4610 }
4611 else if (REG_P (addr))
4612 {
4613 if (!reg_aligned_for_addr (addr))
4614 rot = addr;
4615 }
4616 else if (GET_CODE (addr) == CONST)
4617 {
4618 if (GET_CODE (XEXP (addr, 0)) == PLUS
4619 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4620 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4621 {
4622 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4623 if (rot_amt & -16)
4624 addr = gen_rtx_CONST (Pmode,
4625 gen_rtx_PLUS (Pmode,
4626 XEXP (XEXP (addr, 0), 0),
4627 GEN_INT (rot_amt & -16)));
4628 else
4629 addr = XEXP (XEXP (addr, 0), 0);
4630 }
4631 else
4632 {
4633 rot = gen_reg_rtx (Pmode);
4634 emit_move_insn (rot, addr);
4635 }
4636 }
4637 else if (GET_CODE (addr) == CONST_INT)
4638 {
4639 rot_amt = INTVAL (addr);
4640 addr = GEN_INT (rot_amt & -16);
4641 }
4642 else if (!ALIGNED_SYMBOL_REF_P (addr))
4643 {
4644 rot = gen_reg_rtx (Pmode);
4645 emit_move_insn (rot, addr);
4646 }
4647
4648 rot_amt += extra_rotby;
4649
4650 rot_amt &= 15;
4651
4652 if (rot && rot_amt)
4653 {
4654 rtx x = gen_reg_rtx (SImode);
4655 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4656 rot = x;
4657 rot_amt = 0;
4658 }
4659 if (!rot && rot_amt)
4660 rot = GEN_INT (rot_amt);
4661
4662 addr0 = copy_rtx (addr);
4663 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4664 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4665
4666 if (dst1)
4667 {
4668 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4669 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4670 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4671 }
4672
4673 return rot;
4674 }
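
/* The sequence built above boils down to: fetch the aligned quadword that
   contains the address, then rotate it so the wanted bytes land in the
   preferred slot.  The following host-side sketch (guarded out of the
   build, standard C only, no GCC internals) models that lqd/lqx + rotqby
   pair; it ignores the EXTRA_ROTBY adjustment used for sub-word scalars
   and is illustrative only.  */
#if 0
#include <stddef.h>
#include <string.h>

/* Model of rotqby: rotate a 16-byte quadword left by N bytes.  */
static void
model_rotqby (unsigned char q[16], int n)
{
  unsigned char tmp[16];
  int i;
  n &= 15;
  for (i = 0; i < 16; i++)
    tmp[i] = q[(i + n) & 15];
  memcpy (q, tmp, 16);
}

/* Model of the split load: read the 16-byte line at ADDR & -16, then
   rotate by the low four address bits so the requested bytes start at
   byte 0.  Rotating by the full address would give the same result,
   since only the count mod 16 matters.  */
static void
model_unaligned_load (const unsigned char *mem, size_t addr,
                      unsigned char out[16])
{
  memcpy (out, mem + (addr & ~(size_t) 15), 16);
  model_rotqby (out, (int) (addr & 15));
}
#endif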
4675
4676 int
4677 spu_split_load (rtx * ops)
4678 {
4679 machine_mode mode = GET_MODE (ops[0]);
4680 rtx addr, load, rot;
4681 int rot_amt;
4682
4683 if (GET_MODE_SIZE (mode) >= 16)
4684 return 0;
4685
4686 addr = XEXP (ops[1], 0);
4687 gcc_assert (GET_CODE (addr) != AND);
4688
4689 if (!address_needs_split (ops[1]))
4690 {
4691 ops[1] = change_address (ops[1], TImode, addr);
4692 load = gen_reg_rtx (TImode);
4693 emit_insn (gen__movti (load, ops[1]));
4694 spu_convert_move (ops[0], load);
4695 return 1;
4696 }
4697
4698 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4699
4700 load = gen_reg_rtx (TImode);
4701 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4702
4703 if (rot)
4704 emit_insn (gen_rotqby_ti (load, load, rot));
4705
4706 spu_convert_move (ops[0], load);
4707 return 1;
4708 }
4709
4710 int
4711 spu_split_store (rtx * ops)
4712 {
4713 machine_mode mode = GET_MODE (ops[0]);
4714 rtx reg;
4715 rtx addr, p0, p1, p1_lo, smem;
4716 int aform;
4717 int scalar;
4718
4719 if (GET_MODE_SIZE (mode) >= 16)
4720 return 0;
4721
4722 addr = XEXP (ops[0], 0);
4723 gcc_assert (GET_CODE (addr) != AND);
4724
4725 if (!address_needs_split (ops[0]))
4726 {
4727 reg = gen_reg_rtx (TImode);
4728 emit_insn (gen_spu_convert (reg, ops[1]));
4729 ops[0] = change_address (ops[0], TImode, addr);
4730 emit_move_insn (ops[0], reg);
4731 return 1;
4732 }
4733
4734 if (GET_CODE (addr) == PLUS)
4735 {
4736 /* 8 cases:
4737 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4738 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4739 aligned reg + aligned const => lqd, c?d, shuf, stqx
4740 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4741 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4742 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4743 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4744 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4745 */
4746 aform = 0;
4747 p0 = XEXP (addr, 0);
4748 p1 = p1_lo = XEXP (addr, 1);
4749 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4750 {
4751 p1_lo = GEN_INT (INTVAL (p1) & 15);
4752 if (reg_aligned_for_addr (p0))
4753 {
4754 p1 = GEN_INT (INTVAL (p1) & -16);
4755 if (p1 == const0_rtx)
4756 addr = p0;
4757 else
4758 addr = gen_rtx_PLUS (SImode, p0, p1);
4759 }
4760 else
4761 {
4762 rtx x = gen_reg_rtx (SImode);
4763 emit_move_insn (x, p1);
4764 addr = gen_rtx_PLUS (SImode, p0, x);
4765 }
4766 }
4767 }
4768 else if (REG_P (addr))
4769 {
4770 aform = 0;
4771 p0 = addr;
4772 p1 = p1_lo = const0_rtx;
4773 }
4774 else
4775 {
4776 aform = 1;
4777 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4778 p1 = 0; /* aform doesn't use p1 */
4779 p1_lo = addr;
4780 if (ALIGNED_SYMBOL_REF_P (addr))
4781 p1_lo = const0_rtx;
4782 else if (GET_CODE (addr) == CONST
4783 && GET_CODE (XEXP (addr, 0)) == PLUS
4784 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4785 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4786 {
4787 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4788 if ((v & -16) != 0)
4789 addr = gen_rtx_CONST (Pmode,
4790 gen_rtx_PLUS (Pmode,
4791 XEXP (XEXP (addr, 0), 0),
4792 GEN_INT (v & -16)));
4793 else
4794 addr = XEXP (XEXP (addr, 0), 0);
4795 p1_lo = GEN_INT (v & 15);
4796 }
4797 else if (GET_CODE (addr) == CONST_INT)
4798 {
4799 p1_lo = GEN_INT (INTVAL (addr) & 15);
4800 addr = GEN_INT (INTVAL (addr) & -16);
4801 }
4802 else
4803 {
4804 p1_lo = gen_reg_rtx (SImode);
4805 emit_move_insn (p1_lo, addr);
4806 }
4807 }
4808
4809 gcc_assert (aform == 0 || aform == 1);
4810 reg = gen_reg_rtx (TImode);
4811
4812 scalar = store_with_one_insn_p (ops[0]);
4813 if (!scalar)
4814 {
4815 /* We could copy the flags from the ops[0] MEM to lmem here, but
4816 we don't because we want this load to be optimized away if
4817 possible, and copying the flags would prevent that in certain
4818 cases, e.g. consider the volatile flag. */
4819
4820 rtx pat = gen_reg_rtx (TImode);
4821 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4822 set_mem_alias_set (lmem, 0);
4823 emit_insn (gen_movti (reg, lmem));
4824
4825 if (!p0 || reg_aligned_for_addr (p0))
4826 p0 = stack_pointer_rtx;
4827 if (!p1_lo)
4828 p1_lo = const0_rtx;
4829
4830 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4831 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4832 }
4833 else
4834 {
4835 if (GET_CODE (ops[1]) == REG)
4836 emit_insn (gen_spu_convert (reg, ops[1]));
4837 else if (GET_CODE (ops[1]) == SUBREG)
4838 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4839 else
4840 abort ();
4841 }
4842
4843 if (GET_MODE_SIZE (mode) < 4 && scalar)
4844 emit_insn (gen_ashlti3
4845 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4846
4847 smem = change_address (ops[0], TImode, copy_rtx (addr));
4848 /* We can't use the previous alias set because the memory has changed
4849 size and can potentially overlap objects of other types. */
4850 set_mem_alias_set (smem, 0);
4851
4852 emit_insn (gen_movti (smem, reg));
4853 return 1;
4854 }
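
/* For stores that cannot be done with a single stq, the code above emits
   a read-modify-write: load the containing quadword, build an insertion
   control with cpat, shufb the new value into place, and store the
   quadword back.  The host-side sketch below (guarded out of the build)
   models that effect with plain memcpy standing in for the cpat/shufb
   pair; it assumes the value does not cross a quadword boundary, as the
   generated code does, and is illustrative only.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
model_unaligned_store (unsigned char *mem, size_t addr,
                       const unsigned char *val, size_t size)
{
  unsigned char line[16];
  size_t base = addr & ~(size_t) 15;

  memcpy (line, mem + base, 16);           /* lq the old quadword.  */
  memcpy (line + (addr & 15), val, size);  /* cpat + shufb merge.  */
  memcpy (mem + base, line, 16);           /* stq it back.  */
}
#endif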
4855
4856 /* Return TRUE if X is MEM which is a struct member reference
4857 and the member can safely be loaded and stored with a single
4858 instruction because it is padded. */
4859 static int
4860 mem_is_padded_component_ref (rtx x)
4861 {
4862 tree t = MEM_EXPR (x);
4863 tree r;
4864 if (!t || TREE_CODE (t) != COMPONENT_REF)
4865 return 0;
4866 t = TREE_OPERAND (t, 1);
4867 if (!t || TREE_CODE (t) != FIELD_DECL
4868 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4869 return 0;
4870 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4871 r = DECL_FIELD_CONTEXT (t);
4872 if (!r || TREE_CODE (r) != RECORD_TYPE)
4873 return 0;
4874 /* Make sure they are the same mode */
4875 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4876 return 0;
4877 /* If there are no following fields, then the field alignment ensures
4878 the structure is padded to that alignment, which means this field is
4879 padded too. */
4880 if (TREE_CHAIN (t) == 0)
4881 return 1;
4882 /* If the following field is also aligned then this field will be
4883 padded. */
4884 t = TREE_CHAIN (t);
4885 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4886 return 1;
4887 return 0;
4888 }
4889
4890 /* Parse the -mfixed-range= option string. */
4891 static void
4892 fix_range (const char *const_str)
4893 {
4894 int i, first, last;
4895 char *str, *dash, *comma;
4896
4897 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4898 REG2 are either register names or register numbers. The effect
4899 of this option is to mark the registers in the range from REG1 to
4900 REG2 as ``fixed'' so they won't be used by the compiler. */
4901
4902 i = strlen (const_str);
4903 str = (char *) alloca (i + 1);
4904 memcpy (str, const_str, i + 1);
4905
4906 while (1)
4907 {
4908 dash = strchr (str, '-');
4909 if (!dash)
4910 {
4911 warning (0, "value of -mfixed-range must have form REG1-REG2");
4912 return;
4913 }
4914 *dash = '\0';
4915 comma = strchr (dash + 1, ',');
4916 if (comma)
4917 *comma = '\0';
4918
4919 first = decode_reg_name (str);
4920 if (first < 0)
4921 {
4922 warning (0, "unknown register name: %s", str);
4923 return;
4924 }
4925
4926 last = decode_reg_name (dash + 1);
4927 if (last < 0)
4928 {
4929 warning (0, "unknown register name: %s", dash + 1);
4930 return;
4931 }
4932
4933 *dash = '-';
4934
4935 if (first > last)
4936 {
4937 warning (0, "%s-%s is an empty range", str, dash + 1);
4938 return;
4939 }
4940
4941 for (i = first; i <= last; ++i)
4942 fixed_regs[i] = call_used_regs[i] = 1;
4943
4944 if (!comma)
4945 break;
4946
4947 *comma = ',';
4948 str = comma + 1;
4949 }
4950 }
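
/* Usage note (illustrative; the register numbers are hypothetical): an
   option such as -mfixed-range=80-127 marks registers 80 through 127 as
   fixed, and several ranges can be combined, as in
   -mfixed-range=20-24,80-127.  */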
4951
4952 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4953 can be generated using the fsmbi instruction. */
4954 int
4955 fsmbi_const_p (rtx x)
4956 {
4957 if (CONSTANT_P (x))
4958 {
4959 /* We can always choose TImode for CONST_INT because the high bits
4960 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4961 enum immediate_class c = classify_immediate (x, TImode);
4962 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4963 }
4964 return 0;
4965 }
4966
4967 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4968 can be generated using the cbd, chd, cwd or cdd instruction. */
4969 int
4970 cpat_const_p (rtx x, machine_mode mode)
4971 {
4972 if (CONSTANT_P (x))
4973 {
4974 enum immediate_class c = classify_immediate (x, mode);
4975 return c == IC_CPAT;
4976 }
4977 return 0;
4978 }
4979
4980 rtx
4981 gen_cpat_const (rtx * ops)
4982 {
4983 unsigned char dst[16];
4984 int i, offset, shift, isize;
4985 if (GET_CODE (ops[3]) != CONST_INT
4986 || GET_CODE (ops[2]) != CONST_INT
4987 || (GET_CODE (ops[1]) != CONST_INT
4988 && GET_CODE (ops[1]) != REG))
4989 return 0;
4990 if (GET_CODE (ops[1]) == REG
4991 && (!REG_POINTER (ops[1])
4992 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4993 return 0;
4994
4995 for (i = 0; i < 16; i++)
4996 dst[i] = i + 16;
4997 isize = INTVAL (ops[3]);
4998 if (isize == 1)
4999 shift = 3;
5000 else if (isize == 2)
5001 shift = 2;
5002 else
5003 shift = 0;
5004 offset = (INTVAL (ops[2]) +
5005 (GET_CODE (ops[1]) ==
5006 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5007 for (i = 0; i < isize; i++)
5008 dst[offset + i] = i + shift;
5009 return array_to_constant (TImode, dst);
5010 }
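
/* Worked example (illustrative, not from the original comments): for a
   4-byte element inserted at offset 4 from a 16-byte-aligned base
   (isize = 4, so shift = 0), the loop produces the control bytes
   10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f.  Used as a shufb
   control with the new value as the first source and the old quadword
   as the second, this keeps the quadword intact except for bytes 4-7,
   which receive bytes 0-3 of the new value -- the same pattern a cwd
   with that offset would generate.  */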
5011
5012 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5013 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5014 than 16 bytes, the value is repeated across the rest of the array. */
5015 void
5016 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5017 {
5018 HOST_WIDE_INT val;
5019 int i, j, first;
5020
5021 memset (arr, 0, 16);
5022 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5023 if (GET_CODE (x) == CONST_INT
5024 || (GET_CODE (x) == CONST_DOUBLE
5025 && (mode == SFmode || mode == DFmode)))
5026 {
5027 gcc_assert (mode != VOIDmode && mode != BLKmode);
5028
5029 if (GET_CODE (x) == CONST_DOUBLE)
5030 val = const_double_to_hwint (x);
5031 else
5032 val = INTVAL (x);
5033 first = GET_MODE_SIZE (mode) - 1;
5034 for (i = first; i >= 0; i--)
5035 {
5036 arr[i] = val & 0xff;
5037 val >>= 8;
5038 }
5039 /* Splat the constant across the whole array. */
5040 for (j = 0, i = first + 1; i < 16; i++)
5041 {
5042 arr[i] = arr[j];
5043 j = (j == first) ? 0 : j + 1;
5044 }
5045 }
5046 else if (GET_CODE (x) == CONST_DOUBLE)
5047 {
5048 val = CONST_DOUBLE_LOW (x);
5049 for (i = 15; i >= 8; i--)
5050 {
5051 arr[i] = val & 0xff;
5052 val >>= 8;
5053 }
5054 val = CONST_DOUBLE_HIGH (x);
5055 for (i = 7; i >= 0; i--)
5056 {
5057 arr[i] = val & 0xff;
5058 val >>= 8;
5059 }
5060 }
5061 else if (GET_CODE (x) == CONST_VECTOR)
5062 {
5063 int units;
5064 rtx elt;
5065 mode = GET_MODE_INNER (mode);
5066 units = CONST_VECTOR_NUNITS (x);
5067 for (i = 0; i < units; i++)
5068 {
5069 elt = CONST_VECTOR_ELT (x, i);
5070 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5071 {
5072 if (GET_CODE (elt) == CONST_DOUBLE)
5073 val = const_double_to_hwint (elt);
5074 else
5075 val = INTVAL (elt);
5076 first = GET_MODE_SIZE (mode) - 1;
5077 if (first + i * GET_MODE_SIZE (mode) > 16)
5078 abort ();
5079 for (j = first; j >= 0; j--)
5080 {
5081 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5082 val >>= 8;
5083 }
5084 }
5085 }
5086 }
5087 else
5088 gcc_unreachable();
5089 }
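
/* Worked example (illustrative): for an SImode CONST_INT 0x01020304 the
   code above fills arr[0..3] with 01 02 03 04 (big-endian within the
   element) and then splats that group across the array, giving
   01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04.  */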
5090
5091 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5092 smaller than 16 bytes, use the bytes that would represent that value
5093 in a register, e.g., for QImode return the value of arr[3]. */
5094 rtx
5095 array_to_constant (machine_mode mode, const unsigned char arr[16])
5096 {
5097 machine_mode inner_mode;
5098 rtvec v;
5099 int units, size, i, j, k;
5100 HOST_WIDE_INT val;
5101
5102 if (GET_MODE_CLASS (mode) == MODE_INT
5103 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5104 {
5105 j = GET_MODE_SIZE (mode);
5106 i = j < 4 ? 4 - j : 0;
5107 for (val = 0; i < j; i++)
5108 val = (val << 8) | arr[i];
5109 val = trunc_int_for_mode (val, mode);
5110 return GEN_INT (val);
5111 }
5112
5113 if (mode == TImode)
5114 {
5115 HOST_WIDE_INT high;
5116 for (i = high = 0; i < 8; i++)
5117 high = (high << 8) | arr[i];
5118 for (i = 8, val = 0; i < 16; i++)
5119 val = (val << 8) | arr[i];
5120 return immed_double_const (val, high, TImode);
5121 }
5122 if (mode == SFmode)
5123 {
5124 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5125 val = trunc_int_for_mode (val, SImode);
5126 return hwint_to_const_double (SFmode, val);
5127 }
5128 if (mode == DFmode)
5129 {
5130 for (i = 0, val = 0; i < 8; i++)
5131 val = (val << 8) | arr[i];
5132 return hwint_to_const_double (DFmode, val);
5133 }
5134
5135 if (!VECTOR_MODE_P (mode))
5136 abort ();
5137
5138 units = GET_MODE_NUNITS (mode);
5139 size = GET_MODE_UNIT_SIZE (mode);
5140 inner_mode = GET_MODE_INNER (mode);
5141 v = rtvec_alloc (units);
5142
5143 for (k = i = 0; i < units; ++i)
5144 {
5145 val = 0;
5146 for (j = 0; j < size; j++, k++)
5147 val = (val << 8) | arr[k];
5148
5149 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5150 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5151 else
5152 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5153 }
5154 if (k > 16)
5155 abort ();
5156
5157 return gen_rtx_CONST_VECTOR (mode, v);
5158 }
5159
5160 static void
5161 reloc_diagnostic (rtx x)
5162 {
5163 tree decl = 0;
5164 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5165 return;
5166
5167 if (GET_CODE (x) == SYMBOL_REF)
5168 decl = SYMBOL_REF_DECL (x);
5169 else if (GET_CODE (x) == CONST
5170 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5171 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5172
5173 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5174 if (decl && !DECL_P (decl))
5175 decl = 0;
5176
5177 /* The decl could be a string constant. */
5178 if (decl && DECL_P (decl))
5179 {
5180 location_t loc;
5181 /* We use last_assemble_variable_decl to get line information. It's
5182 not always going to be right and might not even be close, but will
5183 be right for the more common cases. */
5184 if (!last_assemble_variable_decl || in_section == ctors_section)
5185 loc = DECL_SOURCE_LOCATION (decl);
5186 else
5187 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5188
5189 if (TARGET_WARN_RELOC)
5190 warning_at (loc, 0,
5191 "creating run-time relocation for %qD", decl);
5192 else
5193 error_at (loc,
5194 "creating run-time relocation for %qD", decl);
5195 }
5196 else
5197 {
5198 if (TARGET_WARN_RELOC)
5199 warning_at (input_location, 0, "creating run-time relocation");
5200 else
5201 error_at (input_location, "creating run-time relocation");
5202 }
5203 }
5204
5205 /* Hook into assemble_integer so we can generate an error for run-time
5206 relocations. The SPU ABI disallows them. */
5207 static bool
5208 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5209 {
5210 /* By default run-time relocations aren't supported, but we allow them
5211 in case users support them in their own run-time loader, and we provide
5212 a warning for those users who don't. */
5213 if ((GET_CODE (x) == SYMBOL_REF)
5214 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5215 reloc_diagnostic (x);
5216
5217 return default_assemble_integer (x, size, aligned_p);
5218 }
5219
5220 static void
5221 spu_asm_globalize_label (FILE * file, const char *name)
5222 {
5223 fputs ("\t.global\t", file);
5224 assemble_name (file, name);
5225 fputs ("\n", file);
5226 }
5227
5228 static bool
5229 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5230 int opno ATTRIBUTE_UNUSED, int *total,
5231 bool speed ATTRIBUTE_UNUSED)
5232 {
5233 int code = GET_CODE (x);
5234 int cost = COSTS_N_INSNS (2);
5235
5236 /* Folding to a CONST_VECTOR will use extra space but there might
5237 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5238 only if it allows us to fold away multiple insns. Changing the cost
5239 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5240 because this cost will only be compared against a single insn.
5241 if (code == CONST_VECTOR)
5242 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5243 */
5244
5245 /* Use defaults for float operations. Not accurate but good enough. */
5246 if (mode == DFmode)
5247 {
5248 *total = COSTS_N_INSNS (13);
5249 return true;
5250 }
5251 if (mode == SFmode)
5252 {
5253 *total = COSTS_N_INSNS (6);
5254 return true;
5255 }
5256 switch (code)
5257 {
5258 case CONST_INT:
5259 if (satisfies_constraint_K (x))
5260 *total = 0;
5261 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5262 *total = COSTS_N_INSNS (1);
5263 else
5264 *total = COSTS_N_INSNS (3);
5265 return true;
5266
5267 case CONST:
5268 *total = COSTS_N_INSNS (3);
5269 return true;
5270
5271 case LABEL_REF:
5272 case SYMBOL_REF:
5273 *total = COSTS_N_INSNS (0);
5274 return true;
5275
5276 case CONST_DOUBLE:
5277 *total = COSTS_N_INSNS (5);
5278 return true;
5279
5280 case FLOAT_EXTEND:
5281 case FLOAT_TRUNCATE:
5282 case FLOAT:
5283 case UNSIGNED_FLOAT:
5284 case FIX:
5285 case UNSIGNED_FIX:
5286 *total = COSTS_N_INSNS (7);
5287 return true;
5288
5289 case PLUS:
5290 if (mode == TImode)
5291 {
5292 *total = COSTS_N_INSNS (9);
5293 return true;
5294 }
5295 break;
5296
5297 case MULT:
5298 cost =
5299 GET_CODE (XEXP (x, 0)) ==
5300 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5301 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5302 {
5303 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5304 {
5305 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5306 cost = COSTS_N_INSNS (14);
5307 if ((val & 0xffff) == 0)
5308 cost = COSTS_N_INSNS (9);
5309 else if (val > 0 && val < 0x10000)
5310 cost = COSTS_N_INSNS (11);
5311 }
5312 }
5313 *total = cost;
5314 return true;
5315 case DIV:
5316 case UDIV:
5317 case MOD:
5318 case UMOD:
5319 *total = COSTS_N_INSNS (20);
5320 return true;
5321 case ROTATE:
5322 case ROTATERT:
5323 case ASHIFT:
5324 case ASHIFTRT:
5325 case LSHIFTRT:
5326 *total = COSTS_N_INSNS (4);
5327 return true;
5328 case UNSPEC:
5329 if (XINT (x, 1) == UNSPEC_CONVERT)
5330 *total = COSTS_N_INSNS (0);
5331 else
5332 *total = COSTS_N_INSNS (4);
5333 return true;
5334 }
5335 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5336 if (GET_MODE_CLASS (mode) == MODE_INT
5337 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5338 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5339 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5340 *total = cost;
5341 return true;
5342 }
5343
5344 static machine_mode
5345 spu_unwind_word_mode (void)
5346 {
5347 return SImode;
5348 }
5349
5350 /* Decide whether we can make a sibling call to a function. DECL is the
5351 declaration of the function being targeted by the call and EXP is the
5352 CALL_EXPR representing the call. */
5353 static bool
5354 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5355 {
5356 return decl && !TARGET_LARGE_MEM;
5357 }
5358
5359 /* We need to correctly update the back chain pointer and the Available
5360 Stack Size (which is in the second slot of the sp register). */
5361 void
5362 spu_allocate_stack (rtx op0, rtx op1)
5363 {
5364 HOST_WIDE_INT v;
5365 rtx chain = gen_reg_rtx (V4SImode);
5366 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5367 rtx sp = gen_reg_rtx (V4SImode);
5368 rtx splatted = gen_reg_rtx (V4SImode);
5369 rtx pat = gen_reg_rtx (TImode);
5370
5371 /* copy the back chain so we can save it back again. */
5372 emit_move_insn (chain, stack_bot);
5373
5374 op1 = force_reg (SImode, op1);
5375
5376 v = 0x1020300010203ll;
5377 emit_move_insn (pat, immed_double_const (v, v, TImode));
5378 emit_insn (gen_shufb (splatted, op1, op1, pat));
5379
5380 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5381 emit_insn (gen_subv4si3 (sp, sp, splatted));
5382
5383 if (flag_stack_check)
5384 {
5385 rtx avail = gen_reg_rtx(SImode);
5386 rtx result = gen_reg_rtx(SImode);
5387 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5388 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5389 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5390 }
5391
5392 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5393
5394 emit_move_insn (stack_bot, chain);
5395
5396 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5397 }
5398
5399 void
5400 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5401 {
5402 static unsigned char arr[16] =
5403 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5404 rtx temp = gen_reg_rtx (SImode);
5405 rtx temp2 = gen_reg_rtx (SImode);
5406 rtx temp3 = gen_reg_rtx (V4SImode);
5407 rtx temp4 = gen_reg_rtx (V4SImode);
5408 rtx pat = gen_reg_rtx (TImode);
5409 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5410
5411 /* Restore the backchain from the first word, sp from the second. */
5412 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5413 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5414
5415 emit_move_insn (pat, array_to_constant (TImode, arr));
5416
5417 /* Compute Available Stack Size for sp */
5418 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5419 emit_insn (gen_shufb (temp3, temp, temp, pat));
5420
5421 /* Compute Available Stack Size for back chain */
5422 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5423 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5424 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5425
5426 emit_insn (gen_addv4si3 (sp, sp, temp3));
5427 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5428 }
5429
5430 static void
5431 spu_init_libfuncs (void)
5432 {
5433 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5434 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5435 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5436 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5437 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5438 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5439 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5440 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5441 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5442 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5443 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5444 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5445
5446 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5447 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5448
5449 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5450 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5451 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5452 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5453 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5454 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5455 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5456 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5457 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5458 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5459 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5460 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5461
5462 set_optab_libfunc (smul_optab, TImode, "__multi3");
5463 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5464 set_optab_libfunc (smod_optab, TImode, "__modti3");
5465 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5466 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5467 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5468 }
5469
5470 /* Make a subreg, stripping any existing subreg. We could possibly just
5471 call simplify_subreg, but in this case we know what we want. */
5472 rtx
5473 spu_gen_subreg (machine_mode mode, rtx x)
5474 {
5475 if (GET_CODE (x) == SUBREG)
5476 x = SUBREG_REG (x);
5477 if (GET_MODE (x) == mode)
5478 return x;
5479 return gen_rtx_SUBREG (mode, x, 0);
5480 }
5481
5482 static bool
5483 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5484 {
5485 return (TYPE_MODE (type) == BLKmode
5486 && ((type) == 0
5487 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5488 || int_size_in_bytes (type) >
5489 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5490 }
5491 \f
5492 /* Create the built-in types and functions */
5493
5494 enum spu_function_code
5495 {
5496 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5497 #include "spu-builtins.def"
5498 #undef DEF_BUILTIN
5499 NUM_SPU_BUILTINS
5500 };
5501
5502 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5503
5504 struct spu_builtin_description spu_builtins[] = {
5505 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5506 {fcode, icode, name, type, params},
5507 #include "spu-builtins.def"
5508 #undef DEF_BUILTIN
5509 };
5510
5511 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5512
5513 /* Returns the spu builtin decl for CODE. */
5514
5515 static tree
5516 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5517 {
5518 if (code >= NUM_SPU_BUILTINS)
5519 return error_mark_node;
5520
5521 return spu_builtin_decls[code];
5522 }
5523
5524
5525 static void
5526 spu_init_builtins (void)
5527 {
5528 struct spu_builtin_description *d;
5529 unsigned int i;
5530
5531 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5532 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5533 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5534 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5535 V4SF_type_node = build_vector_type (float_type_node, 4);
5536 V2DF_type_node = build_vector_type (double_type_node, 2);
5537
5538 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5539 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5540 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5541 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5542
5543 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5544
5545 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5548 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5557
5558 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5559 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5560 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5561 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5562 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5563 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5564 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5566
5567 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5568 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5569
5570 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5571
5572 spu_builtin_types[SPU_BTI_PTR] =
5573 build_pointer_type (build_qualified_type
5574 (void_type_node,
5575 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5576
5577 /* For each builtin we build a new prototype. The tree code will make
5578 sure nodes are shared. */
5579 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5580 {
5581 tree p;
5582 char name[64]; /* build_function will make a copy. */
5583 int parm;
5584
5585 if (d->name == 0)
5586 continue;
5587
5588 /* Find last parm. */
5589 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5590 ;
5591
5592 p = void_list_node;
5593 while (parm > 1)
5594 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5595
5596 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5597
5598 sprintf (name, "__builtin_%s", d->name);
5599 spu_builtin_decls[i] =
5600 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5601 if (d->fcode == SPU_MASK_FOR_LOAD)
5602 TREE_READONLY (spu_builtin_decls[i]) = 1;
5603
5604 /* These builtins don't throw. */
5605 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5606 }
5607 }
5608
5609 void
5610 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5611 {
5612 static unsigned char arr[16] =
5613 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5614
5615 rtx temp = gen_reg_rtx (Pmode);
5616 rtx temp2 = gen_reg_rtx (V4SImode);
5617 rtx temp3 = gen_reg_rtx (V4SImode);
5618 rtx pat = gen_reg_rtx (TImode);
5619 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5620
5621 emit_move_insn (pat, array_to_constant (TImode, arr));
5622
5623 /* Restore the sp. */
5624 emit_move_insn (temp, op1);
5625 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5626
5627 /* Compute available stack size for sp. */
5628 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5629 emit_insn (gen_shufb (temp3, temp, temp, pat));
5630
5631 emit_insn (gen_addv4si3 (sp, sp, temp3));
5632 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5633 }
5634
5635 int
5636 spu_safe_dma (HOST_WIDE_INT channel)
5637 {
5638 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5639 }
5640
5641 void
5642 spu_builtin_splats (rtx ops[])
5643 {
5644 machine_mode mode = GET_MODE (ops[0]);
5645 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5646 {
5647 unsigned char arr[16];
5648 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5649 emit_move_insn (ops[0], array_to_constant (mode, arr));
5650 }
5651 else
5652 {
5653 rtx reg = gen_reg_rtx (TImode);
5654 rtx shuf;
5655 if (GET_CODE (ops[1]) != REG
5656 && GET_CODE (ops[1]) != SUBREG)
5657 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5658 switch (mode)
5659 {
5660 case V2DImode:
5661 case V2DFmode:
5662 shuf =
5663 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5664 TImode);
5665 break;
5666 case V4SImode:
5667 case V4SFmode:
5668 shuf =
5669 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5670 TImode);
5671 break;
5672 case V8HImode:
5673 shuf =
5674 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5675 TImode);
5676 break;
5677 case V16QImode:
5678 shuf =
5679 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5680 TImode);
5681 break;
5682 default:
5683 abort ();
5684 }
5685 emit_move_insn (reg, shuf);
5686 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5687 }
5688 }
5689
5690 void
5691 spu_builtin_extract (rtx ops[])
5692 {
5693 machine_mode mode;
5694 rtx rot, from, tmp;
5695
5696 mode = GET_MODE (ops[1]);
5697
5698 if (GET_CODE (ops[2]) == CONST_INT)
5699 {
5700 switch (mode)
5701 {
5702 case V16QImode:
5703 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5704 break;
5705 case V8HImode:
5706 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5707 break;
5708 case V4SFmode:
5709 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5710 break;
5711 case V4SImode:
5712 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5713 break;
5714 case V2DImode:
5715 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5716 break;
5717 case V2DFmode:
5718 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5719 break;
5720 default:
5721 abort ();
5722 }
5723 return;
5724 }
5725
5726 from = spu_gen_subreg (TImode, ops[1]);
5727 rot = gen_reg_rtx (TImode);
5728 tmp = gen_reg_rtx (SImode);
5729
5730 switch (mode)
5731 {
5732 case V16QImode:
5733 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5734 break;
5735 case V8HImode:
5736 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5737 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5738 break;
5739 case V4SFmode:
5740 case V4SImode:
5741 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5742 break;
5743 case V2DImode:
5744 case V2DFmode:
5745 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5746 break;
5747 default:
5748 abort ();
5749 }
5750 emit_insn (gen_rotqby_ti (rot, from, tmp));
5751
5752 emit_insn (gen_spu_convert (ops[0], rot));
5753 }
5754
5755 void
5756 spu_builtin_insert (rtx ops[])
5757 {
5758 machine_mode mode = GET_MODE (ops[0]);
5759 machine_mode imode = GET_MODE_INNER (mode);
5760 rtx mask = gen_reg_rtx (TImode);
5761 rtx offset;
5762
5763 if (GET_CODE (ops[3]) == CONST_INT)
5764 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5765 else
5766 {
5767 offset = gen_reg_rtx (SImode);
5768 emit_insn (gen_mulsi3
5769 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5770 }
5771 emit_insn (gen_cpat
5772 (mask, stack_pointer_rtx, offset,
5773 GEN_INT (GET_MODE_SIZE (imode))));
5774 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5775 }
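/* Illustrative note, assuming gen_cpat corresponds to the cwd/chd/cbd
   "generate controls for insertion" instructions: for a V4SImode insert
   at constant element I the byte offset is 4*I, and the resulting
   control directs the shufb above to take bytes 4*I .. 4*I+3 of the
   result from the preferred slot of ops[1] (the scalar) and every
   other byte from ops[2] (the vector).  */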
5776
5777 void
5778 spu_builtin_promote (rtx ops[])
5779 {
5780 machine_mode mode, imode;
5781 rtx rot, from, offset;
5782 HOST_WIDE_INT pos;
5783
5784 mode = GET_MODE (ops[0]);
5785 imode = GET_MODE_INNER (mode);
5786
5787 from = gen_reg_rtx (TImode);
5788 rot = spu_gen_subreg (TImode, ops[0]);
5789
5790 emit_insn (gen_spu_convert (from, ops[1]));
5791
5792 if (GET_CODE (ops[2]) == CONST_INT)
5793 {
5794 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5795 if (GET_MODE_SIZE (imode) < 4)
5796 pos += 4 - GET_MODE_SIZE (imode);
5797 offset = GEN_INT (pos & 15);
5798 }
5799 else
5800 {
5801 offset = gen_reg_rtx (SImode);
5802 switch (mode)
5803 {
5804 case V16QImode:
5805 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5806 break;
5807 case V8HImode:
5808 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5809 emit_insn (gen_addsi3 (offset, offset, offset));
5810 break;
5811 case V4SFmode:
5812 case V4SImode:
5813 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5814 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5815 break;
5816 case V2DImode:
5817 case V2DFmode:
5818 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5819 break;
5820 default:
5821 abort ();
5822 }
5823 }
5824 emit_insn (gen_rotqby_ti (rot, from, offset));
5825 }
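/* Worked example (illustration only): promoting a scalar into element 2
   of a V4SImode vector with a constant index gives pos = -4*2 = -8 and
   offset = -8 & 15 = 8; rotating the scalar's quadword left by 8 bytes
   moves its preferred slot (bytes 0-3) into bytes 8-11, which is where
   element 2 of the vector lives.  */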
5826
5827 static void
5828 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5829 {
5830 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5831 rtx shuf = gen_reg_rtx (V4SImode);
5832 rtx insn = gen_reg_rtx (V4SImode);
5833 rtx shufc;
5834 rtx insnc;
5835 rtx mem;
5836
5837 fnaddr = force_reg (SImode, fnaddr);
5838 cxt = force_reg (SImode, cxt);
5839
5840 if (TARGET_LARGE_MEM)
5841 {
5842 rtx rotl = gen_reg_rtx (V4SImode);
5843 rtx mask = gen_reg_rtx (V4SImode);
5844 rtx bi = gen_reg_rtx (SImode);
5845 static unsigned char const shufa[16] = {
5846 2, 3, 0, 1, 18, 19, 16, 17,
5847 0, 1, 2, 3, 16, 17, 18, 19
5848 };
5849 static unsigned char const insna[16] = {
5850 0x41, 0, 0, 79,
5851 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5852 0x60, 0x80, 0, 79,
5853 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5854 };
5855
5856 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5857 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5858
5859 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5860 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5861 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5862 emit_insn (gen_selb (insn, insnc, rotl, mask));
5863
5864 mem = adjust_address (m_tramp, V4SImode, 0);
5865 emit_move_insn (mem, insn);
5866
5867 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5868 mem = adjust_address (m_tramp, Pmode, 16);
5869 emit_move_insn (mem, bi);
5870 }
5871 else
5872 {
5873 rtx scxt = gen_reg_rtx (SImode);
5874 rtx sfnaddr = gen_reg_rtx (SImode);
5875 static unsigned char const insna[16] = {
5876 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5877 0x30, 0, 0, 0,
5878 0, 0, 0, 0,
5879 0, 0, 0, 0
5880 };
5881
5882 shufc = gen_reg_rtx (TImode);
5883 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5884
5885 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5886 fits 18 bits and the last 4 are zeros. This will be true if
5887 the stack pointer is initialized to 0x3fff0 at program start;
5888 otherwise the ila instruction will be garbage. */
5889
5890 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5891 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5892 emit_insn (gen_cpat
5893 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5894 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5895 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5896
5897 mem = adjust_address (m_tramp, V4SImode, 0);
5898 emit_move_insn (mem, insn);
5899 }
5900 emit_insn (gen_sync ());
5901 }
5902
5903 static bool
5904 spu_warn_func_return (tree decl)
5905 {
5906 /* Naked functions are implemented entirely in assembly, including the
5907 return sequence, so suppress warnings about this. */
5908 return !spu_naked_function_p (decl);
5909 }
5910
5911 void
5912 spu_expand_sign_extend (rtx ops[])
5913 {
5914 unsigned char arr[16];
5915 rtx pat = gen_reg_rtx (TImode);
5916 rtx sign, c;
5917 int i, last;
5918 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5919 if (GET_MODE (ops[1]) == QImode)
5920 {
5921 sign = gen_reg_rtx (HImode);
5922 emit_insn (gen_extendqihi2 (sign, ops[1]));
5923 for (i = 0; i < 16; i++)
5924 arr[i] = 0x12;
5925 arr[last] = 0x13;
5926 }
5927 else
5928 {
5929 for (i = 0; i < 16; i++)
5930 arr[i] = 0x10;
5931 switch (GET_MODE (ops[1]))
5932 {
5933 case HImode:
5934 sign = gen_reg_rtx (SImode);
5935 emit_insn (gen_extendhisi2 (sign, ops[1]));
5936 arr[last] = 0x03;
5937 arr[last - 1] = 0x02;
5938 break;
5939 case SImode:
5940 sign = gen_reg_rtx (SImode);
5941 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5942 for (i = 0; i < 4; i++)
5943 arr[last - i] = 3 - i;
5944 break;
5945 case DImode:
5946 sign = gen_reg_rtx (SImode);
5947 c = gen_reg_rtx (SImode);
5948 emit_insn (gen_spu_convert (c, ops[1]));
5949 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5950 for (i = 0; i < 8; i++)
5951 arr[last - i] = 7 - i;
5952 break;
5953 default:
5954 abort ();
5955 }
5956 }
5957 emit_move_insn (pat, array_to_constant (TImode, arr));
5958 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5959 }
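/* Worked example (illustration only): for an SImode to DImode extension
   the pattern above ends up as

       arr = { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, 0x10, ... };

   Control bytes 0x10-0x1f select from the second shufb operand (the
   computed sign word, all zeros or all ones), while 0x00-0x03 select
   the four bytes of the original SImode value, so bytes 0-3 of the
   result hold the sign extension and bytes 4-7 hold the value.  */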
5960
5961 /* Expand vector initialization. If there are any constant parts,
5962 load the constant parts first, then load any non-constant parts. */
5963 void
5964 spu_expand_vector_init (rtx target, rtx vals)
5965 {
5966 machine_mode mode = GET_MODE (target);
5967 int n_elts = GET_MODE_NUNITS (mode);
5968 int n_var = 0;
5969 bool all_same = true;
5970 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5971 int i;
5972
5973 first = XVECEXP (vals, 0, 0);
5974 for (i = 0; i < n_elts; ++i)
5975 {
5976 x = XVECEXP (vals, 0, i);
5977 if (!(CONST_INT_P (x)
5978 || GET_CODE (x) == CONST_DOUBLE
5979 || GET_CODE (x) == CONST_FIXED))
5980 ++n_var;
5981 else
5982 {
5983 if (first_constant == NULL_RTX)
5984 first_constant = x;
5985 }
5986 if (i > 0 && !rtx_equal_p (x, first))
5987 all_same = false;
5988 }
5989
5990 /* If all elements are the same, use splats to replicate the element. */
5991 if (all_same)
5992 {
5993 if (!CONSTANT_P (first)
5994 && !register_operand (first, GET_MODE (x)))
5995 first = force_reg (GET_MODE (first), first);
5996 emit_insn (gen_spu_splats (target, first));
5997 return;
5998 }
5999
6000 /* Load the constant parts. */
6001 if (n_var != n_elts)
6002 {
6003 if (n_var == 0)
6004 {
6005 emit_move_insn (target,
6006 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6007 }
6008 else
6009 {
6010 rtx constant_parts_rtx = copy_rtx (vals);
6011
6012 gcc_assert (first_constant != NULL_RTX);
6013 /* Fill empty slots with the first constant; this increases
6014 our chance of using splats in the recursive call below. */
6015 for (i = 0; i < n_elts; ++i)
6016 {
6017 x = XVECEXP (constant_parts_rtx, 0, i);
6018 if (!(CONST_INT_P (x)
6019 || GET_CODE (x) == CONST_DOUBLE
6020 || GET_CODE (x) == CONST_FIXED))
6021 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6022 }
6023
6024 spu_expand_vector_init (target, constant_parts_rtx);
6025 }
6026 }
6027
6028 /* Load the variable parts. */
6029 if (n_var != 0)
6030 {
6031 rtx insert_operands[4];
6032
6033 insert_operands[0] = target;
6034 insert_operands[2] = target;
6035 for (i = 0; i < n_elts; ++i)
6036 {
6037 x = XVECEXP (vals, 0, i);
6038 if (!(CONST_INT_P (x)
6039 || GET_CODE (x) == CONST_DOUBLE
6040 || GET_CODE (x) == CONST_FIXED))
6041 {
6042 if (!register_operand (x, GET_MODE (x)))
6043 x = force_reg (GET_MODE (x), x);
6044 insert_operands[1] = x;
6045 insert_operands[3] = GEN_INT (i);
6046 spu_builtin_insert (insert_operands);
6047 }
6048 }
6049 }
6050 }
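/* Usage sketch (illustration only): initializing a V4SImode vector with
   { 1, 2, x, 4 } first recurses on the constant vector { 1, 2, 1, 4 }
   (the variable slot is filled with the first constant so the recursive
   call may still be able to use a splat), and then calls
   spu_builtin_insert to place x into element 2.  */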
6051
6052 /* Return the insn code for the vector compare instruction for the given
6053 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6054
6055 static int
6056 get_vec_cmp_insn (enum rtx_code code,
6057 machine_mode dest_mode,
6058 machine_mode op_mode)
6059
6060 {
6061 switch (code)
6062 {
6063 case EQ:
6064 if (dest_mode == V16QImode && op_mode == V16QImode)
6065 return CODE_FOR_ceq_v16qi;
6066 if (dest_mode == V8HImode && op_mode == V8HImode)
6067 return CODE_FOR_ceq_v8hi;
6068 if (dest_mode == V4SImode && op_mode == V4SImode)
6069 return CODE_FOR_ceq_v4si;
6070 if (dest_mode == V4SImode && op_mode == V4SFmode)
6071 return CODE_FOR_ceq_v4sf;
6072 if (dest_mode == V2DImode && op_mode == V2DFmode)
6073 return CODE_FOR_ceq_v2df;
6074 break;
6075 case GT:
6076 if (dest_mode == V16QImode && op_mode == V16QImode)
6077 return CODE_FOR_cgt_v16qi;
6078 if (dest_mode == V8HImode && op_mode == V8HImode)
6079 return CODE_FOR_cgt_v8hi;
6080 if (dest_mode == V4SImode && op_mode == V4SImode)
6081 return CODE_FOR_cgt_v4si;
6082 if (dest_mode == V4SImode && op_mode == V4SFmode)
6083 return CODE_FOR_cgt_v4sf;
6084 if (dest_mode == V2DImode && op_mode == V2DFmode)
6085 return CODE_FOR_cgt_v2df;
6086 break;
6087 case GTU:
6088 if (dest_mode == V16QImode && op_mode == V16QImode)
6089 return CODE_FOR_clgt_v16qi;
6090 if (dest_mode == V8HImode && op_mode == V8HImode)
6091 return CODE_FOR_clgt_v8hi;
6092 if (dest_mode == V4SImode && op_mode == V4SImode)
6093 return CODE_FOR_clgt_v4si;
6094 break;
6095 default:
6096 break;
6097 }
6098 return -1;
6099 }
6100
6101 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6102 DMODE is the expected destination mode. This is a recursive function. */
6103
6104 static rtx
6105 spu_emit_vector_compare (enum rtx_code rcode,
6106 rtx op0, rtx op1,
6107 machine_mode dmode)
6108 {
6109 int vec_cmp_insn;
6110 rtx mask;
6111 machine_mode dest_mode;
6112 machine_mode op_mode = GET_MODE (op1);
6113
6114 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6115
6116 /* Single-precision floating-point vector compares use a V4SImode
6117 destination; double-precision vector compares use a V2DImode destination.
6118 The result is moved to the appropriate mode later. */
6119 if (dmode == V4SFmode)
6120 dest_mode = V4SImode;
6121 else if (dmode == V2DFmode)
6122 dest_mode = V2DImode;
6123 else
6124 dest_mode = dmode;
6125
6126 mask = gen_reg_rtx (dest_mode);
6127 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6128
6129 if (vec_cmp_insn == -1)
6130 {
6131 bool swap_operands = false;
6132 bool try_again = false;
6133 switch (rcode)
6134 {
6135 case LT:
6136 rcode = GT;
6137 swap_operands = true;
6138 try_again = true;
6139 break;
6140 case LTU:
6141 rcode = GTU;
6142 swap_operands = true;
6143 try_again = true;
6144 break;
6145 case NE:
6146 case UNEQ:
6147 case UNLE:
6148 case UNLT:
6149 case UNGE:
6150 case UNGT:
6151 case UNORDERED:
6152 /* Treat A != B as ~(A==B). */
6153 {
6154 enum rtx_code rev_code;
6155 enum insn_code nor_code;
6156 rtx rev_mask;
6157
6158 rev_code = reverse_condition_maybe_unordered (rcode);
6159 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6160
6161 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6162 gcc_assert (nor_code != CODE_FOR_nothing);
6163 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6164 if (dmode != dest_mode)
6165 {
6166 rtx temp = gen_reg_rtx (dest_mode);
6167 convert_move (temp, mask, 0);
6168 return temp;
6169 }
6170 return mask;
6171 }
6172 break;
6173 case GE:
6174 case GEU:
6175 case LE:
6176 case LEU:
6177 /* Try GT/GTU/LT/LTU OR EQ */
6178 {
6179 rtx c_rtx, eq_rtx;
6180 enum insn_code ior_code;
6181 enum rtx_code new_code;
6182
6183 switch (rcode)
6184 {
6185 case GE: new_code = GT; break;
6186 case GEU: new_code = GTU; break;
6187 case LE: new_code = LT; break;
6188 case LEU: new_code = LTU; break;
6189 default:
6190 gcc_unreachable ();
6191 }
6192
6193 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6194 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6195
6196 ior_code = optab_handler (ior_optab, dest_mode);
6197 gcc_assert (ior_code != CODE_FOR_nothing);
6198 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6199 if (dmode != dest_mode)
6200 {
6201 rtx temp = gen_reg_rtx (dest_mode);
6202 convert_move (temp, mask, 0);
6203 return temp;
6204 }
6205 return mask;
6206 }
6207 break;
6208 case LTGT:
6209 /* Try LT OR GT */
6210 {
6211 rtx lt_rtx, gt_rtx;
6212 enum insn_code ior_code;
6213
6214 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6215 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6216
6217 ior_code = optab_handler (ior_optab, dest_mode);
6218 gcc_assert (ior_code != CODE_FOR_nothing);
6219 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6220 if (dmode != dest_mode)
6221 {
6222 rtx temp = gen_reg_rtx (dest_mode);
6223 convert_move (temp, mask, 0);
6224 return temp;
6225 }
6226 return mask;
6227 }
6228 break;
6229 case ORDERED:
6230 /* Implement as (A==A) & (B==B) */
6231 {
6232 rtx a_rtx, b_rtx;
6233 enum insn_code and_code;
6234
6235 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6236 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6237
6238 and_code = optab_handler (and_optab, dest_mode);
6239 gcc_assert (and_code != CODE_FOR_nothing);
6240 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6241 if (dmode != dest_mode)
6242 {
6243 rtx temp = gen_reg_rtx (dest_mode);
6244 convert_move (temp, mask, 0);
6245 return temp;
6246 }
6247 return mask;
6248 }
6249 break;
6250 default:
6251 gcc_unreachable ();
6252 }
6253
6254 /* You only get two chances. */
6255 if (try_again)
6256 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6257
6258 gcc_assert (vec_cmp_insn != -1);
6259
6260 if (swap_operands)
6261 {
6262 rtx tmp;
6263 tmp = op0;
6264 op0 = op1;
6265 op1 = tmp;
6266 }
6267 }
6268
6269 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6270 if (dmode != dest_mode)
6271 {
6272 rtx temp = gen_reg_rtx (dest_mode);
6273 convert_move (temp, mask, 0);
6274 return temp;
6275 }
6276 return mask;
6277 }
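/* Worked example (illustration only): a V4SImode LE compare has no
   direct insn, so it is synthesized from the table above as
   (LT or EQ); LT in turn has no insn either and becomes GT with
   swapped operands, so "a <= b" is emitted as

       mask = (b > a) | (a == b);

   using the cgt, ceq and or patterns.  */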
6278
6279
6280 /* Emit a vector conditional expression.
6281 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
6282 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
6283
6284 int
6285 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6286 rtx cond, rtx cc_op0, rtx cc_op1)
6287 {
6288 machine_mode dest_mode = GET_MODE (dest);
6289 enum rtx_code rcode = GET_CODE (cond);
6290 rtx mask;
6291
6292 /* Get the vector mask for the given relational operations. */
6293 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6294
6295 emit_insn (gen_selb (dest, op2, op1, mask));
6296
6297 return 1;
6298 }
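/* Note on the operand order above: selb computes
   (op2 & ~mask) | (op1 & mask), and the compare mask is all ones in
   the elements where COND holds, so OP1 is selected where the
   condition is true and OP2 elsewhere -- exactly the VEC_COND_EXPR
   semantics "cond ? op1 : op2".  */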
6299
6300 static rtx
6301 spu_force_reg (machine_mode mode, rtx op)
6302 {
6303 rtx x, r;
6304 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6305 {
6306 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6307 || GET_MODE (op) == BLKmode)
6308 return force_reg (mode, convert_to_mode (mode, op, 0));
6309 abort ();
6310 }
6311
6312 r = force_reg (GET_MODE (op), op);
6313 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6314 {
6315 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6316 if (x)
6317 return x;
6318 }
6319
6320 x = gen_reg_rtx (mode);
6321 emit_insn (gen_spu_convert (x, r));
6322 return x;
6323 }
6324
6325 static void
6326 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6327 {
6328 HOST_WIDE_INT v = 0;
6329 int lsbits;
6330 /* Check the range of immediate operands. */
6331 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6332 {
6333 int range = p - SPU_BTI_7;
6334
6335 if (!CONSTANT_P (op))
6336 error ("%s expects an integer literal in the range [%d, %d]",
6337 d->name,
6338 spu_builtin_range[range].low, spu_builtin_range[range].high);
6339
6340 if (GET_CODE (op) == CONST
6341 && (GET_CODE (XEXP (op, 0)) == PLUS
6342 || GET_CODE (XEXP (op, 0)) == MINUS))
6343 {
6344 v = INTVAL (XEXP (XEXP (op, 0), 1));
6345 op = XEXP (XEXP (op, 0), 0);
6346 }
6347 else if (GET_CODE (op) == CONST_INT)
6348 v = INTVAL (op);
6349 else if (GET_CODE (op) == CONST_VECTOR
6350 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6351 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6352
6353 /* The default for v is 0, which is valid in every range. */
6354 if (v < spu_builtin_range[range].low
6355 || v > spu_builtin_range[range].high)
6356 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6357 d->name,
6358 spu_builtin_range[range].low, spu_builtin_range[range].high,
6359 v);
6360
6361 switch (p)
6362 {
6363 case SPU_BTI_S10_4:
6364 lsbits = 4;
6365 break;
6366 case SPU_BTI_U16_2:
6367 /* This is only used in lqa and stqa. Even though the insns
6368 encode 16 bits of the address (all but the 2 least
6369 significant), only 14 bits are used because the address is
6370 masked to be 16-byte aligned. */
6371 lsbits = 4;
6372 break;
6373 case SPU_BTI_S16_2:
6374 /* This is used for lqr and stqr. */
6375 lsbits = 2;
6376 break;
6377 default:
6378 lsbits = 0;
6379 }
6380
6381 if (GET_CODE (op) == LABEL_REF
6382 || (GET_CODE (op) == SYMBOL_REF
6383 && SYMBOL_REF_FUNCTION_P (op))
6384 || (v & ((1 << lsbits) - 1)) != 0)
6385 warning (0, "%d least significant bits of %s are ignored", lsbits,
6386 d->name);
6387 }
6388 }
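/* Usage sketch (illustration only), assuming spu_builtin_range[] maps
   SPU_BTI_S10 to [-512, 511]: a call such as si_ai (a, 600) is
   diagnosed with the range error above, while an SPU_BTI_S10_4
   operand of 24 (lsbits == 4, and 24 & 15 != 0) triggers the
   "least significant bits are ignored" warning.  */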
6389
6390
6391 static int
6392 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6393 rtx target, rtx ops[])
6394 {
6395 enum insn_code icode = (enum insn_code) d->icode;
6396 int i = 0, a;
6397
6398 /* Expand the arguments into rtl. */
6399
6400 if (d->parm[0] != SPU_BTI_VOID)
6401 ops[i++] = target;
6402
6403 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6404 {
6405 tree arg = CALL_EXPR_ARG (exp, a);
6406 if (arg == 0)
6407 abort ();
6408 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6409 }
6410
6411 gcc_assert (i == insn_data[icode].n_generator_args);
6412 return i;
6413 }
6414
6415 static rtx
6416 spu_expand_builtin_1 (struct spu_builtin_description *d,
6417 tree exp, rtx target)
6418 {
6419 rtx pat;
6420 rtx ops[8];
6421 enum insn_code icode = (enum insn_code) d->icode;
6422 machine_mode mode, tmode;
6423 int i, p;
6424 int n_operands;
6425 tree return_type;
6426
6427 /* Set up ops[] with values from arglist. */
6428 n_operands = expand_builtin_args (d, exp, target, ops);
6429
6430 /* Handle the target operand which must be operand 0. */
6431 i = 0;
6432 if (d->parm[0] != SPU_BTI_VOID)
6433 {
6434
6435 /* We prefer the mode specified for the match_operand; otherwise
6436 use the mode from the builtin function prototype. */
6437 tmode = insn_data[d->icode].operand[0].mode;
6438 if (tmode == VOIDmode)
6439 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6440
6441 /* Try to use target, because not using it can lead to extra copies,
6442 and when all of the registers are in use, extra copies lead
6443 to extra spills. */
6444 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6445 ops[0] = target;
6446 else
6447 target = ops[0] = gen_reg_rtx (tmode);
6448
6449 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6450 abort ();
6451
6452 i++;
6453 }
6454
6455 if (d->fcode == SPU_MASK_FOR_LOAD)
6456 {
6457 machine_mode mode = insn_data[icode].operand[1].mode;
6458 tree arg;
6459 rtx addr, op, pat;
6460
6461 /* Get the address. */
6462 arg = CALL_EXPR_ARG (exp, 0);
6463 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6464 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6465 addr = memory_address (mode, op);
6466
6467 /* Negate the address. */
6468 op = gen_reg_rtx (GET_MODE (addr));
6469 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6470 op = gen_rtx_MEM (mode, op);
6471
6472 pat = GEN_FCN (icode) (target, op);
6473 if (!pat)
6474 return 0;
6475 emit_insn (pat);
6476 return target;
6477 }
6478
6479 /* Ignore align_hint, but still expand its args in case they have
6480 side effects. */
6481 if (icode == CODE_FOR_spu_align_hint)
6482 return 0;
6483
6484 /* Handle the rest of the operands. */
6485 for (p = 1; i < n_operands; i++, p++)
6486 {
6487 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6488 mode = insn_data[d->icode].operand[i].mode;
6489 else
6490 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6491
6492 /* MODE can be VOIDmode here for labels. */
6493
6494 /* For specific intrinsics with an immediate operand, e.g.,
6495 si_ai(), we sometimes need to convert the scalar argument to a
6496 vector argument by splatting the scalar. */
6497 if (VECTOR_MODE_P (mode)
6498 && (GET_CODE (ops[i]) == CONST_INT
6499 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6500 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6501 {
6502 if (GET_CODE (ops[i]) == CONST_INT)
6503 ops[i] = spu_const (mode, INTVAL (ops[i]));
6504 else
6505 {
6506 rtx reg = gen_reg_rtx (mode);
6507 machine_mode imode = GET_MODE_INNER (mode);
6508 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6509 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6510 if (imode != GET_MODE (ops[i]))
6511 ops[i] = convert_to_mode (imode, ops[i],
6512 TYPE_UNSIGNED (spu_builtin_types
6513 [d->parm[i]]));
6514 emit_insn (gen_spu_splats (reg, ops[i]));
6515 ops[i] = reg;
6516 }
6517 }
6518
6519 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6520
6521 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6522 ops[i] = spu_force_reg (mode, ops[i]);
6523 }
6524
6525 switch (n_operands)
6526 {
6527 case 0:
6528 pat = GEN_FCN (icode) (0);
6529 break;
6530 case 1:
6531 pat = GEN_FCN (icode) (ops[0]);
6532 break;
6533 case 2:
6534 pat = GEN_FCN (icode) (ops[0], ops[1]);
6535 break;
6536 case 3:
6537 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6538 break;
6539 case 4:
6540 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6541 break;
6542 case 5:
6543 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6544 break;
6545 case 6:
6546 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6547 break;
6548 default:
6549 abort ();
6550 }
6551
6552 if (!pat)
6553 abort ();
6554
6555 if (d->type == B_CALL || d->type == B_BISLED)
6556 emit_call_insn (pat);
6557 else if (d->type == B_JUMP)
6558 {
6559 emit_jump_insn (pat);
6560 emit_barrier ();
6561 }
6562 else
6563 emit_insn (pat);
6564
6565 return_type = spu_builtin_types[d->parm[0]];
6566 if (d->parm[0] != SPU_BTI_VOID
6567 && GET_MODE (target) != TYPE_MODE (return_type))
6568 {
6569 /* TARGET is the return value. It should always have the mode of
6570 the builtin function prototype. */
6571 target = spu_force_reg (TYPE_MODE (return_type), target);
6572 }
6573
6574 return target;
6575 }
6576
6577 rtx
6578 spu_expand_builtin (tree exp,
6579 rtx target,
6580 rtx subtarget ATTRIBUTE_UNUSED,
6581 machine_mode mode ATTRIBUTE_UNUSED,
6582 int ignore ATTRIBUTE_UNUSED)
6583 {
6584 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6585 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6586 struct spu_builtin_description *d;
6587
6588 if (fcode < NUM_SPU_BUILTINS)
6589 {
6590 d = &spu_builtins[fcode];
6591
6592 return spu_expand_builtin_1 (d, exp, target);
6593 }
6594 abort ();
6595 }
6596
6597 /* Implement targetm.vectorize.builtin_mask_for_load. */
6598 static tree
6599 spu_builtin_mask_for_load (void)
6600 {
6601 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6602 }
6603
6604 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6605 static int
6606 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6607 tree vectype,
6608 int misalign ATTRIBUTE_UNUSED)
6609 {
6610 unsigned elements;
6611
6612 switch (type_of_cost)
6613 {
6614 case scalar_stmt:
6615 case vector_stmt:
6616 case vector_load:
6617 case vector_store:
6618 case vec_to_scalar:
6619 case scalar_to_vec:
6620 case cond_branch_not_taken:
6621 case vec_perm:
6622 case vec_promote_demote:
6623 return 1;
6624
6625 case scalar_store:
6626 return 10;
6627
6628 case scalar_load:
6629 /* Load + rotate. */
6630 return 2;
6631
6632 case unaligned_load:
6633 return 2;
6634
6635 case cond_branch_taken:
6636 return 6;
6637
6638 case vec_construct:
6639 elements = TYPE_VECTOR_SUBPARTS (vectype);
6640 return elements / 2 + 1;
6641
6642 default:
6643 gcc_unreachable ();
6644 }
6645 }
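/* Worked example (illustration only): for a V4SImode vector type,
   TYPE_VECTOR_SUBPARTS is 4, so a vec_construct costs 4/2 + 1 = 3.
   A scalar_load costs 2 (load plus rotate into the preferred slot)
   while a vector_load costs 1, which biases the vectorizer towards
   full-width accesses.  */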
6646
6647 /* Implement targetm.vectorize.init_cost. */
6648
6649 static void *
6650 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6651 {
6652 unsigned *cost = XNEWVEC (unsigned, 3);
6653 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6654 return cost;
6655 }
6656
6657 /* Implement targetm.vectorize.add_stmt_cost. */
6658
6659 static unsigned
6660 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6661 struct _stmt_vec_info *stmt_info, int misalign,
6662 enum vect_cost_model_location where)
6663 {
6664 unsigned *cost = (unsigned *) data;
6665 unsigned retval = 0;
6666
6667 if (flag_vect_cost_model)
6668 {
6669 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6670 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6671
6672 /* Statements in an inner loop relative to the loop being
6673 vectorized are weighted more heavily. The value here is
6674 arbitrary and could potentially be improved with analysis. */
6675 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6676 count *= 50; /* FIXME. */
6677
6678 retval = (unsigned) (count * stmt_cost);
6679 cost[where] += retval;
6680 }
6681
6682 return retval;
6683 }
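/* Worked example (illustration only): with the cost model enabled, a
   vector_stmt (cost 1) occurring count == 4 times in the body of a
   loop nested inside the loop being vectorized contributes
   4 * 50 * 1 = 200 to cost[vect_body]; the same statements outside an
   inner loop would contribute only 4.  */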
6684
6685 /* Implement targetm.vectorize.finish_cost. */
6686
6687 static void
6688 spu_finish_cost (void *data, unsigned *prologue_cost,
6689 unsigned *body_cost, unsigned *epilogue_cost)
6690 {
6691 unsigned *cost = (unsigned *) data;
6692 *prologue_cost = cost[vect_prologue];
6693 *body_cost = cost[vect_body];
6694 *epilogue_cost = cost[vect_epilogue];
6695 }
6696
6697 /* Implement targetm.vectorize.destroy_cost_data. */
6698
6699 static void
6700 spu_destroy_cost_data (void *data)
6701 {
6702 free (data);
6703 }
6704
6705 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6706 after applying N iterations. This routine does not determine
6707 how many iterations are required to reach the desired alignment. */
6708
6709 static bool
6710 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6711 {
6712 if (is_packed)
6713 return false;
6714
6715 /* All other types are naturally aligned. */
6716 return true;
6717 }
6718
6719 /* Return the appropriate mode for a pointer in a named address space. */
6720 static machine_mode
6721 spu_addr_space_pointer_mode (addr_space_t addrspace)
6722 {
6723 switch (addrspace)
6724 {
6725 case ADDR_SPACE_GENERIC:
6726 return ptr_mode;
6727 case ADDR_SPACE_EA:
6728 return EAmode;
6729 default:
6730 gcc_unreachable ();
6731 }
6732 }
6733
6734 /* Return the appropriate mode for an address in a named address space. */
6735 static machine_mode
6736 spu_addr_space_address_mode (addr_space_t addrspace)
6737 {
6738 switch (addrspace)
6739 {
6740 case ADDR_SPACE_GENERIC:
6741 return Pmode;
6742 case ADDR_SPACE_EA:
6743 return EAmode;
6744 default:
6745 gcc_unreachable ();
6746 }
6747 }
6748
6749 /* Determine if one named address space is a subset of another. */
6750
6751 static bool
6752 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6753 {
6754 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6755 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6756
6757 if (subset == superset)
6758 return true;
6759
6760 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6761 being subsets but instead as disjoint address spaces. */
6762 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6763 return false;
6764
6765 else
6766 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6767 }
6768
6769 /* Convert from one address space to another. */
6770 static rtx
6771 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6772 {
6773 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6774 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6775
6776 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6777 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6778
6779 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6780 {
6781 rtx result, ls;
6782
6783 ls = gen_const_mem (DImode,
6784 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6785 set_mem_align (ls, 128);
6786
6787 result = gen_reg_rtx (Pmode);
6788 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6789 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6790 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6791 ls, const0_rtx, Pmode, 1);
6792
6793 emit_insn (gen_subsi3 (result, op, ls));
6794
6795 return result;
6796 }
6797
6798 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6799 {
6800 rtx result, ls;
6801
6802 ls = gen_const_mem (DImode,
6803 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6804 set_mem_align (ls, 128);
6805
6806 result = gen_reg_rtx (EAmode);
6807 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6808 op = force_reg (Pmode, op);
6809 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6810 ls, const0_rtx, EAmode, 1);
6811 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6812
6813 if (EAmode == SImode)
6814 emit_insn (gen_addsi3 (result, op, ls));
6815 else
6816 emit_insn (gen_adddi3 (result, op, ls));
6817
6818 return result;
6819 }
6820
6821 else
6822 gcc_unreachable ();
6823 }
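/* In C-like pseudo-code (illustration only) the __ea -> generic
   direction above is roughly

       lsbase = (unsigned) __ea_local_store;
       result = op != 0 ? (unsigned) op - lsbase : 0;

   so a NULL __ea pointer stays NULL; the generic -> __ea direction
   adds the local-store base instead of subtracting it.  */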
6824
6825
6826 /* Count the total number of instructions in each pipe and return the
6827 maximum, which is used as the Minimum Iteration Interval (MII)
6828 in the modulo scheduler. get_pipe () returns -2, -1, 0, or 1;
6829 -2 means the instruction can go in either pipe0 or pipe1. */
6830 static int
6831 spu_sms_res_mii (struct ddg *g)
6832 {
6833 int i;
6834 unsigned t[4] = {0, 0, 0, 0};
6835
6836 for (i = 0; i < g->num_nodes; i++)
6837 {
6838 rtx_insn *insn = g->nodes[i].insn;
6839 int p = get_pipe (insn) + 2;
6840
6841 gcc_assert (p >= 0);
6842 gcc_assert (p < 4);
6843
6844 t[p]++;
6845 if (dump_file && INSN_P (insn))
6846 fprintf (dump_file, "i%d %s %d %d\n",
6847 INSN_UID (insn),
6848 insn_data[INSN_CODE(insn)].name,
6849 p, t[p]);
6850 }
6851 if (dump_file)
6852 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6853
6854 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6855 }
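/* Worked example (illustration only): t[] is indexed by
   get_pipe (insn) + 2, so with t = { 4, 0, 2, 1 } -- four dual-pipe
   instructions, two pipe0 and one pipe1 instruction -- the result is
   MAX ((4 + 2 + 1 + 1) / 2, MAX (2, 1)) = 4, i.e. the issue-limited
   bound of ceil (7 / 2) dominates the per-pipe bounds.  */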
6856
6857
6858 void
6859 spu_init_expanders (void)
6860 {
6861 if (cfun)
6862 {
6863 rtx r0, r1;
6864 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6865 frame_pointer_needed is true. We don't know that until we're
6866 expanding the prologue. */
6867 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6868
6869 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6870 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6871 to be treated as aligned, so generate them here. */
6872 r0 = gen_reg_rtx (SImode);
6873 r1 = gen_reg_rtx (SImode);
6874 mark_reg_pointer (r0, 128);
6875 mark_reg_pointer (r1, 128);
6876 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6877 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6878 }
6879 }
6880
6881 static machine_mode
6882 spu_libgcc_cmp_return_mode (void)
6883 {
6884
6885 /* For SPU, word mode is TImode, so it is better to use SImode
6886 for compare returns. */
6887 return SImode;
6888 }
6889
6890 static machine_mode
6891 spu_libgcc_shift_count_mode (void)
6892 {
6893 /* For SPU, word mode is TImode, so it is better to use SImode
6894 for shift counts. */
6895 return SImode;
6896 }
6897
6898 /* Implement targetm.section_type_flags. */
6899 static unsigned int
6900 spu_section_type_flags (tree decl, const char *name, int reloc)
6901 {
6902 /* .toe needs to have type @nobits. */
6903 if (strcmp (name, ".toe") == 0)
6904 return SECTION_BSS;
6905 /* Don't load _ea into the current address space. */
6906 if (strcmp (name, "._ea") == 0)
6907 return SECTION_WRITE | SECTION_DEBUG;
6908 return default_section_type_flags (decl, name, reloc);
6909 }
6910
6911 /* Implement targetm.select_section. */
6912 static section *
6913 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6914 {
6915 /* Variables and constants defined in the __ea address space
6916 go into a special section named "._ea". */
6917 if (TREE_TYPE (decl) != error_mark_node
6918 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6919 {
6920 /* We might get called with string constants, but get_named_section
6921 doesn't like them as they are not DECLs. Also, we need to set
6922 flags in that case. */
6923 if (!DECL_P (decl))
6924 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6925
6926 return get_named_section (decl, "._ea", reloc);
6927 }
6928
6929 return default_elf_select_section (decl, reloc, align);
6930 }
6931
6932 /* Implement targetm.unique_section. */
6933 static void
6934 spu_unique_section (tree decl, int reloc)
6935 {
6936 /* We don't support unique section names in the __ea address
6937 space for now. */
6938 if (TREE_TYPE (decl) != error_mark_node
6939 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6940 return;
6941
6942 default_unique_section (decl, reloc);
6943 }
6944
6945 /* Generate a constant or register which contains 2^SCALE. We assume
6946 the result is valid for MODE. Currently, MODE must be V4SFmode and
6947 SCALE must be SImode. */
6948 rtx
6949 spu_gen_exp2 (machine_mode mode, rtx scale)
6950 {
6951 gcc_assert (mode == V4SFmode);
6952 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6953 if (GET_CODE (scale) != CONST_INT)
6954 {
6955 /* unsigned int exp = (127 + scale) << 23;
6956 __vector float m = (__vector float) spu_splats (exp); */
6957 rtx reg = force_reg (SImode, scale);
6958 rtx exp = gen_reg_rtx (SImode);
6959 rtx mul = gen_reg_rtx (mode);
6960 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6961 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6962 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6963 return mul;
6964 }
6965 else
6966 {
6967 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6968 unsigned char arr[16];
6969 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6970 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6971 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6972 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6973 return array_to_constant (mode, arr);
6974 }
6975 }
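/* Worked example (illustration only): for scale == 3, exp == 130, so
   arr[0] == 130 >> 1 == 0x41 and arr[1] == (130 << 7) & 0xff == 0x00,
   giving the word 0x41000000 in every slot -- the single-precision
   encoding of 2^3 == 8.0 (biased exponent 130, zero mantissa).  */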
6976
6977 /* After reload, just change the convert into a move instruction
6978 or a dead instruction. */
6979 void
6980 spu_split_convert (rtx ops[])
6981 {
6982 if (REGNO (ops[0]) == REGNO (ops[1]))
6983 emit_note (NOTE_INSN_DELETED);
6984 else
6985 {
6986 /* Use TImode always as this might help hard reg copyprop. */
6987 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6988 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6989 emit_insn (gen_move_insn (op0, op1));
6990 }
6991 }
6992
6993 void
6994 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6995 {
6996 fprintf (file, "# profile\n");
6997 fprintf (file, "brsl $75, _mcount\n");
6998 }
6999
7000 /* Implement targetm.ref_may_alias_errno. */
7001 static bool
7002 spu_ref_may_alias_errno (ao_ref *ref)
7003 {
7004 tree base = ao_ref_base (ref);
7005
7006 /* With SPU newlib, errno is defined as something like
7007 _impure_data._errno
7008 The default implementation of this target hook does not
7009 recognize such expressions, so we special-case them here. */
7010
7011 if (TREE_CODE (base) == VAR_DECL
7012 && !TREE_STATIC (base)
7013 && DECL_EXTERNAL (base)
7014 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7015 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7016 "_impure_data") == 0
7017 /* _errno is the first member of _impure_data. */
7018 && ref->offset == 0)
7019 return true;
7020
7021 return default_ref_may_alias_errno (ref);
7022 }
7023
7024 /* Output thunk to FILE that implements a C++ virtual function call (with
7025 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7026 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7027 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7028 relative to the resulting this pointer. */
7029
7030 static void
7031 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7032 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7033 tree function)
7034 {
7035 rtx op[8];
7036
7037 /* Make sure unwind info is emitted for the thunk if needed. */
7038 final_start_function (emit_barrier (), file, 1);
7039
7040 /* Operand 0 is the target function. */
7041 op[0] = XEXP (DECL_RTL (function), 0);
7042
7043 /* Operand 1 is the 'this' pointer. */
7044 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7045 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7046 else
7047 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7048
7049 /* Operands 2/3 are the low/high halfwords of delta. */
7050 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7051 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7052
7053 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7054 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7055 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7056
7057 /* Operands 6/7 are temporary registers. */
7058 op[6] = gen_rtx_REG (Pmode, 79);
7059 op[7] = gen_rtx_REG (Pmode, 78);
7060
7061 /* Add DELTA to this pointer. */
7062 if (delta)
7063 {
7064 if (delta >= -0x200 && delta < 0x200)
7065 output_asm_insn ("ai\t%1,%1,%2", op);
7066 else if (delta >= -0x8000 && delta < 0x8000)
7067 {
7068 output_asm_insn ("il\t%6,%2", op);
7069 output_asm_insn ("a\t%1,%1,%6", op);
7070 }
7071 else
7072 {
7073 output_asm_insn ("ilhu\t%6,%3", op);
7074 output_asm_insn ("iohl\t%6,%2", op);
7075 output_asm_insn ("a\t%1,%1,%6", op);
7076 }
7077 }
7078
7079 /* Perform vcall adjustment. */
7080 if (vcall_offset)
7081 {
7082 output_asm_insn ("lqd\t%7,0(%1)", op);
7083 output_asm_insn ("rotqby\t%7,%7,%1", op);
7084
7085 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7086 output_asm_insn ("ai\t%7,%7,%4", op);
7087 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7088 {
7089 output_asm_insn ("il\t%6,%4", op);
7090 output_asm_insn ("a\t%7,%7,%6", op);
7091 }
7092 else
7093 {
7094 output_asm_insn ("ilhu\t%6,%5", op);
7095 output_asm_insn ("iohl\t%6,%4", op);
7096 output_asm_insn ("a\t%7,%7,%6", op);
7097 }
7098
7099 output_asm_insn ("lqd\t%6,0(%7)", op);
7100 output_asm_insn ("rotqby\t%6,%6,%7", op);
7101 output_asm_insn ("a\t%1,%1,%6", op);
7102 }
7103
7104 /* Jump to target. */
7105 output_asm_insn ("br\t%0", op);
7106
7107 final_end_function ();
7108 }
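/* Example output (illustration only), assuming the 'this' pointer
   arrives in register $3 (FIRST_ARG_REGNUM) and the function does not
   return an aggregate: for delta == 8 and vcall_offset == 0 the thunk
   body is just

       ai      $3,$3,8
       br      function  */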
7109
7110 /* Canonicalize a comparison from one we don't have to one we do have. */
7111 static void
7112 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7113 bool op0_preserve_value)
7114 {
7115 if (!op0_preserve_value
7116 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7117 {
7118 rtx tem = *op0;
7119 *op0 = *op1;
7120 *op1 = tem;
7121 *code = (int)swap_condition ((enum rtx_code)*code);
7122 }
7123 }
7124
7125 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7126 to perform. MEM is the memory on which to operate. VAL is the second
7127 operand of the binary operator. BEFORE and AFTER are optional locations to
7128 return the value of MEM either before or after the operation. */
7129 void
7130 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7131 rtx orig_before, rtx orig_after)
7132 {
7133 machine_mode mode = GET_MODE (mem);
7134 rtx before = orig_before, after = orig_after;
7135
7136 if (before == NULL_RTX)
7137 before = gen_reg_rtx (mode);
7138
7139 emit_move_insn (before, mem);
7140
7141 if (code == MULT) /* NAND operation */
7142 {
7143 rtx x = expand_simple_binop (mode, AND, before, val,
7144 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7145 after = expand_simple_unop (mode, NOT, x, after, 1);
7146 }
7147 else
7148 {
7149 after = expand_simple_binop (mode, code, before, val,
7150 after, 1, OPTAB_LIB_WIDEN);
7151 }
7152
7153 emit_move_insn (mem, after);
7154
7155 if (orig_after && after != orig_after)
7156 emit_move_insn (orig_after, after);
7157 }
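/* Usage sketch (illustration only): expanding a fetch-and-add as

       spu_expand_atomic_op (PLUS, mem, val, before, NULL_RTX);

   emits the plain sequence "before = *mem; *mem = before + val;",
   while CODE == MULT is repurposed (as noted above) to request a NAND,
   i.e. "*mem = ~(before & val)".  */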
7158
7159 \f
7160 /* Table of machine attributes. */
7161 static const struct attribute_spec spu_attribute_table[] =
7162 {
7163 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7164 affects_type_identity } */
7165 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7166 false },
7167 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7168 false },
7169 { NULL, 0, 0, false, false, false, NULL, false }
7170 };
7171
7172 /* TARGET overrides. */
7173
7174 #undef TARGET_ADDR_SPACE_POINTER_MODE
7175 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7176
7177 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7178 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7179
7180 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7181 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7182 spu_addr_space_legitimate_address_p
7183
7184 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7185 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7186
7187 #undef TARGET_ADDR_SPACE_SUBSET_P
7188 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7189
7190 #undef TARGET_ADDR_SPACE_CONVERT
7191 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7192
7193 #undef TARGET_INIT_BUILTINS
7194 #define TARGET_INIT_BUILTINS spu_init_builtins
7195 #undef TARGET_BUILTIN_DECL
7196 #define TARGET_BUILTIN_DECL spu_builtin_decl
7197
7198 #undef TARGET_EXPAND_BUILTIN
7199 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7200
7201 #undef TARGET_UNWIND_WORD_MODE
7202 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7203
7204 #undef TARGET_LEGITIMIZE_ADDRESS
7205 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7206
7207 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7208 and .quad for the debugger. When it is known that the assembler is fixed,
7209 these can be removed. */
7210 #undef TARGET_ASM_UNALIGNED_SI_OP
7211 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7212
7213 #undef TARGET_ASM_ALIGNED_DI_OP
7214 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7215
7216 /* The .8byte directive doesn't seem to work well for a 32 bit
7217 architecture. */
7218 #undef TARGET_ASM_UNALIGNED_DI_OP
7219 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7220
7221 #undef TARGET_RTX_COSTS
7222 #define TARGET_RTX_COSTS spu_rtx_costs
7223
7224 #undef TARGET_ADDRESS_COST
7225 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7226
7227 #undef TARGET_SCHED_ISSUE_RATE
7228 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7229
7230 #undef TARGET_SCHED_INIT_GLOBAL
7231 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7232
7233 #undef TARGET_SCHED_INIT
7234 #define TARGET_SCHED_INIT spu_sched_init
7235
7236 #undef TARGET_SCHED_VARIABLE_ISSUE
7237 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7238
7239 #undef TARGET_SCHED_REORDER
7240 #define TARGET_SCHED_REORDER spu_sched_reorder
7241
7242 #undef TARGET_SCHED_REORDER2
7243 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7244
7245 #undef TARGET_SCHED_ADJUST_COST
7246 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7247
7248 #undef TARGET_ATTRIBUTE_TABLE
7249 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7250
7251 #undef TARGET_ASM_INTEGER
7252 #define TARGET_ASM_INTEGER spu_assemble_integer
7253
7254 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7255 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7256
7257 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7258 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7259
7260 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7261 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7262
7263 #undef TARGET_ASM_GLOBALIZE_LABEL
7264 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7265
7266 #undef TARGET_PASS_BY_REFERENCE
7267 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7268
7269 #undef TARGET_FUNCTION_ARG
7270 #define TARGET_FUNCTION_ARG spu_function_arg
7271
7272 #undef TARGET_FUNCTION_ARG_ADVANCE
7273 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7274
7275 #undef TARGET_MUST_PASS_IN_STACK
7276 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7277
7278 #undef TARGET_BUILD_BUILTIN_VA_LIST
7279 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7280
7281 #undef TARGET_EXPAND_BUILTIN_VA_START
7282 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7283
7284 #undef TARGET_SETUP_INCOMING_VARARGS
7285 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7286
7287 #undef TARGET_MACHINE_DEPENDENT_REORG
7288 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7289
7290 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7291 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7292
7293 #undef TARGET_INIT_LIBFUNCS
7294 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7295
7296 #undef TARGET_RETURN_IN_MEMORY
7297 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7298
7299 #undef TARGET_ENCODE_SECTION_INFO
7300 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7301
7302 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7303 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7304
7305 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7306 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7307
7308 #undef TARGET_VECTORIZE_INIT_COST
7309 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7310
7311 #undef TARGET_VECTORIZE_ADD_STMT_COST
7312 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7313
7314 #undef TARGET_VECTORIZE_FINISH_COST
7315 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7316
7317 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7318 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7319
7320 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7321 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7322
7323 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7324 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7325
7326 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7327 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7328
7329 #undef TARGET_SCHED_SMS_RES_MII
7330 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7331
7332 #undef TARGET_SECTION_TYPE_FLAGS
7333 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7334
7335 #undef TARGET_ASM_SELECT_SECTION
7336 #define TARGET_ASM_SELECT_SECTION spu_select_section
7337
7338 #undef TARGET_ASM_UNIQUE_SECTION
7339 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7340
7341 #undef TARGET_LEGITIMATE_ADDRESS_P
7342 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7343
7344 #undef TARGET_LEGITIMATE_CONSTANT_P
7345 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7346
7347 #undef TARGET_TRAMPOLINE_INIT
7348 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7349
7350 #undef TARGET_WARN_FUNC_RETURN
7351 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7352
7353 #undef TARGET_OPTION_OVERRIDE
7354 #define TARGET_OPTION_OVERRIDE spu_option_override
7355
7356 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7357 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7358
7359 #undef TARGET_REF_MAY_ALIAS_ERRNO
7360 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7361
7362 #undef TARGET_ASM_OUTPUT_MI_THUNK
7363 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7364 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7365 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7366
7367 /* Variable tracking should be run after all optimizations which
7368 change order of insns. It also needs a valid CFG. */
7369 #undef TARGET_DELAY_VARTRACK
7370 #define TARGET_DELAY_VARTRACK true
7371
7372 #undef TARGET_CANONICALIZE_COMPARISON
7373 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7374
7375 #undef TARGET_CAN_USE_DOLOOP_P
7376 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7377
7378 struct gcc_target targetm = TARGET_INITIALIZER;
7379
7380 #include "gt-spu.h"