1 /* Copyright (C) 2006-2016 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "target.h"
22 #include "rtl.h"
23 #include "tree.h"
24 #include "gimple.h"
25 #include "cfghooks.h"
26 #include "cfgloop.h"
27 #include "df.h"
28 #include "memmodel.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "expmed.h"
32 #include "optabs.h"
33 #include "regs.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "diagnostic-core.h"
37 #include "insn-attr.h"
38 #include "alias.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "calls.h"
42 #include "varasm.h"
43 #include "explow.h"
44 #include "expr.h"
45 #include "output.h"
46 #include "cfgrtl.h"
47 #include "cfgbuild.h"
48 #include "langhooks.h"
49 #include "reload.h"
50 #include "sched-int.h"
51 #include "params.h"
52 #include "gimplify.h"
53 #include "tm-constrs.h"
54 #include "ddg.h"
55 #include "dumpfile.h"
56 #include "builtins.h"
57 #include "rtl-iter.h"
58
59 /* This file should be included last. */
60 #include "target-def.h"
61
62 /* Builtin types, data and prototypes. */
63
64 enum spu_builtin_type_index
65 {
66 SPU_BTI_END_OF_PARAMS,
67
68 /* We create new type nodes for these. */
69 SPU_BTI_V16QI,
70 SPU_BTI_V8HI,
71 SPU_BTI_V4SI,
72 SPU_BTI_V2DI,
73 SPU_BTI_V4SF,
74 SPU_BTI_V2DF,
75 SPU_BTI_UV16QI,
76 SPU_BTI_UV8HI,
77 SPU_BTI_UV4SI,
78 SPU_BTI_UV2DI,
79
80 /* A 16-byte type. (Implemented with V16QI_type_node) */
81 SPU_BTI_QUADWORD,
82
83 /* These all correspond to intSI_type_node */
84 SPU_BTI_7,
85 SPU_BTI_S7,
86 SPU_BTI_U7,
87 SPU_BTI_S10,
88 SPU_BTI_S10_4,
89 SPU_BTI_U14,
90 SPU_BTI_16,
91 SPU_BTI_S16,
92 SPU_BTI_S16_2,
93 SPU_BTI_U16,
94 SPU_BTI_U16_2,
95 SPU_BTI_U18,
96
97 /* These correspond to the standard types */
98 SPU_BTI_INTQI,
99 SPU_BTI_INTHI,
100 SPU_BTI_INTSI,
101 SPU_BTI_INTDI,
102
103 SPU_BTI_UINTQI,
104 SPU_BTI_UINTHI,
105 SPU_BTI_UINTSI,
106 SPU_BTI_UINTDI,
107
108 SPU_BTI_FLOAT,
109 SPU_BTI_DOUBLE,
110
111 SPU_BTI_VOID,
112 SPU_BTI_PTR,
113
114 SPU_BTI_MAX
115 };
116
117 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
118 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
119 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
120 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
121 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
122 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
123 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
124 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
125 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
126 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
127
128 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
129
130 struct spu_builtin_range
131 {
132 int low, high;
133 };
134
135 static struct spu_builtin_range spu_builtin_range[] = {
136 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
137 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
138 {0ll, 0x7fll}, /* SPU_BTI_U7 */
139 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
140 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
141 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
142 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
143 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
144 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
145 {0ll, 0xffffll}, /* SPU_BTI_U16 */
146 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
148 };
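/* For example, SPU_BTI_U7 covers the unsigned 7-bit range 0..127 and
   SPU_BTI_S10 the signed 10-bit range -512..511, as read from the
   table above. */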
149
150 \f
151 /* Target specific attribute specifications. */
152 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
153
154 /* Prototypes and external defs. */
155 static int get_pipe (rtx_insn *insn);
156 static int spu_naked_function_p (tree func);
157 static int mem_is_padded_component_ref (rtx x);
158 static void fix_range (const char *);
159 static rtx spu_expand_load (rtx, rtx, rtx, int);
160
161 /* Which instruction set architecture to use. */
162 int spu_arch;
163 /* Which cpu are we tuning for. */
164 int spu_tune;
165
166 /* The hardware requires 8 insns between a hint and the branch it
167 affects. This variable describes how many rtl instructions the
168 compiler needs to see before inserting a hint, and then the compiler
169 will insert enough nops to make it at least 8 insns. The default is
170 for the compiler to allow up to 2 nops to be emitted. The nops are
171 inserted in pairs, so we round down. */
172 int spu_hint_dist = (8*4) - (2*4);
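/* With the default of 2 nops, for example, this works out to
   (8*4) - (2*4) = 24 bytes, i.e. the hint must be seen at least 6
   instructions ahead of the branch so that at most 2 nops are needed
   to reach the required 8. */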
173
174 enum spu_immediate {
175 SPU_NONE,
176 SPU_IL,
177 SPU_ILA,
178 SPU_ILH,
179 SPU_ILHU,
180 SPU_ORI,
181 SPU_ORHI,
182 SPU_ORBI,
183 SPU_IOHL
184 };
185 enum immediate_class
186 {
187 IC_POOL, /* constant pool */
188 IC_IL1, /* one il* instruction */
189 IC_IL2, /* both ilhu and iohl instructions */
190 IC_IL1s, /* one il* instruction */
191 IC_IL2s, /* both ilhu and iohl instructions */
192 IC_FSMBI, /* the fsmbi instruction */
193 IC_CPAT, /* one of the c*d instructions */
194 IC_FSMBI2 /* fsmbi plus 1 other instruction */
195 };
196
197 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
198 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
199 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
200 static enum immediate_class classify_immediate (rtx op,
201 machine_mode mode);
202
203 /* Pointer mode for __ea references. */
204 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
205
206 \f
207 /* Define the structure for the machine field in struct function. */
208 struct GTY(()) machine_function
209 {
210 /* Register to use for PIC accesses. */
211 rtx pic_reg;
212 };
213
214 /* How to allocate a 'struct machine_function'. */
215 static struct machine_function *
216 spu_init_machine_status (void)
217 {
218 return ggc_cleared_alloc<machine_function> ();
219 }
220
221 /* Implement TARGET_OPTION_OVERRIDE. */
222 static void
223 spu_option_override (void)
224 {
225 /* Set up function hooks. */
226 init_machine_status = spu_init_machine_status;
227
228 /* Small loops will be completely unrolled at -O3. For SPU it is more important
229 to keep code small by default. */
230 if (!flag_unroll_loops && !flag_peel_loops)
231 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
232 global_options.x_param_values,
233 global_options_set.x_param_values);
234
235 flag_omit_frame_pointer = 1;
236
237 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
238 if (align_functions < 8)
239 align_functions = 8;
240
241 spu_hint_dist = 8*4 - spu_max_nops*4;
242 if (spu_hint_dist < 0)
243 spu_hint_dist = 0;
244
245 if (spu_fixed_range_string)
246 fix_range (spu_fixed_range_string);
247
248 /* Determine processor architectural level. */
249 if (spu_arch_string)
250 {
251 if (strcmp (&spu_arch_string[0], "cell") == 0)
252 spu_arch = PROCESSOR_CELL;
253 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
254 spu_arch = PROCESSOR_CELLEDP;
255 else
256 error ("bad value (%s) for -march= switch", spu_arch_string);
257 }
258
259 /* Determine processor to tune for. */
260 if (spu_tune_string)
261 {
262 if (strcmp (&spu_tune_string[0], "cell") == 0)
263 spu_tune = PROCESSOR_CELL;
264 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
265 spu_tune = PROCESSOR_CELLEDP;
266 else
267 error ("bad value (%s) for -mtune= switch", spu_tune_string);
268 }
269
270 /* Change defaults according to the processor architecture. */
271 if (spu_arch == PROCESSOR_CELLEDP)
272 {
273 /* If no command line option has been otherwise specified, change
274 the default to -mno-safe-hints on celledp -- only the original
275 Cell/B.E. processors require this workaround. */
276 if (!(target_flags_explicit & MASK_SAFE_HINTS))
277 target_flags &= ~MASK_SAFE_HINTS;
278 }
279
280 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
281 }
282 \f
283 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
284 struct attribute_spec.handler. */
285
286 /* True if MODE is valid for the target. By "valid", we mean able to
287 be manipulated in non-trivial ways. In particular, this means all
288 the arithmetic is supported. */
289 static bool
290 spu_scalar_mode_supported_p (machine_mode mode)
291 {
292 switch (mode)
293 {
294 case QImode:
295 case HImode:
296 case SImode:
297 case SFmode:
298 case DImode:
299 case TImode:
300 case DFmode:
301 return true;
302
303 default:
304 return false;
305 }
306 }
307
308 /* Similarly for vector modes. "Supported" here is less strict. At
309 least some operations are supported; need to check optabs or builtins
310 for further details. */
311 static bool
312 spu_vector_mode_supported_p (machine_mode mode)
313 {
314 switch (mode)
315 {
316 case V16QImode:
317 case V8HImode:
318 case V4SImode:
319 case V2DImode:
320 case V4SFmode:
321 case V2DFmode:
322 return true;
323
324 default:
325 return false;
326 }
327 }
328
329 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
330 least significant bytes of the outer mode. This function returns
331 TRUE for the SUBREGs where this is correct. */
332 int
333 valid_subreg (rtx op)
334 {
335 machine_mode om = GET_MODE (op);
336 machine_mode im = GET_MODE (SUBREG_REG (op));
337 return om != VOIDmode && im != VOIDmode
338 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
339 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
340 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
341 }
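/* Illustrative cases: (subreg:SI (reg:QI)) is accepted because both modes
   fit in 4 bytes, and (subreg:TI (reg:V4SI)) because both are at least 16
   bytes, but a paradoxical (subreg:TI (reg:SI)) is rejected since the two
   sizes fall on opposite sides of the 4/16 byte thresholds. */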
342
343 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
344 and adjust the start offset. */
345 static rtx
346 adjust_operand (rtx op, HOST_WIDE_INT * start)
347 {
348 machine_mode mode;
349 int op_size;
350 /* Strip any paradoxical SUBREG. */
351 if (GET_CODE (op) == SUBREG
352 && (GET_MODE_BITSIZE (GET_MODE (op))
353 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
354 {
355 if (start)
356 *start -=
357 GET_MODE_BITSIZE (GET_MODE (op)) -
358 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
359 op = SUBREG_REG (op);
360 }
361 /* If it is smaller than SI, ensure a SUBREG. */
362 op_size = GET_MODE_BITSIZE (GET_MODE (op));
363 if (op_size < 32)
364 {
365 if (start)
366 *start += 32 - op_size;
367 op_size = 32;
368 }
369 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
370 mode = mode_for_size (op_size, MODE_INT, 0);
371 if (mode != GET_MODE (op))
372 op = gen_rtx_SUBREG (mode, op, 0);
373 return op;
374 }
375
376 void
377 spu_expand_extv (rtx ops[], int unsignedp)
378 {
379 rtx dst = ops[0], src = ops[1];
380 HOST_WIDE_INT width = INTVAL (ops[2]);
381 HOST_WIDE_INT start = INTVAL (ops[3]);
382 HOST_WIDE_INT align_mask;
383 rtx s0, s1, mask, r0;
384
385 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
386
387 if (MEM_P (src))
388 {
389 /* First, determine if we need 1 TImode load or 2. We need only 1
390 if the bits being extracted do not cross the alignment boundary
391 as determined by the MEM and its address. */
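/* For instance, if the MEM is only known to be 32-bit aligned, a field
   at bit 8 of width 16 stays within one aligned word and takes the
   single-load path, while a field at bit 24 of width 16 crosses the
   alignment boundary and needs the two-load path below. */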
392
393 align_mask = -MEM_ALIGN (src);
394 if ((start & align_mask) == ((start + width - 1) & align_mask))
395 {
396 /* Alignment is sufficient for 1 load. */
397 s0 = gen_reg_rtx (TImode);
398 r0 = spu_expand_load (s0, 0, src, start / 8);
399 start &= 7;
400 if (r0)
401 emit_insn (gen_rotqby_ti (s0, s0, r0));
402 }
403 else
404 {
405 /* Need 2 loads. */
406 s0 = gen_reg_rtx (TImode);
407 s1 = gen_reg_rtx (TImode);
408 r0 = spu_expand_load (s0, s1, src, start / 8);
409 start &= 7;
410
411 gcc_assert (start + width <= 128);
412 if (r0)
413 {
414 rtx r1 = gen_reg_rtx (SImode);
415 mask = gen_reg_rtx (TImode);
416 emit_move_insn (mask, GEN_INT (-1));
417 emit_insn (gen_rotqby_ti (s0, s0, r0));
418 emit_insn (gen_rotqby_ti (s1, s1, r0));
419 if (GET_CODE (r0) == CONST_INT)
420 r1 = GEN_INT (INTVAL (r0) & 15);
421 else
422 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
423 emit_insn (gen_shlqby_ti (mask, mask, r1));
424 emit_insn (gen_selb (s0, s1, s0, mask));
425 }
426 }
427
428 }
429 else if (GET_CODE (src) == SUBREG)
430 {
431 rtx r = SUBREG_REG (src);
432 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
433 s0 = gen_reg_rtx (TImode);
434 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
435 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
436 else
437 emit_move_insn (s0, src);
438 }
439 else
440 {
441 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
442 s0 = gen_reg_rtx (TImode);
443 emit_move_insn (s0, src);
444 }
445
446 /* Now s0 is TImode and contains the bits to extract at start. */
447
448 if (start)
449 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
450
451 if (128 - width)
452 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
453
454 emit_move_insn (dst, s0);
455 }
456
457 void
458 spu_expand_insv (rtx ops[])
459 {
460 HOST_WIDE_INT width = INTVAL (ops[1]);
461 HOST_WIDE_INT start = INTVAL (ops[2]);
462 unsigned HOST_WIDE_INT maskbits;
463 machine_mode dst_mode;
464 rtx dst = ops[0], src = ops[3];
465 int dst_size;
466 rtx mask;
467 rtx shift_reg;
468 int shift;
469
470
471 if (GET_CODE (ops[0]) == MEM)
472 dst = gen_reg_rtx (TImode);
473 else
474 dst = adjust_operand (dst, &start);
475 dst_mode = GET_MODE (dst);
476 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
477
478 if (CONSTANT_P (src))
479 {
480 machine_mode m =
481 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
482 src = force_reg (m, convert_to_mode (m, src, 0));
483 }
484 src = adjust_operand (src, 0);
485
486 mask = gen_reg_rtx (dst_mode);
487 shift_reg = gen_reg_rtx (dst_mode);
488 shift = dst_size - start - width;
489
490 /* It's not safe to use subreg here because the compiler assumes
491 that the SUBREG_REG is right justified in the SUBREG. */
492 convert_move (shift_reg, src, 1);
493
494 if (shift > 0)
495 {
496 switch (dst_mode)
497 {
498 case SImode:
499 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
500 break;
501 case DImode:
502 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
503 break;
504 case TImode:
505 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
506 break;
507 default:
508 abort ();
509 }
510 }
511 else if (shift < 0)
512 abort ();
513
514 switch (dst_size)
515 {
516 case 32:
517 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
518 if (start)
519 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
520 emit_move_insn (mask, GEN_INT (maskbits));
521 break;
522 case 64:
523 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
524 if (start)
525 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
526 emit_move_insn (mask, GEN_INT (maskbits));
527 break;
528 case 128:
529 {
530 unsigned char arr[16];
531 int i = start / 8;
532 memset (arr, 0, sizeof (arr));
533 arr[i] = 0xff >> (start & 7);
534 for (i++; i <= (start + width - 1) / 8; i++)
535 arr[i] = 0xff;
536 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
537 emit_move_insn (mask, array_to_constant (TImode, arr));
538 }
539 break;
540 default:
541 abort ();
542 }
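/* Worked example for the 128-bit case above: start == 12 and width == 8
   produce arr[1] = 0x0f and arr[2] = 0xf0, a mask covering exactly bits
   12..19 of the quadword, counting bit 0 as the most significant bit of
   arr[0]. */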
543 if (GET_CODE (ops[0]) == MEM)
544 {
545 rtx low = gen_reg_rtx (SImode);
546 rtx rotl = gen_reg_rtx (SImode);
547 rtx mask0 = gen_reg_rtx (TImode);
548 rtx addr;
549 rtx addr0;
550 rtx addr1;
551 rtx mem;
552
553 addr = force_reg (Pmode, XEXP (ops[0], 0));
554 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
555 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
556 emit_insn (gen_negsi2 (rotl, low));
557 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
558 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
559 mem = change_address (ops[0], TImode, addr0);
560 set_mem_alias_set (mem, 0);
561 emit_move_insn (dst, mem);
562 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
563 if (start + width > MEM_ALIGN (ops[0]))
564 {
565 rtx shl = gen_reg_rtx (SImode);
566 rtx mask1 = gen_reg_rtx (TImode);
567 rtx dst1 = gen_reg_rtx (TImode);
568 rtx mem1;
569 addr1 = plus_constant (Pmode, addr, 16);
570 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
571 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
572 emit_insn (gen_shlqby_ti (mask1, mask, shl));
573 mem1 = change_address (ops[0], TImode, addr1);
574 set_mem_alias_set (mem1, 0);
575 emit_move_insn (dst1, mem1);
576 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
577 emit_move_insn (mem1, dst1);
578 }
579 emit_move_insn (mem, dst);
580 }
581 else
582 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
583 }
584
585
586 int
587 spu_expand_block_move (rtx ops[])
588 {
589 HOST_WIDE_INT bytes, align, offset;
590 rtx src, dst, sreg, dreg, target;
591 int i;
592 if (GET_CODE (ops[2]) != CONST_INT
593 || GET_CODE (ops[3]) != CONST_INT
594 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
595 return 0;
596
597 bytes = INTVAL (ops[2]);
598 align = INTVAL (ops[3]);
599
600 if (bytes <= 0)
601 return 1;
602
603 dst = ops[0];
604 src = ops[1];
605
606 if (align == 16)
607 {
608 for (offset = 0; offset + 16 <= bytes; offset += 16)
609 {
610 dst = adjust_address (ops[0], V16QImode, offset);
611 src = adjust_address (ops[1], V16QImode, offset);
612 emit_move_insn (dst, src);
613 }
614 if (offset < bytes)
615 {
616 rtx mask;
617 unsigned char arr[16] = { 0 };
618 for (i = 0; i < bytes - offset; i++)
619 arr[i] = 0xff;
620 dst = adjust_address (ops[0], V16QImode, offset);
621 src = adjust_address (ops[1], V16QImode, offset);
622 mask = gen_reg_rtx (V16QImode);
623 sreg = gen_reg_rtx (V16QImode);
624 dreg = gen_reg_rtx (V16QImode);
625 target = gen_reg_rtx (V16QImode);
626 emit_move_insn (mask, array_to_constant (V16QImode, arr));
627 emit_move_insn (dreg, dst);
628 emit_move_insn (sreg, src);
629 emit_insn (gen_selb (target, dreg, sreg, mask));
630 emit_move_insn (dst, target);
631 }
632 return 1;
633 }
634 return 0;
635 }
636
637 enum spu_comp_code
638 { SPU_EQ, SPU_GT, SPU_GTU };
639
640 int spu_comp_icode[12][3] = {
641 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
642 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
643 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
644 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
645 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
646 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
647 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
648 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
649 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
650 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
651 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
652 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
653 };
654
655 /* Generate a compare for CODE, and emit a branch or set the result based
656 on the outcome of the compare. GCC could figure this out too if we
657 provided only the standard variations of compares, but since GCC always
658 wants to use WORD_MODE, we can generate better code in most cases if we
659 do it ourselves. */
660 void
661 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
662 {
663 int reverse_compare = 0;
664 int reverse_test = 0;
665 rtx compare_result, eq_result;
666 rtx comp_rtx, eq_rtx;
667 machine_mode comp_mode;
668 machine_mode op_mode;
669 enum spu_comp_code scode, eq_code;
670 enum insn_code ior_code;
671 enum rtx_code code = GET_CODE (cmp);
672 rtx op0 = XEXP (cmp, 0);
673 rtx op1 = XEXP (cmp, 1);
674 int index;
675 int eq_test = 0;
676
677 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
678 and so on, to keep the constant in operand 1. */
679 if (GET_CODE (op1) == CONST_INT)
680 {
681 HOST_WIDE_INT val = INTVAL (op1) - 1;
682 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
683 switch (code)
684 {
685 case GE:
686 op1 = GEN_INT (val);
687 code = GT;
688 break;
689 case LT:
690 op1 = GEN_INT (val);
691 code = LE;
692 break;
693 case GEU:
694 op1 = GEN_INT (val);
695 code = GTU;
696 break;
697 case LTU:
698 op1 = GEN_INT (val);
699 code = LEU;
700 break;
701 default:
702 break;
703 }
704 }
705
706 /* However, if we generate an integer result, performing a reverse test
707 would require an extra negation, so avoid that where possible. */
708 if (GET_CODE (op1) == CONST_INT && is_set == 1)
709 {
710 HOST_WIDE_INT val = INTVAL (op1) + 1;
711 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
712 switch (code)
713 {
714 case LE:
715 op1 = GEN_INT (val);
716 code = LT;
717 break;
718 case LEU:
719 op1 = GEN_INT (val);
720 code = LTU;
721 break;
722 default:
723 break;
724 }
725 }
726
727 comp_mode = SImode;
728 op_mode = GET_MODE (op0);
729
730 switch (code)
731 {
732 case GE:
733 scode = SPU_GT;
734 if (HONOR_NANS (op_mode))
735 {
736 reverse_compare = 0;
737 reverse_test = 0;
738 eq_test = 1;
739 eq_code = SPU_EQ;
740 }
741 else
742 {
743 reverse_compare = 1;
744 reverse_test = 1;
745 }
746 break;
747 case LE:
748 scode = SPU_GT;
749 if (HONOR_NANS (op_mode))
750 {
751 reverse_compare = 1;
752 reverse_test = 0;
753 eq_test = 1;
754 eq_code = SPU_EQ;
755 }
756 else
757 {
758 reverse_compare = 0;
759 reverse_test = 1;
760 }
761 break;
762 case LT:
763 reverse_compare = 1;
764 reverse_test = 0;
765 scode = SPU_GT;
766 break;
767 case GEU:
768 reverse_compare = 1;
769 reverse_test = 1;
770 scode = SPU_GTU;
771 break;
772 case LEU:
773 reverse_compare = 0;
774 reverse_test = 1;
775 scode = SPU_GTU;
776 break;
777 case LTU:
778 reverse_compare = 1;
779 reverse_test = 0;
780 scode = SPU_GTU;
781 break;
782 case NE:
783 reverse_compare = 0;
784 reverse_test = 1;
785 scode = SPU_EQ;
786 break;
787
788 case EQ:
789 scode = SPU_EQ;
790 break;
791 case GT:
792 scode = SPU_GT;
793 break;
794 case GTU:
795 scode = SPU_GTU;
796 break;
797 default:
798 scode = SPU_EQ;
799 break;
800 }
801
802 switch (op_mode)
803 {
804 case QImode:
805 index = 0;
806 comp_mode = QImode;
807 break;
808 case HImode:
809 index = 1;
810 comp_mode = HImode;
811 break;
812 case SImode:
813 index = 2;
814 break;
815 case DImode:
816 index = 3;
817 break;
818 case TImode:
819 index = 4;
820 break;
821 case SFmode:
822 index = 5;
823 break;
824 case DFmode:
825 index = 6;
826 break;
827 case V16QImode:
828 index = 7;
829 comp_mode = op_mode;
830 break;
831 case V8HImode:
832 index = 8;
833 comp_mode = op_mode;
834 break;
835 case V4SImode:
836 index = 9;
837 comp_mode = op_mode;
838 break;
839 case V4SFmode:
840 index = 10;
841 comp_mode = V4SImode;
842 break;
843 case V2DFmode:
844 index = 11;
845 comp_mode = V2DImode;
846 break;
847 case V2DImode:
848 default:
849 abort ();
850 }
851
852 if (GET_MODE (op1) == DFmode
853 && (scode != SPU_GT && scode != SPU_EQ))
854 abort ();
855
856 if (is_set == 0 && op1 == const0_rtx
857 && (GET_MODE (op0) == SImode
858 || GET_MODE (op0) == HImode
859 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
860 {
861 /* Don't need to set a register with the result when we are
862 comparing against zero and branching. */
863 reverse_test = !reverse_test;
864 compare_result = op0;
865 }
866 else
867 {
868 compare_result = gen_reg_rtx (comp_mode);
869
870 if (reverse_compare)
871 {
872 rtx t = op1;
873 op1 = op0;
874 op0 = t;
875 }
876
877 if (spu_comp_icode[index][scode] == 0)
878 abort ();
879
880 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
881 (op0, op_mode))
882 op0 = force_reg (op_mode, op0);
883 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
884 (op1, op_mode))
885 op1 = force_reg (op_mode, op1);
886 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
887 op0, op1);
888 if (comp_rtx == 0)
889 abort ();
890 emit_insn (comp_rtx);
891
892 if (eq_test)
893 {
894 eq_result = gen_reg_rtx (comp_mode);
895 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
896 op0, op1);
897 if (eq_rtx == 0)
898 abort ();
899 emit_insn (eq_rtx);
900 ior_code = optab_handler (ior_optab, comp_mode);
901 gcc_assert (ior_code != CODE_FOR_nothing);
902 emit_insn (GEN_FCN (ior_code)
903 (compare_result, compare_result, eq_result));
904 }
905 }
906
907 if (is_set == 0)
908 {
909 rtx bcomp;
910 rtx loc_ref;
911
912 /* We don't have branch on QI compare insns, so we convert the
913 QI compare result to a HI result. */
914 if (comp_mode == QImode)
915 {
916 rtx old_res = compare_result;
917 compare_result = gen_reg_rtx (HImode);
918 comp_mode = HImode;
919 emit_insn (gen_extendqihi2 (compare_result, old_res));
920 }
921
922 if (reverse_test)
923 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
924 else
925 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
926
927 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
928 emit_jump_insn (gen_rtx_SET (pc_rtx,
929 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
930 loc_ref, pc_rtx)));
931 }
932 else if (is_set == 2)
933 {
934 rtx target = operands[0];
935 int compare_size = GET_MODE_BITSIZE (comp_mode);
936 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
937 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
938 rtx select_mask;
939 rtx op_t = operands[2];
940 rtx op_f = operands[3];
941
942 /* The result of the comparison can be SI, HI or QI mode. Create a
943 mask based on that result. */
944 if (target_size > compare_size)
945 {
946 select_mask = gen_reg_rtx (mode);
947 emit_insn (gen_extend_compare (select_mask, compare_result));
948 }
949 else if (target_size < compare_size)
950 select_mask =
951 gen_rtx_SUBREG (mode, compare_result,
952 (compare_size - target_size) / BITS_PER_UNIT);
953 else if (comp_mode != mode)
954 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
955 else
956 select_mask = compare_result;
957
958 if (GET_MODE (target) != GET_MODE (op_t)
959 || GET_MODE (target) != GET_MODE (op_f))
960 abort ();
961
962 if (reverse_test)
963 emit_insn (gen_selb (target, op_t, op_f, select_mask));
964 else
965 emit_insn (gen_selb (target, op_f, op_t, select_mask));
966 }
967 else
968 {
969 rtx target = operands[0];
970 if (reverse_test)
971 emit_insn (gen_rtx_SET (compare_result,
972 gen_rtx_NOT (comp_mode, compare_result)));
973 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
974 emit_insn (gen_extendhisi2 (target, compare_result));
975 else if (GET_MODE (target) == SImode
976 && GET_MODE (compare_result) == QImode)
977 emit_insn (gen_extend_compare (target, compare_result));
978 else
979 emit_move_insn (target, compare_result);
980 }
981 }
982
983 HOST_WIDE_INT
984 const_double_to_hwint (rtx x)
985 {
986 HOST_WIDE_INT val;
987 if (GET_MODE (x) == SFmode)
988 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
989 else if (GET_MODE (x) == DFmode)
990 {
991 long l[2];
992 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
993 val = l[0];
994 val = (val << 32) | (l[1] & 0xffffffff);
995 }
996 else
997 abort ();
998 return val;
999 }
1000
1001 rtx
1002 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1003 {
1004 long tv[2];
1005 REAL_VALUE_TYPE rv;
1006 gcc_assert (mode == SFmode || mode == DFmode);
1007
1008 if (mode == SFmode)
1009 tv[0] = (v << 32) >> 32;
1010 else if (mode == DFmode)
1011 {
1012 tv[1] = (v << 32) >> 32;
1013 tv[0] = v >> 32;
1014 }
1015 real_from_target (&rv, tv, mode);
1016 return const_double_from_real_value (rv, mode);
1017 }
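/* For example, hwint_to_const_double (SFmode, 0x3f800000) rebuilds the
   single-precision constant 1.0 from its target bit pattern; it is the
   inverse of const_double_to_hwint above. */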
1018
1019 void
1020 print_operand_address (FILE * file, register rtx addr)
1021 {
1022 rtx reg;
1023 rtx offset;
1024
1025 if (GET_CODE (addr) == AND
1026 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1027 && INTVAL (XEXP (addr, 1)) == -16)
1028 addr = XEXP (addr, 0);
1029
1030 switch (GET_CODE (addr))
1031 {
1032 case REG:
1033 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1034 break;
1035
1036 case PLUS:
1037 reg = XEXP (addr, 0);
1038 offset = XEXP (addr, 1);
1039 if (GET_CODE (offset) == REG)
1040 {
1041 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1042 reg_names[REGNO (offset)]);
1043 }
1044 else if (GET_CODE (offset) == CONST_INT)
1045 {
1046 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1047 INTVAL (offset), reg_names[REGNO (reg)]);
1048 }
1049 else
1050 abort ();
1051 break;
1052
1053 case CONST:
1054 case LABEL_REF:
1055 case SYMBOL_REF:
1056 case CONST_INT:
1057 output_addr_const (file, addr);
1058 break;
1059
1060 default:
1061 debug_rtx (addr);
1062 abort ();
1063 }
1064 }
1065
1066 void
1067 print_operand (FILE * file, rtx x, int code)
1068 {
1069 machine_mode mode = GET_MODE (x);
1070 HOST_WIDE_INT val;
1071 unsigned char arr[16];
1072 int xcode = GET_CODE (x);
1073 int i, info;
1074 if (GET_MODE (x) == VOIDmode)
1075 switch (code)
1076 {
1077 case 'L': /* 128 bits, signed */
1078 case 'm': /* 128 bits, signed */
1079 case 'T': /* 128 bits, signed */
1080 case 't': /* 128 bits, signed */
1081 mode = TImode;
1082 break;
1083 case 'K': /* 64 bits, signed */
1084 case 'k': /* 64 bits, signed */
1085 case 'D': /* 64 bits, signed */
1086 case 'd': /* 64 bits, signed */
1087 mode = DImode;
1088 break;
1089 case 'J': /* 32 bits, signed */
1090 case 'j': /* 32 bits, signed */
1091 case 's': /* 32 bits, signed */
1092 case 'S': /* 32 bits, signed */
1093 mode = SImode;
1094 break;
1095 }
1096 switch (code)
1097 {
1098
1099 case 'j': /* 32 bits, signed */
1100 case 'k': /* 64 bits, signed */
1101 case 'm': /* 128 bits, signed */
1102 if (xcode == CONST_INT
1103 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1104 {
1105 gcc_assert (logical_immediate_p (x, mode));
1106 constant_to_array (mode, x, arr);
1107 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1108 val = trunc_int_for_mode (val, SImode);
1109 switch (which_logical_immediate (val))
1110 {
1111 case SPU_ORI:
1112 break;
1113 case SPU_ORHI:
1114 fprintf (file, "h");
1115 break;
1116 case SPU_ORBI:
1117 fprintf (file, "b");
1118 break;
1119 default:
1120 gcc_unreachable();
1121 }
1122 }
1123 else
1124 gcc_unreachable();
1125 return;
1126
1127 case 'J': /* 32 bits, signed */
1128 case 'K': /* 64 bits, signed */
1129 case 'L': /* 128 bits, signed */
1130 if (xcode == CONST_INT
1131 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1132 {
1133 gcc_assert (logical_immediate_p (x, mode)
1134 || iohl_immediate_p (x, mode));
1135 constant_to_array (mode, x, arr);
1136 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1137 val = trunc_int_for_mode (val, SImode);
1138 switch (which_logical_immediate (val))
1139 {
1140 case SPU_ORI:
1141 case SPU_IOHL:
1142 break;
1143 case SPU_ORHI:
1144 val = trunc_int_for_mode (val, HImode);
1145 break;
1146 case SPU_ORBI:
1147 val = trunc_int_for_mode (val, QImode);
1148 break;
1149 default:
1150 gcc_unreachable();
1151 }
1152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1153 }
1154 else
1155 gcc_unreachable();
1156 return;
1157
1158 case 't': /* 128 bits, signed */
1159 case 'd': /* 64 bits, signed */
1160 case 's': /* 32 bits, signed */
1161 if (CONSTANT_P (x))
1162 {
1163 enum immediate_class c = classify_immediate (x, mode);
1164 switch (c)
1165 {
1166 case IC_IL1:
1167 constant_to_array (mode, x, arr);
1168 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1169 val = trunc_int_for_mode (val, SImode);
1170 switch (which_immediate_load (val))
1171 {
1172 case SPU_IL:
1173 break;
1174 case SPU_ILA:
1175 fprintf (file, "a");
1176 break;
1177 case SPU_ILH:
1178 fprintf (file, "h");
1179 break;
1180 case SPU_ILHU:
1181 fprintf (file, "hu");
1182 break;
1183 default:
1184 gcc_unreachable ();
1185 }
1186 break;
1187 case IC_CPAT:
1188 constant_to_array (mode, x, arr);
1189 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1190 if (info == 1)
1191 fprintf (file, "b");
1192 else if (info == 2)
1193 fprintf (file, "h");
1194 else if (info == 4)
1195 fprintf (file, "w");
1196 else if (info == 8)
1197 fprintf (file, "d");
1198 break;
1199 case IC_IL1s:
1200 if (xcode == CONST_VECTOR)
1201 {
1202 x = CONST_VECTOR_ELT (x, 0);
1203 xcode = GET_CODE (x);
1204 }
1205 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1206 fprintf (file, "a");
1207 else if (xcode == HIGH)
1208 fprintf (file, "hu");
1209 break;
1210 case IC_FSMBI:
1211 case IC_FSMBI2:
1212 case IC_IL2:
1213 case IC_IL2s:
1214 case IC_POOL:
1215 abort ();
1216 }
1217 }
1218 else
1219 gcc_unreachable ();
1220 return;
1221
1222 case 'T': /* 128 bits, signed */
1223 case 'D': /* 64 bits, signed */
1224 case 'S': /* 32 bits, signed */
1225 if (CONSTANT_P (x))
1226 {
1227 enum immediate_class c = classify_immediate (x, mode);
1228 switch (c)
1229 {
1230 case IC_IL1:
1231 constant_to_array (mode, x, arr);
1232 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1233 val = trunc_int_for_mode (val, SImode);
1234 switch (which_immediate_load (val))
1235 {
1236 case SPU_IL:
1237 case SPU_ILA:
1238 break;
1239 case SPU_ILH:
1240 case SPU_ILHU:
1241 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1242 break;
1243 default:
1244 gcc_unreachable ();
1245 }
1246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1247 break;
1248 case IC_FSMBI:
1249 constant_to_array (mode, x, arr);
1250 val = 0;
1251 for (i = 0; i < 16; i++)
1252 {
1253 val <<= 1;
1254 val |= arr[i] & 1;
1255 }
1256 print_operand (file, GEN_INT (val), 0);
1257 break;
1258 case IC_CPAT:
1259 constant_to_array (mode, x, arr);
1260 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1262 break;
1263 case IC_IL1s:
1264 if (xcode == HIGH)
1265 x = XEXP (x, 0);
1266 if (GET_CODE (x) == CONST_VECTOR)
1267 x = CONST_VECTOR_ELT (x, 0);
1268 output_addr_const (file, x);
1269 if (xcode == HIGH)
1270 fprintf (file, "@h");
1271 break;
1272 case IC_IL2:
1273 case IC_IL2s:
1274 case IC_FSMBI2:
1275 case IC_POOL:
1276 abort ();
1277 }
1278 }
1279 else
1280 gcc_unreachable ();
1281 return;
1282
1283 case 'C':
1284 if (xcode == CONST_INT)
1285 {
1286 /* Only the 4 least significant bits are relevant for generating
1287 control word instructions. */
1288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1289 return;
1290 }
1291 break;
1292
1293 case 'M': /* print code for c*d */
1294 if (GET_CODE (x) == CONST_INT)
1295 switch (INTVAL (x))
1296 {
1297 case 1:
1298 fprintf (file, "b");
1299 break;
1300 case 2:
1301 fprintf (file, "h");
1302 break;
1303 case 4:
1304 fprintf (file, "w");
1305 break;
1306 case 8:
1307 fprintf (file, "d");
1308 break;
1309 default:
1310 gcc_unreachable();
1311 }
1312 else
1313 gcc_unreachable();
1314 return;
1315
1316 case 'N': /* Negate the operand */
1317 if (xcode == CONST_INT)
1318 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1319 else if (xcode == CONST_VECTOR)
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1321 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1322 return;
1323
1324 case 'I': /* enable/disable interrupts */
1325 if (xcode == CONST_INT)
1326 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1327 return;
1328
1329 case 'b': /* branch modifiers */
1330 if (xcode == REG)
1331 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1332 else if (COMPARISON_P (x))
1333 fprintf (file, "%s", xcode == NE ? "n" : "");
1334 return;
1335
1336 case 'i': /* indirect call */
1337 if (xcode == MEM)
1338 {
1339 if (GET_CODE (XEXP (x, 0)) == REG)
1340 /* Used in indirect function calls. */
1341 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1342 else
1343 output_address (GET_MODE (x), XEXP (x, 0));
1344 }
1345 return;
1346
1347 case 'p': /* load/store */
1348 if (xcode == MEM)
1349 {
1350 x = XEXP (x, 0);
1351 xcode = GET_CODE (x);
1352 }
1353 if (xcode == AND)
1354 {
1355 x = XEXP (x, 0);
1356 xcode = GET_CODE (x);
1357 }
1358 if (xcode == REG)
1359 fprintf (file, "d");
1360 else if (xcode == CONST_INT)
1361 fprintf (file, "a");
1362 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1363 fprintf (file, "r");
1364 else if (xcode == PLUS || xcode == LO_SUM)
1365 {
1366 if (GET_CODE (XEXP (x, 1)) == REG)
1367 fprintf (file, "x");
1368 else
1369 fprintf (file, "d");
1370 }
1371 return;
1372
1373 case 'e':
1374 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1375 val &= 0x7;
1376 output_addr_const (file, GEN_INT (val));
1377 return;
1378
1379 case 'f':
1380 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1381 val &= 0x1f;
1382 output_addr_const (file, GEN_INT (val));
1383 return;
1384
1385 case 'g':
1386 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1387 val &= 0x3f;
1388 output_addr_const (file, GEN_INT (val));
1389 return;
1390
1391 case 'h':
1392 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1393 val = (val >> 3) & 0x1f;
1394 output_addr_const (file, GEN_INT (val));
1395 return;
1396
1397 case 'E':
1398 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1399 val = -val;
1400 val &= 0x7;
1401 output_addr_const (file, GEN_INT (val));
1402 return;
1403
1404 case 'F':
1405 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1406 val = -val;
1407 val &= 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'G':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val = -val;
1414 val &= 0x3f;
1415 output_addr_const (file, GEN_INT (val));
1416 return;
1417
1418 case 'H':
1419 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420 val = -(val & -8ll);
1421 val = (val >> 3) & 0x1f;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1424
1425 case 'v':
1426 case 'w':
1427 constant_to_array (mode, x, arr);
1428 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1429 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1430 return;
1431
1432 case 0:
1433 if (xcode == REG)
1434 fprintf (file, "%s", reg_names[REGNO (x)]);
1435 else if (xcode == MEM)
1436 output_address (GET_MODE (x), XEXP (x, 0));
1437 else if (xcode == CONST_VECTOR)
1438 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1439 else
1440 output_addr_const (file, x);
1441 return;
1442
1443 /* unused letters
1444 o qr u yz
1445 AB OPQR UVWXYZ */
1446 default:
1447 output_operand_lossage ("invalid %%xn code");
1448 }
1449 gcc_unreachable ();
1450 }
1451
1452 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1453 caller saved register. For leaf functions it is more efficient to
1454 use a volatile register because we won't need to save and restore the
1455 pic register. This routine is only valid after register allocation
1456 is completed, so we can pick an unused register. */
1457 static rtx
1458 get_pic_reg (void)
1459 {
1460 if (!reload_completed && !reload_in_progress)
1461 abort ();
1462
1463 /* If we've already made the decision, we need to stick with it. Once we've
1464 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1465 return true since the register is now live; this should not cause us to
1466 "switch back" to using pic_offset_table_rtx. */
1467 if (!cfun->machine->pic_reg)
1468 {
1469 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1470 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1471 else
1472 cfun->machine->pic_reg = pic_offset_table_rtx;
1473 }
1474
1475 return cfun->machine->pic_reg;
1476 }
1477
1478 /* Split constant addresses to handle cases that are too large.
1479 Add in the pic register when in PIC mode.
1480 Split immediates that require more than 1 instruction. */
1481 int
1482 spu_split_immediate (rtx * ops)
1483 {
1484 machine_mode mode = GET_MODE (ops[0]);
1485 enum immediate_class c = classify_immediate (ops[1], mode);
1486
1487 switch (c)
1488 {
1489 case IC_IL2:
1490 {
1491 unsigned char arrhi[16];
1492 unsigned char arrlo[16];
1493 rtx to, temp, hi, lo;
1494 int i;
1495 machine_mode imode = mode;
1496 /* We need to do reals as ints because the constant used in the
1497 IOR might not be a legitimate real constant. */
1498 imode = int_mode_for_mode (mode);
1499 constant_to_array (mode, ops[1], arrhi);
1500 if (imode != mode)
1501 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1502 else
1503 to = ops[0];
1504 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1505 for (i = 0; i < 16; i += 4)
1506 {
1507 arrlo[i + 2] = arrhi[i + 2];
1508 arrlo[i + 3] = arrhi[i + 3];
1509 arrlo[i + 0] = arrlo[i + 1] = 0;
1510 arrhi[i + 2] = arrhi[i + 3] = 0;
1511 }
1512 hi = array_to_constant (imode, arrhi);
1513 lo = array_to_constant (imode, arrlo);
1514 emit_move_insn (temp, hi);
1515 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1516 return 1;
1517 }
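/* As an illustration of the IC_IL2 split above: for each 4-byte word of
   the constant, arrhi keeps only the high halfword and arrlo only the low
   halfword, so a word such as 0x12345678 is built as 0x1234 in the high
   halfword (the initial move, an ilhu) IORed with 0x5678 (an iohl), the
   two-instruction pair this immediate class names. */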
1518 case IC_FSMBI2:
1519 {
1520 unsigned char arr_fsmbi[16];
1521 unsigned char arr_andbi[16];
1522 rtx to, reg_fsmbi, reg_and;
1523 int i;
1524 machine_mode imode = mode;
1525 /* We need to do reals as ints because the constant used in the
1526 * AND might not be a legitimate real constant. */
1527 imode = int_mode_for_mode (mode);
1528 constant_to_array (mode, ops[1], arr_fsmbi);
1529 if (imode != mode)
1530 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1531 else
1532 to = ops[0];
1533 for (i = 0; i < 16; i++)
1534 if (arr_fsmbi[i] != 0)
1535 {
1536 arr_andbi[0] = arr_fsmbi[i];
1537 arr_fsmbi[i] = 0xff;
1538 }
1539 for (i = 1; i < 16; i++)
1540 arr_andbi[i] = arr_andbi[0];
1541 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1542 reg_and = array_to_constant (imode, arr_andbi);
1543 emit_move_insn (to, reg_fsmbi);
1544 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1545 return 1;
1546 }
1547 case IC_POOL:
1548 if (reload_in_progress || reload_completed)
1549 {
1550 rtx mem = force_const_mem (mode, ops[1]);
1551 if (TARGET_LARGE_MEM)
1552 {
1553 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1554 emit_move_insn (addr, XEXP (mem, 0));
1555 mem = replace_equiv_address (mem, addr);
1556 }
1557 emit_move_insn (ops[0], mem);
1558 return 1;
1559 }
1560 break;
1561 case IC_IL1s:
1562 case IC_IL2s:
1563 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1564 {
1565 if (c == IC_IL2s)
1566 {
1567 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1568 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1569 }
1570 else if (flag_pic)
1571 emit_insn (gen_pic (ops[0], ops[1]));
1572 if (flag_pic)
1573 {
1574 rtx pic_reg = get_pic_reg ();
1575 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1576 }
1577 return flag_pic || c == IC_IL2s;
1578 }
1579 break;
1580 case IC_IL1:
1581 case IC_FSMBI:
1582 case IC_CPAT:
1583 break;
1584 }
1585 return 0;
1586 }
1587
1588 /* SAVING is TRUE when we are generating the actual load and store
1589 instructions for REGNO. When determining the size of the stack
1590 needed for saving registers we must allocate enough space for the
1591 worst case, because we don't always have the information early enough
1592 to not allocate it. But we can at least eliminate the actual loads
1593 and stores during the prologue/epilogue. */
1594 static int
1595 need_to_save_reg (int regno, int saving)
1596 {
1597 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1598 return 1;
1599 if (flag_pic
1600 && regno == PIC_OFFSET_TABLE_REGNUM
1601 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1602 return 1;
1603 return 0;
1604 }
1605
1606 /* This function is only correct starting with local register
1607 allocation */
1608 int
1609 spu_saved_regs_size (void)
1610 {
1611 int reg_save_size = 0;
1612 int regno;
1613
1614 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1615 if (need_to_save_reg (regno, 0))
1616 reg_save_size += 0x10;
1617 return reg_save_size;
1618 }
1619
1620 static rtx_insn *
1621 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1622 {
1623 rtx reg = gen_rtx_REG (V4SImode, regno);
1624 rtx mem =
1625 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1626 return emit_insn (gen_movv4si (mem, reg));
1627 }
1628
1629 static rtx_insn *
1630 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1631 {
1632 rtx reg = gen_rtx_REG (V4SImode, regno);
1633 rtx mem =
1634 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1635 return emit_insn (gen_movv4si (reg, mem));
1636 }
1637
1638 /* This happens after reload, so we need to expand it. */
1639 static rtx_insn *
1640 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1641 {
1642 rtx_insn *insn;
1643 if (satisfies_constraint_K (GEN_INT (imm)))
1644 {
1645 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1646 }
1647 else
1648 {
1649 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1650 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1651 if (REGNO (src) == REGNO (scratch))
1652 abort ();
1653 }
1654 return insn;
1655 }
1656
1657 /* Return nonzero if this function is known to have a null epilogue. */
1658
1659 int
1660 direct_return (void)
1661 {
1662 if (reload_completed)
1663 {
1664 if (cfun->static_chain_decl == 0
1665 && (spu_saved_regs_size ()
1666 + get_frame_size ()
1667 + crtl->outgoing_args_size
1668 + crtl->args.pretend_args_size == 0)
1669 && crtl->is_leaf)
1670 return 1;
1671 }
1672 return 0;
1673 }
1674
1675 /*
1676 The stack frame looks like this:
1677 +-------------+
1678 | incoming |
1679 | args |
1680 AP -> +-------------+
1681 | $lr save |
1682 +-------------+
1683 prev SP | back chain |
1684 +-------------+
1685 | var args |
1686 | reg save | crtl->args.pretend_args_size bytes
1687 +-------------+
1688 | ... |
1689 | saved regs | spu_saved_regs_size() bytes
1690 FP -> +-------------+
1691 | ... |
1692 | vars | get_frame_size() bytes
1693 HFP -> +-------------+
1694 | ... |
1695 | outgoing |
1696 | args | crtl->outgoing_args_size bytes
1697 +-------------+
1698 | $lr of next |
1699 | frame |
1700 +-------------+
1701 | back chain |
1702 SP -> +-------------+
1703
1704 */
1705 void
1706 spu_expand_prologue (void)
1707 {
1708 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1709 HOST_WIDE_INT total_size;
1710 HOST_WIDE_INT saved_regs_size;
1711 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1712 rtx scratch_reg_0, scratch_reg_1;
1713 rtx_insn *insn;
1714 rtx real;
1715
1716 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1717 cfun->machine->pic_reg = pic_offset_table_rtx;
1718
1719 if (spu_naked_function_p (current_function_decl))
1720 return;
1721
1722 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1723 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1724
1725 saved_regs_size = spu_saved_regs_size ();
1726 total_size = size + saved_regs_size
1727 + crtl->outgoing_args_size
1728 + crtl->args.pretend_args_size;
1729
1730 if (!crtl->is_leaf
1731 || cfun->calls_alloca || total_size > 0)
1732 total_size += STACK_POINTER_OFFSET;
1733
1734 /* Save this first because code after this might use the link
1735 register as a scratch register. */
1736 if (!crtl->is_leaf)
1737 {
1738 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1739 RTX_FRAME_RELATED_P (insn) = 1;
1740 }
1741
1742 if (total_size > 0)
1743 {
1744 offset = -crtl->args.pretend_args_size;
1745 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1746 if (need_to_save_reg (regno, 1))
1747 {
1748 offset -= 16;
1749 insn = frame_emit_store (regno, sp_reg, offset);
1750 RTX_FRAME_RELATED_P (insn) = 1;
1751 }
1752 }
1753
1754 if (flag_pic && cfun->machine->pic_reg)
1755 {
1756 rtx pic_reg = cfun->machine->pic_reg;
1757 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1758 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1759 }
1760
1761 if (total_size > 0)
1762 {
1763 if (flag_stack_check)
1764 {
1765 /* We compare against total_size-1 because
1766 ($sp >= total_size) <=> ($sp > total_size-1) */
1767 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1768 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1769 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1770 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1771 {
1772 emit_move_insn (scratch_v4si, size_v4si);
1773 size_v4si = scratch_v4si;
1774 }
1775 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1776 emit_insn (gen_vec_extractv4si
1777 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1778 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1779 }
1780
1781 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1782 the value of the previous $sp because we save it as the back
1783 chain. */
1784 if (total_size <= 2000)
1785 {
1786 /* In this case we save the back chain first. */
1787 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1788 insn =
1789 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1790 }
1791 else
1792 {
1793 insn = emit_move_insn (scratch_reg_0, sp_reg);
1794 insn =
1795 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1796 }
1797 RTX_FRAME_RELATED_P (insn) = 1;
1798 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1799 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1800
1801 if (total_size > 2000)
1802 {
1803 /* Save the back chain ptr */
1804 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1805 }
1806
1807 if (frame_pointer_needed)
1808 {
1809 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1810 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1811 + crtl->outgoing_args_size;
1812 /* Set the new frame_pointer */
1813 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1814 RTX_FRAME_RELATED_P (insn) = 1;
1815 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1816 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1817 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1818 }
1819 }
1820
1821 if (flag_stack_usage_info)
1822 current_function_static_stack_size = total_size;
1823 }
1824
1825 void
1826 spu_expand_epilogue (bool sibcall_p)
1827 {
1828 int size = get_frame_size (), offset, regno;
1829 HOST_WIDE_INT saved_regs_size, total_size;
1830 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1831 rtx scratch_reg_0;
1832
1833 if (spu_naked_function_p (current_function_decl))
1834 return;
1835
1836 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1837
1838 saved_regs_size = spu_saved_regs_size ();
1839 total_size = size + saved_regs_size
1840 + crtl->outgoing_args_size
1841 + crtl->args.pretend_args_size;
1842
1843 if (!crtl->is_leaf
1844 || cfun->calls_alloca || total_size > 0)
1845 total_size += STACK_POINTER_OFFSET;
1846
1847 if (total_size > 0)
1848 {
1849 if (cfun->calls_alloca)
1850 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1851 else
1852 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1853
1854
1855 if (saved_regs_size > 0)
1856 {
1857 offset = -crtl->args.pretend_args_size;
1858 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1859 if (need_to_save_reg (regno, 1))
1860 {
1861 offset -= 0x10;
1862 frame_emit_load (regno, sp_reg, offset);
1863 }
1864 }
1865 }
1866
1867 if (!crtl->is_leaf)
1868 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1869
1870 if (!sibcall_p)
1871 {
1872 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1873 emit_jump_insn (gen__return ());
1874 }
1875 }
1876
1877 rtx
1878 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1879 {
1880 if (count != 0)
1881 return 0;
1882 /* This is inefficient because it ends up copying to a save-register
1883 which then gets saved even though $lr has already been saved. But
1884 it does generate better code for leaf functions and we don't need
1885 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1886 used for __builtin_return_address anyway, so maybe we don't care if
1887 it's inefficient. */
1888 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1889 }
1890 \f
1891
1892 /* Given VAL, generate a constant appropriate for MODE.
1893 If MODE is a vector mode, every element will be VAL.
1894 For TImode, VAL will be zero extended to 128 bits. */
1895 rtx
1896 spu_const (machine_mode mode, HOST_WIDE_INT val)
1897 {
1898 rtx inner;
1899 rtvec v;
1900 int units, i;
1901
1902 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1903 || GET_MODE_CLASS (mode) == MODE_FLOAT
1904 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1905 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1906
1907 if (GET_MODE_CLASS (mode) == MODE_INT)
1908 return immed_double_const (val, 0, mode);
1909
1910 /* val is the bit representation of the float */
1911 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1912 return hwint_to_const_double (mode, val);
1913
1914 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1915 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1916 else
1917 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1918
1919 units = GET_MODE_NUNITS (mode);
1920
1921 v = rtvec_alloc (units);
1922
1923 for (i = 0; i < units; ++i)
1924 RTVEC_ELT (v, i) = inner;
1925
1926 return gen_rtx_CONST_VECTOR (mode, v);
1927 }
1928
1929 /* Create a MODE vector constant from 4 ints. */
1930 rtx
1931 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1932 {
1933 unsigned char arr[16];
1934 arr[0] = (a >> 24) & 0xff;
1935 arr[1] = (a >> 16) & 0xff;
1936 arr[2] = (a >> 8) & 0xff;
1937 arr[3] = (a >> 0) & 0xff;
1938 arr[4] = (b >> 24) & 0xff;
1939 arr[5] = (b >> 16) & 0xff;
1940 arr[6] = (b >> 8) & 0xff;
1941 arr[7] = (b >> 0) & 0xff;
1942 arr[8] = (c >> 24) & 0xff;
1943 arr[9] = (c >> 16) & 0xff;
1944 arr[10] = (c >> 8) & 0xff;
1945 arr[11] = (c >> 0) & 0xff;
1946 arr[12] = (d >> 24) & 0xff;
1947 arr[13] = (d >> 16) & 0xff;
1948 arr[14] = (d >> 8) & 0xff;
1949 arr[15] = (d >> 0) & 0xff;
1950 return array_to_constant(mode, arr);
1951 }
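/* For example, spu_const_from_ints (V4SImode, 1, 2, 3, 4) lays the four
   ints out most-significant byte first and, assuming array_to_constant's
   big-endian element order, yields the V4SImode constant vector
   {1, 2, 3, 4}. */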
1952 \f
1953 /* branch hint stuff */
1954
1955 /* An array of these is used to propagate hints to predecessor blocks. */
1956 struct spu_bb_info
1957 {
1958 rtx_insn *prop_jump; /* propagated from another block */
1959 int bb_index; /* the original block. */
1960 };
1961 static struct spu_bb_info *spu_bb_info;
1962
1963 #define STOP_HINT_P(INSN) \
1964 (CALL_P(INSN) \
1965 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1966 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1967
1968 /* 1 when RTX is a hinted branch or its target. We keep track of
1969 what has been hinted so the safe-hint code can test it easily. */
1970 #define HINTED_P(RTX) \
1971 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1972
1973 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1974 #define SCHED_ON_EVEN_P(RTX) \
1975 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1976
1977 /* Emit a nop for INSN such that the two will dual issue. This assumes
1978 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1979 We check for TImode to handle a MULTI1 insn which has dual issued its
1980 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1981 static void
1982 emit_nop_for_insn (rtx_insn *insn)
1983 {
1984 int p;
1985 rtx_insn *new_insn;
1986
1987 /* We need to handle JUMP_TABLE_DATA separately. */
1988 if (JUMP_TABLE_DATA_P (insn))
1989 {
1990 new_insn = emit_insn_after (gen_lnop(), insn);
1991 recog_memoized (new_insn);
1992 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1993 return;
1994 }
1995
1996 p = get_pipe (insn);
1997 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1998 new_insn = emit_insn_after (gen_lnop (), insn);
1999 else if (p == 1 && GET_MODE (insn) == TImode)
2000 {
2001 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2002 PUT_MODE (new_insn, TImode);
2003 PUT_MODE (insn, VOIDmode);
2004 }
2005 else
2006 new_insn = emit_insn_after (gen_lnop (), insn);
2007 recog_memoized (new_insn);
2008 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2009 }
2010
2011 /* Insert nops in basic blocks to meet dual issue alignment
2012 requirements. Also make sure hbrp and hint instructions are at least
2013 one cycle apart, possibly inserting a nop. */
2014 static void
2015 pad_bb(void)
2016 {
2017 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2018 int length;
2019 int addr;
2020
2021 /* This sets up INSN_ADDRESSES. */
2022 shorten_branches (get_insns ());
2023
2024 /* Keep track of length added by nops. */
2025 length = 0;
2026
2027 prev_insn = 0;
2028 insn = get_insns ();
2029 if (!active_insn_p (insn))
2030 insn = next_active_insn (insn);
2031 for (; insn; insn = next_insn)
2032 {
2033 next_insn = next_active_insn (insn);
2034 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2035 || INSN_CODE (insn) == CODE_FOR_hbr)
2036 {
2037 if (hbr_insn)
2038 {
2039 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2040 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2041 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2042 || (a1 - a0 == 4))
2043 {
2044 prev_insn = emit_insn_before (gen_lnop (), insn);
2045 PUT_MODE (prev_insn, GET_MODE (insn));
2046 PUT_MODE (insn, TImode);
2047 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2048 length += 4;
2049 }
2050 }
2051 hbr_insn = insn;
2052 }
2053 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2054 {
2055 if (GET_MODE (insn) == TImode)
2056 PUT_MODE (next_insn, TImode);
2057 insn = next_insn;
2058 next_insn = next_active_insn (insn);
2059 }
2060 addr = INSN_ADDRESSES (INSN_UID (insn));
2061 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2062 {
2063 if (((addr + length) & 7) != 0)
2064 {
2065 emit_nop_for_insn (prev_insn);
2066 length += 4;
2067 }
2068 }
2069 else if (GET_MODE (insn) == TImode
2070 && ((next_insn && GET_MODE (next_insn) != TImode)
2071 || get_attr_type (insn) == TYPE_MULTI0)
2072 && ((addr + length) & 7) != 0)
2073 {
2074 /* prev_insn will always be set because the first insn is
2075 always 8-byte aligned. */
2076 emit_nop_for_insn (prev_insn);
2077 length += 4;
2078 }
2079 prev_insn = insn;
2080 }
2081 }
2082
2083 \f
2084 /* Routines for branch hints. */
2085
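/* Emit an hbr instruction before BEFORE, hinting that BRANCH will jump
   to TARGET.  DISTANCE is the number of bytes between the hint location
   and the branch; hints further than 600 bytes away are not emitted.
   The block containing BRANCH gains a new label for the hint and is
   recorded in BLOCKS so the caller can rebuild its sub-basic blocks.  */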
2086 static void
2087 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2088 int distance, sbitmap blocks)
2089 {
2090 rtx_insn *hint;
2091 rtx_insn *insn;
2092 rtx_jump_table_data *table;
2093
2094 if (before == 0 || branch == 0 || target == 0)
2095 return;
2096
2097 /* While scheduling we require hints to be no further than 600 bytes
2098 from the branch, so we need to enforce that here too.  */
2099 if (distance > 600)
2100 return;
2101
2102 /* If BEFORE is a basic block note, emit the hint after the note.  */
2103 if (NOTE_INSN_BASIC_BLOCK_P (before))
2104 before = NEXT_INSN (before);
2105
2106 rtx_code_label *branch_label = gen_label_rtx ();
2107 LABEL_NUSES (branch_label)++;
2108 LABEL_PRESERVE_P (branch_label) = 1;
2109 insn = emit_label_before (branch_label, branch);
2110 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2111 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2112
2113 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2114 recog_memoized (hint);
2115 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2116 HINTED_P (branch) = 1;
2117
2118 if (GET_CODE (target) == LABEL_REF)
2119 HINTED_P (XEXP (target, 0)) = 1;
2120 else if (tablejump_p (branch, 0, &table))
2121 {
2122 rtvec vec;
2123 int j;
2124 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2125 vec = XVEC (PATTERN (table), 0);
2126 else
2127 vec = XVEC (PATTERN (table), 1);
2128 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2129 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2130 }
2131
2132 if (distance >= 588)
2133 {
2134 /* Make sure the hint isn't scheduled any earlier than this point,
2135 which could make it too far away for the branch offset to fit.  */
2136 insn = emit_insn_before (gen_blockage (), hint);
2137 recog_memoized (insn);
2138 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2139 }
2140 else if (distance <= 8 * 4)
2141 {
2142 /* To guarantee at least 8 insns between the hint and branch we
2143 insert nops. */
2144 int d;
2145 for (d = distance; d < 8 * 4; d += 4)
2146 {
2147 insn =
2148 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2149 recog_memoized (insn);
2150 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2151 }
2152
2153 /* Make sure any nops inserted aren't scheduled before the hint. */
2154 insn = emit_insn_after (gen_blockage (), hint);
2155 recog_memoized (insn);
2156 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2157
2158 /* Make sure any nops inserted aren't scheduled after the call. */
2159 if (CALL_P (branch) && distance < 8 * 4)
2160 {
2161 insn = emit_insn_before (gen_blockage (), branch);
2162 recog_memoized (insn);
2163 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2164 }
2165 }
2166 }
2167
2168 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2169 the rtx for the branch target. */
2170 static rtx
2171 get_branch_target (rtx_insn *branch)
2172 {
2173 if (JUMP_P (branch))
2174 {
2175 rtx set, src;
2176
2177 /* Return statements */
2178 if (GET_CODE (PATTERN (branch)) == RETURN)
2179 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2180
2181 /* ASM GOTOs. */
2182 if (extract_asm_operands (PATTERN (branch)) != NULL)
2183 return NULL;
2184
2185 set = single_set (branch);
2186 src = SET_SRC (set);
2187 if (GET_CODE (SET_DEST (set)) != PC)
2188 abort ();
2189
2190 if (GET_CODE (src) == IF_THEN_ELSE)
2191 {
2192 rtx lab = 0;
2193 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2194 if (note)
2195 {
2196 /* If the more probable case is not a fall through, then
2197 try a branch hint. */
2198 int prob = XINT (note, 0);
2199 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2200 && GET_CODE (XEXP (src, 1)) != PC)
2201 lab = XEXP (src, 1);
2202 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2203 && GET_CODE (XEXP (src, 2)) != PC)
2204 lab = XEXP (src, 2);
2205 }
2206 if (lab)
2207 {
2208 if (GET_CODE (lab) == RETURN)
2209 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2210 return lab;
2211 }
2212 return 0;
2213 }
2214
2215 return src;
2216 }
2217 else if (CALL_P (branch))
2218 {
2219 rtx call;
2220 /* All of our call patterns are in a PARALLEL and the CALL is
2221 the first pattern in the PARALLEL. */
2222 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2223 abort ();
2224 call = XVECEXP (PATTERN (branch), 0, 0);
2225 if (GET_CODE (call) == SET)
2226 call = SET_SRC (call);
2227 if (GET_CODE (call) != CALL)
2228 abort ();
2229 return XEXP (XEXP (call, 0), 0);
2230 }
2231 return 0;
2232 }
2233
2234 /* The special $hbr register is used to prevent the insn scheduler from
2235 moving hbr insns across instructions which invalidate them. It
2236 should only be used in a clobber, and this function searches for
2237 insns which clobber it. */
2238 static bool
2239 insn_clobbers_hbr (rtx_insn *insn)
2240 {
2241 if (INSN_P (insn)
2242 && GET_CODE (PATTERN (insn)) == PARALLEL)
2243 {
2244 rtx parallel = PATTERN (insn);
2245 rtx clobber;
2246 int j;
2247 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2248 {
2249 clobber = XVECEXP (parallel, 0, j);
2250 if (GET_CODE (clobber) == CLOBBER
2251 && GET_CODE (XEXP (clobber, 0)) == REG
2252 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2253 return 1;
2254 }
2255 }
2256 return 0;
2257 }
2258
2259 /* Search up to 32 insns starting at FIRST:
2260 - at any kind of hinted branch, just return
2261 - at any unconditional branch in the first 15 insns, just return
2262 - at a call or indirect branch, after the first 15 insns, force it to
2263 an even address and return
2264 - at any unconditional branch, after the first 15 insns, force it to
2265 an even address.
2266 At the end of the search, insert an hbrp within 4 insns of FIRST,
2267 and an hbrp within 16 instructions of FIRST.
2268 */
2269 static void
2270 insert_hbrp_for_ilb_runout (rtx_insn *first)
2271 {
2272 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2273 int addr = 0, length, first_addr = -1;
2274 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2275 int insert_lnop_after = 0;
2276 for (insn = first; insn; insn = NEXT_INSN (insn))
2277 if (INSN_P (insn))
2278 {
2279 if (first_addr == -1)
2280 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2281 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2282 length = get_attr_length (insn);
2283
2284 if (before_4 == 0 && addr + length >= 4 * 4)
2285 before_4 = insn;
2286 /* We test for 14 instructions because the first hbrp will add
2287 up to 2 instructions. */
2288 if (before_16 == 0 && addr + length >= 14 * 4)
2289 before_16 = insn;
2290
2291 if (INSN_CODE (insn) == CODE_FOR_hbr)
2292 {
2293 /* Make sure an hbrp is at least 2 cycles away from a hint.
2294 Insert an lnop after the hbrp when necessary. */
2295 if (before_4 == 0 && addr > 0)
2296 {
2297 before_4 = insn;
2298 insert_lnop_after |= 1;
2299 }
2300 else if (before_4 && addr <= 4 * 4)
2301 insert_lnop_after |= 1;
2302 if (before_16 == 0 && addr > 10 * 4)
2303 {
2304 before_16 = insn;
2305 insert_lnop_after |= 2;
2306 }
2307 else if (before_16 && addr <= 14 * 4)
2308 insert_lnop_after |= 2;
2309 }
2310
2311 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2312 {
2313 if (addr < hbrp_addr0)
2314 hbrp_addr0 = addr;
2315 else if (addr < hbrp_addr1)
2316 hbrp_addr1 = addr;
2317 }
2318
2319 if (CALL_P (insn) || JUMP_P (insn))
2320 {
2321 if (HINTED_P (insn))
2322 return;
2323
2324 /* Any branch after the first 15 insns should be on an even
2325 address to avoid a special case branch. There might be
2326 some nops and/or hbrps inserted, so we test after 10
2327 insns. */
2328 if (addr > 10 * 4)
2329 SCHED_ON_EVEN_P (insn) = 1;
2330 }
2331
2332 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2333 return;
2334
2335
2336 if (addr + length >= 32 * 4)
2337 {
2338 gcc_assert (before_4 && before_16);
2339 if (hbrp_addr0 > 4 * 4)
2340 {
2341 insn =
2342 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2343 recog_memoized (insn);
2344 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2345 INSN_ADDRESSES_NEW (insn,
2346 INSN_ADDRESSES (INSN_UID (before_4)));
2347 PUT_MODE (insn, GET_MODE (before_4));
2348 PUT_MODE (before_4, TImode);
2349 if (insert_lnop_after & 1)
2350 {
2351 insn = emit_insn_before (gen_lnop (), before_4);
2352 recog_memoized (insn);
2353 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2354 INSN_ADDRESSES_NEW (insn,
2355 INSN_ADDRESSES (INSN_UID (before_4)));
2356 PUT_MODE (insn, TImode);
2357 }
2358 }
2359 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2360 && hbrp_addr1 > 16 * 4)
2361 {
2362 insn =
2363 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2364 recog_memoized (insn);
2365 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2366 INSN_ADDRESSES_NEW (insn,
2367 INSN_ADDRESSES (INSN_UID (before_16)));
2368 PUT_MODE (insn, GET_MODE (before_16));
2369 PUT_MODE (before_16, TImode);
2370 if (insert_lnop_after & 2)
2371 {
2372 insn = emit_insn_before (gen_lnop (), before_16);
2373 recog_memoized (insn);
2374 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2375 INSN_ADDRESSES_NEW (insn,
2376 INSN_ADDRESSES (INSN_UID
2377 (before_16)));
2378 PUT_MODE (insn, TImode);
2379 }
2380 }
2381 return;
2382 }
2383 }
2384 else if (BARRIER_P (insn))
2385 return;
2386
2387 }
2388
2389 /* The SPU might hang when it executes 48 inline instructions after a
2390 hinted branch jumps to its hinted target. The beginning of a
2391 function and the return from a call might have been hinted, and
2392 must be handled as well. To prevent a hang we insert 2 hbrps. The
2393 first should be within 6 insns of the branch target. The second
2394 should be within 22 insns of the branch target. When determining
2395 if hbrps are necessary, we look for only 32 inline instructions,
2396 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2397 when inserting new hbrps, we insert them within 4 and 16 insns of
2398 the target. */
2399 static void
2400 insert_hbrp (void)
2401 {
2402 rtx_insn *insn;
2403 if (TARGET_SAFE_HINTS)
2404 {
2405 shorten_branches (get_insns ());
2406 /* Insert hbrp at beginning of function */
2407 insn = next_active_insn (get_insns ());
2408 if (insn)
2409 insert_hbrp_for_ilb_runout (insn);
2410 /* Insert hbrp after hinted targets. */
2411 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2412 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2413 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2414 }
2415 }
2416
2417 static int in_spu_reorg;
2418
2419 static void
2420 spu_var_tracking (void)
2421 {
2422 if (flag_var_tracking)
2423 {
2424 df_analyze ();
2425 timevar_push (TV_VAR_TRACKING);
2426 variable_tracking_main ();
2427 timevar_pop (TV_VAR_TRACKING);
2428 df_finish_pass (false);
2429 }
2430 }
2431
2432 /* Insert branch hints. There are no branch optimizations after this
2433 pass, so it's safe to set our branch hints now. */
2434 static void
2435 spu_machine_dependent_reorg (void)
2436 {
2437 sbitmap blocks;
2438 basic_block bb;
2439 rtx_insn *branch, *insn;
2440 rtx branch_target = 0;
2441 int branch_addr = 0, insn_addr, required_dist = 0;
2442 int i;
2443 unsigned int j;
2444
2445 if (!TARGET_BRANCH_HINTS || optimize == 0)
2446 {
2447 /* We still do it for unoptimized code because an external
2448 function might have hinted a call or return. */
2449 compute_bb_for_insn ();
2450 insert_hbrp ();
2451 pad_bb ();
2452 spu_var_tracking ();
2453 free_bb_for_insn ();
2454 return;
2455 }
2456
2457 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2458 bitmap_clear (blocks);
2459
2460 in_spu_reorg = 1;
2461 compute_bb_for_insn ();
2462
2463 /* (Re-)discover loops so that bb->loop_father can be used
2464 in the analysis below. */
2465 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2466
2467 compact_blocks ();
2468
2469 spu_bb_info =
2470 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2471 sizeof (struct spu_bb_info));
2472
2473 /* We need exact insn addresses and lengths. */
2474 shorten_branches (get_insns ());
2475
2476 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2477 {
2478 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2479 branch = 0;
2480 if (spu_bb_info[i].prop_jump)
2481 {
2482 branch = spu_bb_info[i].prop_jump;
2483 branch_target = get_branch_target (branch);
2484 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2485 required_dist = spu_hint_dist;
2486 }
2487 /* Search from end of a block to beginning.  In this loop, find
2488 branches which need a hint and emit the hint only when:
2489 - it's an indirect branch and we're at the insn which sets
2490 the register
2491 - we're at an insn that will invalidate the hint. e.g., a
2492 call, another hint insn, inline asm that clobbers $hbr, and
2493 some inlined operations (divmodsi4). Don't consider jumps
2494 because they are only at the end of a block and are
2495 considered when we are deciding whether to propagate
2496 - we're getting too far away from the branch. The hbr insns
2497 only have a signed 10 bit offset
2498 We go back as far as possible so the branch will be considered
2499 for propagation when we get to the beginning of the block. */
2500 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2501 {
2502 if (INSN_P (insn))
2503 {
2504 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2505 if (branch
2506 && ((GET_CODE (branch_target) == REG
2507 && set_of (branch_target, insn) != NULL_RTX)
2508 || insn_clobbers_hbr (insn)
2509 || branch_addr - insn_addr > 600))
2510 {
2511 rtx_insn *next = NEXT_INSN (insn);
2512 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2513 if (insn != BB_END (bb)
2514 && branch_addr - next_addr >= required_dist)
2515 {
2516 if (dump_file)
2517 fprintf (dump_file,
2518 "hint for %i in block %i before %i\n",
2519 INSN_UID (branch), bb->index,
2520 INSN_UID (next));
2521 spu_emit_branch_hint (next, branch, branch_target,
2522 branch_addr - next_addr, blocks);
2523 }
2524 branch = 0;
2525 }
2526
2527 /* JUMP_P will only be true at the end of a block. When
2528 branch is already set it means we've previously decided
2529 to propagate a hint for that branch into this block. */
2530 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2531 {
2532 branch = 0;
2533 if ((branch_target = get_branch_target (insn)))
2534 {
2535 branch = insn;
2536 branch_addr = insn_addr;
2537 required_dist = spu_hint_dist;
2538 }
2539 }
2540 }
2541 if (insn == BB_HEAD (bb))
2542 break;
2543 }
2544
2545 if (branch)
2546 {
2547 /* If we haven't emitted a hint for this branch yet, it might
2548 be profitable to emit it in one of the predecessor blocks,
2549 especially for loops. */
2550 rtx_insn *bbend;
2551 basic_block prev = 0, prop = 0, prev2 = 0;
2552 int loop_exit = 0, simple_loop = 0;
2553 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2554
2555 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2556 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2557 prev = EDGE_PRED (bb, j)->src;
2558 else
2559 prev2 = EDGE_PRED (bb, j)->src;
2560
2561 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2562 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2563 loop_exit = 1;
2564 else if (EDGE_SUCC (bb, j)->dest == bb)
2565 simple_loop = 1;
2566
2567 /* If this branch is a loop exit then propagate to previous
2568 fallthru block. This catches the cases when it is a simple
2569 loop or when there is an initial branch into the loop. */
2570 if (prev && (loop_exit || simple_loop)
2571 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2572 prop = prev;
2573
2574 /* If there is only one adjacent predecessor, don't propagate
2575 outside this loop. */
2576 else if (prev && single_pred_p (bb)
2577 && prev->loop_father == bb->loop_father)
2578 prop = prev;
2579
2580 /* If this is the JOIN block of a simple IF-THEN then
2581 propagate the hint to the HEADER block. */
2582 else if (prev && prev2
2583 && EDGE_COUNT (bb->preds) == 2
2584 && EDGE_COUNT (prev->preds) == 1
2585 && EDGE_PRED (prev, 0)->src == prev2
2586 && prev2->loop_father == bb->loop_father
2587 && GET_CODE (branch_target) != REG)
2588 prop = prev;
2589
2590 /* Don't propagate when:
2591 - this is a simple loop and the hint would be too far
2592 - this is not a simple loop and there are 16 insns in
2593 this block already
2594 - the predecessor block ends in a branch that will be
2595 hinted
2596 - the predecessor block ends in an insn that invalidates
2597 the hint */
2598 if (prop
2599 && prop->index >= 0
2600 && (bbend = BB_END (prop))
2601 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2602 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2603 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2604 {
2605 if (dump_file)
2606 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2607 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2608 bb->index, prop->index, bb_loop_depth (bb),
2609 INSN_UID (branch), loop_exit, simple_loop,
2610 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2611
2612 spu_bb_info[prop->index].prop_jump = branch;
2613 spu_bb_info[prop->index].bb_index = i;
2614 }
2615 else if (branch_addr - next_addr >= required_dist)
2616 {
2617 if (dump_file)
2618 fprintf (dump_file, "hint for %i in block %i before %i\n",
2619 INSN_UID (branch), bb->index,
2620 INSN_UID (NEXT_INSN (insn)));
2621 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2622 branch_addr - next_addr, blocks);
2623 }
2624 branch = 0;
2625 }
2626 }
2627 free (spu_bb_info);
2628
2629 if (!bitmap_empty_p (blocks))
2630 find_many_sub_basic_blocks (blocks);
2631
2632 /* We have to schedule to make sure alignment is ok. */
2633 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2634
2635 /* The hints need to be scheduled, so call it again. */
2636 schedule_insns ();
2637 df_finish_pass (true);
2638
2639 insert_hbrp ();
2640
2641 pad_bb ();
2642
2643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2644 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2645 {
2646 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2647 between its branch label and the branch.  We don't move the
2648 label because GCC expects it at the beginning of the block. */
2649 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2650 rtx label_ref = XVECEXP (unspec, 0, 0);
2651 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2652 rtx_insn *branch;
2653 int offset = 0;
2654 for (branch = NEXT_INSN (label);
2655 !JUMP_P (branch) && !CALL_P (branch);
2656 branch = NEXT_INSN (branch))
2657 if (NONJUMP_INSN_P (branch))
2658 offset += get_attr_length (branch);
2659 if (offset > 0)
2660 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2661 }
2662
2663 spu_var_tracking ();
2664
2665 loop_optimizer_finalize ();
2666
2667 free_bb_for_insn ();
2668
2669 in_spu_reorg = 0;
2670 }
2671 \f
2672
2673 /* Insn scheduling routines, primarily for dual issue. */
2674 static int
2675 spu_sched_issue_rate (void)
2676 {
2677 return 2;
2678 }
2679
2680 static int
2681 uses_ls_unit(rtx_insn *insn)
2682 {
2683 rtx set = single_set (insn);
2684 if (set != 0
2685 && (GET_CODE (SET_DEST (set)) == MEM
2686 || GET_CODE (SET_SRC (set)) == MEM))
2687 return 1;
2688 return 0;
2689 }
2690
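/* Return which pipeline INSN issues to: 0 for pipe 0 (fixed-point and
   floating-point insns), 1 for pipe 1 (loads, stores, shuffles, branches
   and hints), -1 for inline asm and MULTI0, and -2 for TYPE_CONVERT.  */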
2691 static int
2692 get_pipe (rtx_insn *insn)
2693 {
2694 enum attr_type t;
2695 /* Handle inline asm */
2696 if (INSN_CODE (insn) == -1)
2697 return -1;
2698 t = get_attr_type (insn);
2699 switch (t)
2700 {
2701 case TYPE_CONVERT:
2702 return -2;
2703 case TYPE_MULTI0:
2704 return -1;
2705
2706 case TYPE_FX2:
2707 case TYPE_FX3:
2708 case TYPE_SPR:
2709 case TYPE_NOP:
2710 case TYPE_FXB:
2711 case TYPE_FPD:
2712 case TYPE_FP6:
2713 case TYPE_FP7:
2714 return 0;
2715
2716 case TYPE_LNOP:
2717 case TYPE_SHUF:
2718 case TYPE_LOAD:
2719 case TYPE_STORE:
2720 case TYPE_BR:
2721 case TYPE_MULTI1:
2722 case TYPE_HBR:
2723 case TYPE_IPREFETCH:
2724 return 1;
2725 default:
2726 abort ();
2727 }
2728 }
2729
2730
2731 /* haifa-sched.c has a static variable that keeps track of the current
2732 cycle. It is passed to spu_sched_reorder, and we record it here for
2733 use by spu_sched_variable_issue. It won't be accurate if the
2734 scheduler updates its clock_var between the two calls. */
2735 static int clock_var;
2736
2737 /* This is used to keep track of insn alignment. Set to 0 at the
2738 beginning of each block and increased by the "length" attr of each
2739 insn scheduled. */
2740 static int spu_sched_length;
2741
2742 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2743 ready list appropriately in spu_sched_reorder(). */
2744 static int pipe0_clock;
2745 static int pipe1_clock;
2746
2747 static int prev_clock_var;
2748
2749 static int prev_priority;
2750
2751 /* The SPU needs to load the next ilb sometime during the execution of
2752 the previous ilb. There is a potential conflict if every cycle has a
2753 load or store. To avoid the conflict we make sure the load/store
2754 unit is free for at least one cycle during the execution of insns in
2755 the previous ilb. */
2756 static int spu_ls_first;
2757 static int prev_ls_clock;
2758
2759 static void
2760 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2761 int max_ready ATTRIBUTE_UNUSED)
2762 {
2763 spu_sched_length = 0;
2764 }
2765
2766 static void
2767 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2768 int max_ready ATTRIBUTE_UNUSED)
2769 {
2770 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2771 {
2772 /* When any block might be at least 8-byte aligned, assume all
2773 blocks will be at least 8-byte aligned to make sure dual issue
2774 works out correctly. */
2775 spu_sched_length = 0;
2776 }
2777 spu_ls_first = INT_MAX;
2778 clock_var = -1;
2779 prev_ls_clock = -1;
2780 pipe0_clock = -1;
2781 pipe1_clock = -1;
2782 prev_clock_var = -1;
2783 prev_priority = -1;
2784 }
2785
2786 static int
2787 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2788 int verbose ATTRIBUTE_UNUSED,
2789 rtx_insn *insn, int more)
2790 {
2791 int len;
2792 int p;
2793 if (GET_CODE (PATTERN (insn)) == USE
2794 || GET_CODE (PATTERN (insn)) == CLOBBER
2795 || (len = get_attr_length (insn)) == 0)
2796 return more;
2797
2798 spu_sched_length += len;
2799
2800 /* Reset on inline asm */
2801 if (INSN_CODE (insn) == -1)
2802 {
2803 spu_ls_first = INT_MAX;
2804 pipe0_clock = -1;
2805 pipe1_clock = -1;
2806 return 0;
2807 }
2808 p = get_pipe (insn);
2809 if (p == 0)
2810 pipe0_clock = clock_var;
2811 else
2812 pipe1_clock = clock_var;
2813
2814 if (in_spu_reorg)
2815 {
2816 if (clock_var - prev_ls_clock > 1
2817 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2818 spu_ls_first = INT_MAX;
2819 if (uses_ls_unit (insn))
2820 {
2821 if (spu_ls_first == INT_MAX)
2822 spu_ls_first = spu_sched_length;
2823 prev_ls_clock = clock_var;
2824 }
2825
2826 /* The scheduler hasn't inserted the nop, but we will later on.
2827 Include those nops in spu_sched_length. */
2828 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2829 spu_sched_length += 4;
2830 prev_clock_var = clock_var;
2831
2832 /* MORE is -1 when called from spu_sched_reorder for new insns
2833 that don't have INSN_PRIORITY.  */
2834 if (more >= 0)
2835 prev_priority = INSN_PRIORITY (insn);
2836 }
2837
2838 /* Always try issuing more insns. spu_sched_reorder will decide
2839 when the cycle should be advanced. */
2840 return 1;
2841 }
2842
2843 /* This function is called for both TARGET_SCHED_REORDER and
2844 TARGET_SCHED_REORDER2. */
2845 static int
2846 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2847 rtx_insn **ready, int *nreadyp, int clock)
2848 {
2849 int i, nready = *nreadyp;
2850 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2851 rtx_insn *insn;
2852
2853 clock_var = clock;
2854
2855 if (nready <= 0 || pipe1_clock >= clock)
2856 return 0;
2857
2858 /* Find any rtl insns that don't generate assembly insns and schedule
2859 them first. */
2860 for (i = nready - 1; i >= 0; i--)
2861 {
2862 insn = ready[i];
2863 if (INSN_CODE (insn) == -1
2864 || INSN_CODE (insn) == CODE_FOR_blockage
2865 || (INSN_P (insn) && get_attr_length (insn) == 0))
2866 {
2867 ready[i] = ready[nready - 1];
2868 ready[nready - 1] = insn;
2869 return 1;
2870 }
2871 }
2872
2873 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2874 for (i = 0; i < nready; i++)
2875 if (INSN_CODE (ready[i]) != -1)
2876 {
2877 insn = ready[i];
2878 switch (get_attr_type (insn))
2879 {
2880 default:
2881 case TYPE_MULTI0:
2882 case TYPE_CONVERT:
2883 case TYPE_FX2:
2884 case TYPE_FX3:
2885 case TYPE_SPR:
2886 case TYPE_NOP:
2887 case TYPE_FXB:
2888 case TYPE_FPD:
2889 case TYPE_FP6:
2890 case TYPE_FP7:
2891 pipe_0 = i;
2892 break;
2893 case TYPE_LOAD:
2894 case TYPE_STORE:
2895 pipe_ls = i;
2896 /* FALLTHRU */
2897 case TYPE_LNOP:
2898 case TYPE_SHUF:
2899 case TYPE_BR:
2900 case TYPE_MULTI1:
2901 case TYPE_HBR:
2902 pipe_1 = i;
2903 break;
2904 case TYPE_IPREFETCH:
2905 pipe_hbrp = i;
2906 break;
2907 }
2908 }
2909
2910 /* In the first scheduling phase, schedule loads and stores together
2911 to increase the chance they will get merged during postreload CSE. */
2912 if (!reload_completed && pipe_ls >= 0)
2913 {
2914 insn = ready[pipe_ls];
2915 ready[pipe_ls] = ready[nready - 1];
2916 ready[nready - 1] = insn;
2917 return 1;
2918 }
2919
2920 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2921 if (pipe_hbrp >= 0)
2922 pipe_1 = pipe_hbrp;
2923
2924 /* When we have loads/stores in every cycle of the last 15 insns and
2925 we are about to schedule another load/store, emit an hbrp insn
2926 instead. */
2927 if (in_spu_reorg
2928 && spu_sched_length - spu_ls_first >= 4 * 15
2929 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2930 {
2931 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2932 recog_memoized (insn);
2933 if (pipe0_clock < clock)
2934 PUT_MODE (insn, TImode);
2935 spu_sched_variable_issue (file, verbose, insn, -1);
2936 return 0;
2937 }
2938
2939 /* In general, we want to emit nops to increase dual issue, but dual
2940 issue isn't faster when one of the insns could be scheduled later
2941 without affecting the critical path.  We look at INSN_PRIORITY to
2942 make a good guess, but it isn't perfect so -mdual-nops=n can be
2943 used to influence it. */
2944 if (in_spu_reorg && spu_dual_nops < 10)
2945 {
2946 /* When we are at an even address and we are not issuing nops to
2947 improve scheduling then we need to advance the cycle. */
2948 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2949 && (spu_dual_nops == 0
2950 || (pipe_1 != -1
2951 && prev_priority >
2952 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2953 return 0;
2954
2955 /* When at an odd address, schedule the highest priority insn
2956 without considering pipeline. */
2957 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2958 && (spu_dual_nops == 0
2959 || (prev_priority >
2960 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2961 return 1;
2962 }
2963
2964
2965 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2966 pipe0 insn in the ready list, schedule it. */
2967 if (pipe0_clock < clock && pipe_0 >= 0)
2968 schedule_i = pipe_0;
2969
2970 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2971 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2972 else
2973 schedule_i = pipe_1;
2974
2975 if (schedule_i > -1)
2976 {
2977 insn = ready[schedule_i];
2978 ready[schedule_i] = ready[nready - 1];
2979 ready[nready - 1] = insn;
2980 return 1;
2981 }
2982 return 0;
2983 }
2984
2985 /* INSN is dependent on DEP_INSN. */
2986 static int
2987 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2988 int cost, unsigned int)
2989 {
2990 rtx set;
2991
2992 /* The blockage pattern is used to prevent instructions from being
2993 moved across it and has no cost. */
2994 if (INSN_CODE (insn) == CODE_FOR_blockage
2995 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2996 return 0;
2997
2998 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2999 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3000 return 0;
3001
3002 /* Make sure hbrps are spread out. */
3003 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3004 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3005 return 8;
3006
3007 /* Make sure hints and hbrps are 2 cycles apart. */
3008 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3009 || INSN_CODE (insn) == CODE_FOR_hbr)
3010 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3012 return 2;
3013
3014 /* An hbrp has no real dependency on other insns. */
3015 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3017 return 0;
3018
3019 /* Assuming that it is unlikely an argument register will be used in
3020 the first cycle of the called function, we reduce the cost for
3021 slightly better scheduling of dep_insn. When not hinted, the
3022 mispredicted branch would hide the cost as well. */
3023 if (CALL_P (insn))
3024 {
3025 rtx target = get_branch_target (insn);
3026 if (GET_CODE (target) != REG || !set_of (target, insn))
3027 return cost - 2;
3028 return cost;
3029 }
3030
3031 /* And when returning from a function, let's assume the return values
3032 are completed sooner too. */
3033 if (CALL_P (dep_insn))
3034 return cost - 2;
3035
3036 /* Make sure an instruction that loads from the back chain is scheduled
3037 away from the return instruction so a hint is more likely to get
3038 issued. */
3039 if (INSN_CODE (insn) == CODE_FOR__return
3040 && (set = single_set (dep_insn))
3041 && GET_CODE (SET_DEST (set)) == REG
3042 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3043 return 20;
3044
3045 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3046 scheduler makes every insn in a block anti-dependent on the final
3047 jump_insn. We adjust here so higher cost insns will get scheduled
3048 earlier. */
3049 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3050 return insn_cost (dep_insn) - 3;
3051
3052 return cost;
3053 }
3054 \f
3055 /* Create a CONST_DOUBLE from a string. */
3056 rtx
3057 spu_float_const (const char *string, machine_mode mode)
3058 {
3059 REAL_VALUE_TYPE value;
3060 value = REAL_VALUE_ATOF (string, mode);
3061 return const_double_from_real_value (value, mode);
3062 }
3063
3064 int
3065 spu_constant_address_p (rtx x)
3066 {
3067 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3068 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3069 || GET_CODE (x) == HIGH);
3070 }
3071
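/* Return which immediate-load instruction can load VAL in a single
   instruction: SPU_IL for a sign-extended 16-bit value, SPU_ILA for an
   18-bit unsigned value, SPU_ILH when both halfwords of VAL are equal,
   SPU_ILHU when the low halfword is zero, and SPU_NONE otherwise.
   For instance, 0x1234 -> SPU_IL, 0x00030000 -> SPU_ILA,
   0x12341234 -> SPU_ILH, 0x12340000 -> SPU_ILHU, and
   0x12345678 -> SPU_NONE (it needs an ilhu/iohl pair).  */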
3072 static enum spu_immediate
3073 which_immediate_load (HOST_WIDE_INT val)
3074 {
3075 gcc_assert (val == trunc_int_for_mode (val, SImode));
3076
3077 if (val >= -0x8000 && val <= 0x7fff)
3078 return SPU_IL;
3079 if (val >= 0 && val <= 0x3ffff)
3080 return SPU_ILA;
3081 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3082 return SPU_ILH;
3083 if ((val & 0xffff) == 0)
3084 return SPU_ILHU;
3085
3086 return SPU_NONE;
3087 }
3088
3089 /* Return true when OP can be loaded by one of the il instructions, or
3090 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3091 int
3092 immediate_load_p (rtx op, machine_mode mode)
3093 {
3094 if (CONSTANT_P (op))
3095 {
3096 enum immediate_class c = classify_immediate (op, mode);
3097 return c == IC_IL1 || c == IC_IL1s
3098 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3099 }
3100 return 0;
3101 }
3102
3103 /* Return true if the first SIZE bytes of ARR form a constant that can be
3104 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3105 represent the size and offset of the instruction to use. */
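/* Illustrative example: the cwd pattern for a word inserted at offset 8
   is 10 11 12 13 14 15 16 17 00 01 02 03 1c 1d 1e 1f, i.e. the identity
   shuffle bytes (0x10 + i) everywhere except for a run of 00 01 02 03 at
   the insertion point; cbd uses a run of just 03, chd a run of 02 03,
   and cdd a run of 00 .. 07.  */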
3106 static int
3107 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3108 {
3109 int cpat, run, i, start;
3110 cpat = 1;
3111 run = 0;
3112 start = -1;
3113 for (i = 0; i < size && cpat; i++)
3114 if (arr[i] != i+16)
3115 {
3116 if (!run)
3117 {
3118 start = i;
3119 if (arr[i] == 3)
3120 run = 1;
3121 else if (arr[i] == 2 && arr[i+1] == 3)
3122 run = 2;
3123 else if (arr[i] == 0)
3124 {
3125 while (arr[i+run] == run && i+run < 16)
3126 run++;
3127 if (run != 4 && run != 8)
3128 cpat = 0;
3129 }
3130 else
3131 cpat = 0;
3132 if ((i & (run-1)) != 0)
3133 cpat = 0;
3134 i += run;
3135 }
3136 else
3137 cpat = 0;
3138 }
3139 if (cpat && (run || size < 16))
3140 {
3141 if (run == 0)
3142 run = 1;
3143 if (prun)
3144 *prun = run;
3145 if (pstart)
3146 *pstart = start == -1 ? 16-run : start;
3147 return 1;
3148 }
3149 return 0;
3150 }
3151
3152 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3153 it into a register. MODE is only valid when OP is a CONST_INT. */
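/* For example, a constant whose 16 bytes are all either 0x00 or 0xff is
   classified IC_FSMBI (a single fsmbi can build it), while one whose
   nonzero bytes all equal some other single value is IC_FSMBI2 (fsmbi
   plus one extra instruction).  */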
3154 static enum immediate_class
3155 classify_immediate (rtx op, machine_mode mode)
3156 {
3157 HOST_WIDE_INT val;
3158 unsigned char arr[16];
3159 int i, j, repeated, fsmbi, repeat;
3160
3161 gcc_assert (CONSTANT_P (op));
3162
3163 if (GET_MODE (op) != VOIDmode)
3164 mode = GET_MODE (op);
3165
3166 /* A V4SI const_vector with all identical symbols is ok. */
3167 if (!flag_pic
3168 && mode == V4SImode
3169 && GET_CODE (op) == CONST_VECTOR
3170 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3171 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3172 op = unwrap_const_vec_duplicate (op);
3173
3174 switch (GET_CODE (op))
3175 {
3176 case SYMBOL_REF:
3177 case LABEL_REF:
3178 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3179
3180 case CONST:
3181 /* We can never know if the resulting address fits in 18 bits and can be
3182 loaded with ila. For now, assume the address will not overflow if
3183 the displacement is "small" (fits 'K' constraint). */
3184 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3185 {
3186 rtx sym = XEXP (XEXP (op, 0), 0);
3187 rtx cst = XEXP (XEXP (op, 0), 1);
3188
3189 if (GET_CODE (sym) == SYMBOL_REF
3190 && GET_CODE (cst) == CONST_INT
3191 && satisfies_constraint_K (cst))
3192 return IC_IL1s;
3193 }
3194 return IC_IL2s;
3195
3196 case HIGH:
3197 return IC_IL1s;
3198
3199 case CONST_VECTOR:
3200 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3201 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3202 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3203 return IC_POOL;
3204 /* Fall through. */
3205
3206 case CONST_INT:
3207 case CONST_DOUBLE:
3208 constant_to_array (mode, op, arr);
3209
3210 /* Check that each 4-byte slot is identical. */
3211 repeated = 1;
3212 for (i = 4; i < 16; i += 4)
3213 for (j = 0; j < 4; j++)
3214 if (arr[j] != arr[i + j])
3215 repeated = 0;
3216
3217 if (repeated)
3218 {
3219 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3220 val = trunc_int_for_mode (val, SImode);
3221
3222 if (which_immediate_load (val) != SPU_NONE)
3223 return IC_IL1;
3224 }
3225
3226 /* Any mode of 2 bytes or smaller can be loaded with an il
3227 instruction. */
3228 gcc_assert (GET_MODE_SIZE (mode) > 2);
3229
3230 fsmbi = 1;
3231 repeat = 0;
3232 for (i = 0; i < 16 && fsmbi; i++)
3233 if (arr[i] != 0 && repeat == 0)
3234 repeat = arr[i];
3235 else if (arr[i] != 0 && arr[i] != repeat)
3236 fsmbi = 0;
3237 if (fsmbi)
3238 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3239
3240 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3241 return IC_CPAT;
3242
3243 if (repeated)
3244 return IC_IL2;
3245
3246 return IC_POOL;
3247 default:
3248 break;
3249 }
3250 gcc_unreachable ();
3251 }
3252
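/* Return which instruction can OR the immediate VAL into a register in
   one step: SPU_ORI for a 10-bit signed value, SPU_IOHL for a 16-bit
   unsigned value, SPU_ORHI or SPU_ORBI when the repeating halfword or
   byte fits in the 10-bit signed immediate, and SPU_NONE otherwise.  */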
3253 static enum spu_immediate
3254 which_logical_immediate (HOST_WIDE_INT val)
3255 {
3256 gcc_assert (val == trunc_int_for_mode (val, SImode));
3257
3258 if (val >= -0x200 && val <= 0x1ff)
3259 return SPU_ORI;
3260 if (val >= 0 && val <= 0xffff)
3261 return SPU_IOHL;
3262 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3263 {
3264 val = trunc_int_for_mode (val, HImode);
3265 if (val >= -0x200 && val <= 0x1ff)
3266 return SPU_ORHI;
3267 if ((val & 0xff) == ((val >> 8) & 0xff))
3268 {
3269 val = trunc_int_for_mode (val, QImode);
3270 if (val >= -0x200 && val <= 0x1ff)
3271 return SPU_ORBI;
3272 }
3273 }
3274 return SPU_NONE;
3275 }
3276
3277 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3278 CONST_DOUBLEs. */
3279 static int
3280 const_vector_immediate_p (rtx x)
3281 {
3282 int i;
3283 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3284 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3285 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3286 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3287 return 0;
3288 return 1;
3289 }
3290
3291 int
3292 logical_immediate_p (rtx op, machine_mode mode)
3293 {
3294 HOST_WIDE_INT val;
3295 unsigned char arr[16];
3296 int i, j;
3297
3298 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3299 || GET_CODE (op) == CONST_VECTOR);
3300
3301 if (GET_CODE (op) == CONST_VECTOR
3302 && !const_vector_immediate_p (op))
3303 return 0;
3304
3305 if (GET_MODE (op) != VOIDmode)
3306 mode = GET_MODE (op);
3307
3308 constant_to_array (mode, op, arr);
3309
3310 /* Check that bytes are repeated. */
3311 for (i = 4; i < 16; i += 4)
3312 for (j = 0; j < 4; j++)
3313 if (arr[j] != arr[i + j])
3314 return 0;
3315
3316 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3317 val = trunc_int_for_mode (val, SImode);
3318
3319 i = which_logical_immediate (val);
3320 return i != SPU_NONE && i != SPU_IOHL;
3321 }
3322
3323 int
3324 iohl_immediate_p (rtx op, machine_mode mode)
3325 {
3326 HOST_WIDE_INT val;
3327 unsigned char arr[16];
3328 int i, j;
3329
3330 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3331 || GET_CODE (op) == CONST_VECTOR);
3332
3333 if (GET_CODE (op) == CONST_VECTOR
3334 && !const_vector_immediate_p (op))
3335 return 0;
3336
3337 if (GET_MODE (op) != VOIDmode)
3338 mode = GET_MODE (op);
3339
3340 constant_to_array (mode, op, arr);
3341
3342 /* Check that bytes are repeated. */
3343 for (i = 4; i < 16; i += 4)
3344 for (j = 0; j < 4; j++)
3345 if (arr[j] != arr[i + j])
3346 return 0;
3347
3348 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3349 val = trunc_int_for_mode (val, SImode);
3350
3351 return val >= 0 && val <= 0xffff;
3352 }
3353
3354 int
3355 arith_immediate_p (rtx op, machine_mode mode,
3356 HOST_WIDE_INT low, HOST_WIDE_INT high)
3357 {
3358 HOST_WIDE_INT val;
3359 unsigned char arr[16];
3360 int bytes, i, j;
3361
3362 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3363 || GET_CODE (op) == CONST_VECTOR);
3364
3365 if (GET_CODE (op) == CONST_VECTOR
3366 && !const_vector_immediate_p (op))
3367 return 0;
3368
3369 if (GET_MODE (op) != VOIDmode)
3370 mode = GET_MODE (op);
3371
3372 constant_to_array (mode, op, arr);
3373
3374 bytes = GET_MODE_UNIT_SIZE (mode);
3375 mode = mode_for_size (GET_MODE_UNIT_BITSIZE (mode), MODE_INT, 0);
3376
3377 /* Check that bytes are repeated. */
3378 for (i = bytes; i < 16; i += bytes)
3379 for (j = 0; j < bytes; j++)
3380 if (arr[j] != arr[i + j])
3381 return 0;
3382
3383 val = arr[0];
3384 for (j = 1; j < bytes; j++)
3385 val = (val << 8) | arr[j];
3386
3387 val = trunc_int_for_mode (val, mode);
3388
3389 return val >= low && val <= high;
3390 }
3391
3392 /* Return TRUE when OP is an immediate and an exact power of 2, i.e.
3393 OP == 2^scale with scale >= LOW && scale <= HIGH.  When OP is a vector,
3394 all entries must be the same. */
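/* For example, the SFmode constant 32.0 has the bit pattern 0x42000000:
   the mantissa bits are zero and (0x42000000 >> 23) - 127 == 5, so it is
   accepted whenever LOW <= 5 && 5 <= HIGH.  */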
3395 bool
3396 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3397 {
3398 machine_mode int_mode;
3399 HOST_WIDE_INT val;
3400 unsigned char arr[16];
3401 int bytes, i, j;
3402
3403 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3404 || GET_CODE (op) == CONST_VECTOR);
3405
3406 if (GET_CODE (op) == CONST_VECTOR
3407 && !const_vector_immediate_p (op))
3408 return 0;
3409
3410 if (GET_MODE (op) != VOIDmode)
3411 mode = GET_MODE (op);
3412
3413 constant_to_array (mode, op, arr);
3414
3415 mode = GET_MODE_INNER (mode);
3416
3417 bytes = GET_MODE_SIZE (mode);
3418 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3419
3420 /* Check that bytes are repeated. */
3421 for (i = bytes; i < 16; i += bytes)
3422 for (j = 0; j < bytes; j++)
3423 if (arr[j] != arr[i + j])
3424 return 0;
3425
3426 val = arr[0];
3427 for (j = 1; j < bytes; j++)
3428 val = (val << 8) | arr[j];
3429
3430 val = trunc_int_for_mode (val, int_mode);
3431
3432 /* Currently, we only handle SFmode.  */
3433 gcc_assert (mode == SFmode);
3434 if (mode == SFmode)
3435 {
3436 int exp = (val >> 23) - 127;
3437 return val > 0 && (val & 0x007fffff) == 0
3438 && exp >= low && exp <= high;
3439 }
3440 return FALSE;
3441 }
3442
3443 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3444
3445 static bool
3446 ea_symbol_ref_p (const_rtx x)
3447 {
3448 tree decl;
3449
3450 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3451 {
3452 rtx plus = XEXP (x, 0);
3453 rtx op0 = XEXP (plus, 0);
3454 rtx op1 = XEXP (plus, 1);
3455 if (GET_CODE (op1) == CONST_INT)
3456 x = op0;
3457 }
3458
3459 return (GET_CODE (x) == SYMBOL_REF
3460 && (decl = SYMBOL_REF_DECL (x)) != 0
3461 && TREE_CODE (decl) == VAR_DECL
3462 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3463 }
3464
3465 /* We accept:
3466 - any 32-bit constant (SImode, SFmode)
3467 - any constant that can be generated with fsmbi (any mode)
3468 - a 64-bit constant where the high and low bits are identical
3469 (DImode, DFmode)
3470 - a 128-bit constant where the four 32-bit words match. */
3471 bool
3472 spu_legitimate_constant_p (machine_mode mode, rtx x)
3473 {
3474 subrtx_iterator::array_type array;
3475 if (GET_CODE (x) == HIGH)
3476 x = XEXP (x, 0);
3477
3478 /* Reject any __ea qualified reference. These can't appear in
3479 instructions but must be forced to the constant pool. */
3480 FOR_EACH_SUBRTX (iter, array, x, ALL)
3481 if (ea_symbol_ref_p (*iter))
3482 return 0;
3483
3484 /* V4SI with all identical symbols is valid. */
3485 if (!flag_pic
3486 && mode == V4SImode
3487 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3488 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3489 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3490 return const_vec_duplicate_p (x);
3491
3492 if (GET_CODE (x) == CONST_VECTOR
3493 && !const_vector_immediate_p (x))
3494 return 0;
3495 return 1;
3496 }
3497
3498 /* Valid addresses are:
3499 - symbol_ref, label_ref, const
3500 - reg
3501 - reg + const_int, where const_int is 16 byte aligned
3502 - reg + reg, alignment doesn't matter
3503 The alignment matters in the reg+const case because lqd and stqd
3504 ignore the 4 least significant bits of the const. We only care about
3505 16 byte modes because the expand phase will change all smaller MEM
3506 references to TImode. */
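/* For instance, for a TImode access (reg + 32) is accepted, while
   (reg + 20) is rejected because lqd and stqd would silently ignore the
   low 4 bits of the offset.  Displacements outside -0x2000 .. 0x1fff are
   rejected for any mode.  */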
3507 static bool
3508 spu_legitimate_address_p (machine_mode mode,
3509 rtx x, bool reg_ok_strict)
3510 {
3511 int aligned = GET_MODE_SIZE (mode) >= 16;
3512 if (aligned
3513 && GET_CODE (x) == AND
3514 && GET_CODE (XEXP (x, 1)) == CONST_INT
3515 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3516 x = XEXP (x, 0);
3517 switch (GET_CODE (x))
3518 {
3519 case LABEL_REF:
3520 return !TARGET_LARGE_MEM;
3521
3522 case SYMBOL_REF:
3523 case CONST:
3524 /* Keep __ea references until reload so that spu_expand_mov can see them
3525 in MEMs. */
3526 if (ea_symbol_ref_p (x))
3527 return !reload_in_progress && !reload_completed;
3528 return !TARGET_LARGE_MEM;
3529
3530 case CONST_INT:
3531 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3532
3533 case SUBREG:
3534 x = XEXP (x, 0);
3535 if (!REG_P (x))
3536 return 0;
3537 /* FALLTHRU */
3538
3539 case REG:
3540 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3541
3542 case PLUS:
3543 case LO_SUM:
3544 {
3545 rtx op0 = XEXP (x, 0);
3546 rtx op1 = XEXP (x, 1);
3547 if (GET_CODE (op0) == SUBREG)
3548 op0 = XEXP (op0, 0);
3549 if (GET_CODE (op1) == SUBREG)
3550 op1 = XEXP (op1, 0);
3551 if (GET_CODE (op0) == REG
3552 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3553 && GET_CODE (op1) == CONST_INT
3554 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3555 /* If virtual registers are involved, the displacement will
3556 change later on anyway, so checking would be premature.
3557 Reload will make sure the final displacement after
3558 register elimination is OK. */
3559 || op0 == arg_pointer_rtx
3560 || op0 == frame_pointer_rtx
3561 || op0 == virtual_stack_vars_rtx)
3562 && (!aligned || (INTVAL (op1) & 15) == 0))
3563 return TRUE;
3564 if (GET_CODE (op0) == REG
3565 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3566 && GET_CODE (op1) == REG
3567 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3568 return TRUE;
3569 }
3570 break;
3571
3572 default:
3573 break;
3574 }
3575 return FALSE;
3576 }
3577
3578 /* Like spu_legitimate_address_p, except with named addresses. */
3579 static bool
3580 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3581 bool reg_ok_strict, addr_space_t as)
3582 {
3583 if (as == ADDR_SPACE_EA)
3584 return (REG_P (x) && (GET_MODE (x) == EAmode));
3585
3586 else if (as != ADDR_SPACE_GENERIC)
3587 gcc_unreachable ();
3588
3589 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3590 }
3591
3592 /* When the address is reg + const_int, force the const_int into a
3593 register. */
3594 static rtx
3595 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3596 machine_mode mode ATTRIBUTE_UNUSED)
3597 {
3598 rtx op0, op1;
3599 /* Make sure both operands are registers. */
3600 if (GET_CODE (x) == PLUS)
3601 {
3602 op0 = XEXP (x, 0);
3603 op1 = XEXP (x, 1);
3604 if (ALIGNED_SYMBOL_REF_P (op0))
3605 {
3606 op0 = force_reg (Pmode, op0);
3607 mark_reg_pointer (op0, 128);
3608 }
3609 else if (GET_CODE (op0) != REG)
3610 op0 = force_reg (Pmode, op0);
3611 if (ALIGNED_SYMBOL_REF_P (op1))
3612 {
3613 op1 = force_reg (Pmode, op1);
3614 mark_reg_pointer (op1, 128);
3615 }
3616 else if (GET_CODE (op1) != REG)
3617 op1 = force_reg (Pmode, op1);
3618 x = gen_rtx_PLUS (Pmode, op0, op1);
3619 }
3620 return x;
3621 }
3622
3623 /* Like spu_legitimize_address, except with named address support. */
3624 static rtx
3625 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3626 addr_space_t as)
3627 {
3628 if (as != ADDR_SPACE_GENERIC)
3629 return x;
3630
3631 return spu_legitimize_address (x, oldx, mode);
3632 }
3633
3634 /* Reload reg + const_int for out-of-range displacements. */
3635 rtx
3636 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3637 int opnum, int type)
3638 {
3639 bool removed_and = false;
3640
3641 if (GET_CODE (ad) == AND
3642 && CONST_INT_P (XEXP (ad, 1))
3643 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3644 {
3645 ad = XEXP (ad, 0);
3646 removed_and = true;
3647 }
3648
3649 if (GET_CODE (ad) == PLUS
3650 && REG_P (XEXP (ad, 0))
3651 && CONST_INT_P (XEXP (ad, 1))
3652 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3653 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3654 {
3655 /* Unshare the sum. */
3656 ad = copy_rtx (ad);
3657
3658 /* Reload the displacement. */
3659 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3660 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3661 opnum, (enum reload_type) type);
3662
3663 /* Add back AND for alignment if we stripped it. */
3664 if (removed_and)
3665 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3666
3667 return ad;
3668 }
3669
3670 return NULL_RTX;
3671 }
3672
3673 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3674 struct attribute_spec.handler. */
3675 static tree
3676 spu_handle_fndecl_attribute (tree * node,
3677 tree name,
3678 tree args ATTRIBUTE_UNUSED,
3679 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3680 {
3681 if (TREE_CODE (*node) != FUNCTION_DECL)
3682 {
3683 warning (0, "%qE attribute only applies to functions",
3684 name);
3685 *no_add_attrs = true;
3686 }
3687
3688 return NULL_TREE;
3689 }
3690
3691 /* Handle the "vector" attribute. */
3692 static tree
3693 spu_handle_vector_attribute (tree * node, tree name,
3694 tree args ATTRIBUTE_UNUSED,
3695 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3696 {
3697 tree type = *node, result = NULL_TREE;
3698 machine_mode mode;
3699 int unsigned_p;
3700
3701 while (POINTER_TYPE_P (type)
3702 || TREE_CODE (type) == FUNCTION_TYPE
3703 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3704 type = TREE_TYPE (type);
3705
3706 mode = TYPE_MODE (type);
3707
3708 unsigned_p = TYPE_UNSIGNED (type);
3709 switch (mode)
3710 {
3711 case DImode:
3712 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3713 break;
3714 case SImode:
3715 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3716 break;
3717 case HImode:
3718 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3719 break;
3720 case QImode:
3721 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3722 break;
3723 case SFmode:
3724 result = V4SF_type_node;
3725 break;
3726 case DFmode:
3727 result = V2DF_type_node;
3728 break;
3729 default:
3730 break;
3731 }
3732
3733 /* Propagate qualifiers attached to the element type
3734 onto the vector type. */
3735 if (result && result != type && TYPE_QUALS (type))
3736 result = build_qualified_type (result, TYPE_QUALS (type));
3737
3738 *no_add_attrs = true; /* No need to hang on to the attribute. */
3739
3740 if (!result)
3741 warning (0, "%qE attribute ignored", name);
3742 else
3743 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3744
3745 return NULL_TREE;
3746 }
3747
3748 /* Return nonzero if FUNC is a naked function. */
3749 static int
3750 spu_naked_function_p (tree func)
3751 {
3752 tree a;
3753
3754 if (TREE_CODE (func) != FUNCTION_DECL)
3755 abort ();
3756
3757 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3758 return a != NULL_TREE;
3759 }
3760
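/* Return the initial difference between the FROM and TO elimination
   registers, computed from the frame size, the outgoing argument area
   and the saved register area; a leaf function with no frame needs no
   extra STACK_POINTER_OFFSET.  */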
3761 int
3762 spu_initial_elimination_offset (int from, int to)
3763 {
3764 int saved_regs_size = spu_saved_regs_size ();
3765 int sp_offset = 0;
3766 if (!crtl->is_leaf || crtl->outgoing_args_size
3767 || get_frame_size () || saved_regs_size)
3768 sp_offset = STACK_POINTER_OFFSET;
3769 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3770 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3771 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3772 return get_frame_size ();
3773 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3774 return sp_offset + crtl->outgoing_args_size
3775 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3776 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3777 return get_frame_size () + saved_regs_size + sp_offset;
3778 else
3779 gcc_unreachable ();
3780 }
3781
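/* Return the rtx describing where a value of type TYPE is returned.
   Scalar values come back in FIRST_RETURN_REGNUM; aggregates small
   enough to fit in registers are returned left justified in a PARALLEL
   of TImode registers starting at FIRST_RETURN_REGNUM.  */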
3782 rtx
3783 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3784 {
3785 machine_mode mode = TYPE_MODE (type);
3786 int byte_size = ((mode == BLKmode)
3787 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3788
3789 /* Make sure small structs are left justified in a register. */
3790 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3791 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3792 {
3793 machine_mode smode;
3794 rtvec v;
3795 int i;
3796 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3797 int n = byte_size / UNITS_PER_WORD;
3798 v = rtvec_alloc (nregs);
3799 for (i = 0; i < n; i++)
3800 {
3801 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3802 gen_rtx_REG (TImode,
3803 FIRST_RETURN_REGNUM
3804 + i),
3805 GEN_INT (UNITS_PER_WORD * i));
3806 byte_size -= UNITS_PER_WORD;
3807 }
3808
3809 if (n < nregs)
3810 {
3811 if (byte_size < 4)
3812 byte_size = 4;
3813 smode =
3814 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3815 RTVEC_ELT (v, n) =
3816 gen_rtx_EXPR_LIST (VOIDmode,
3817 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3818 GEN_INT (UNITS_PER_WORD * n));
3819 }
3820 return gen_rtx_PARALLEL (mode, v);
3821 }
3822 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3823 }
3824
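/* Decide how the next argument of mode MODE and type TYPE is passed.
   Return the register (or a PARALLEL left-justifying a small aggregate)
   to pass it in, or 0 when it no longer fits in the remaining argument
   registers and must go on the stack.  */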
3825 static rtx
3826 spu_function_arg (cumulative_args_t cum_v,
3827 machine_mode mode,
3828 const_tree type, bool named ATTRIBUTE_UNUSED)
3829 {
3830 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3831 int byte_size;
3832
3833 if (*cum >= MAX_REGISTER_ARGS)
3834 return 0;
3835
3836 byte_size = ((mode == BLKmode)
3837 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3838
3839 /* The ABI does not allow parameters to be passed partially in
3840 reg and partially in stack. */
3841 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3842 return 0;
3843
3844 /* Make sure small structs are left justified in a register. */
3845 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3846 && byte_size < UNITS_PER_WORD && byte_size > 0)
3847 {
3848 machine_mode smode;
3849 rtx gr_reg;
3850 if (byte_size < 4)
3851 byte_size = 4;
3852 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3853 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3854 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3855 const0_rtx);
3856 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3857 }
3858 else
3859 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3860 }
3861
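/* Advance the argument cursor CUM past the argument just processed:
   variable-sized and VOIDmode arguments count as one register slot,
   BLKmode arguments as one slot per 16 bytes, and everything else as
   HARD_REGNO_NREGS registers.  */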
3862 static void
3863 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3864 const_tree type, bool named ATTRIBUTE_UNUSED)
3865 {
3866 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3867
3868 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3869 ? 1
3870 : mode == BLKmode
3871 ? ((int_size_in_bytes (type) + 15) / 16)
3872 : mode == VOIDmode
3873 ? 1
3874 : HARD_REGNO_NREGS (cum, mode));
3875 }
3876
3877 /* Variable sized types are passed by reference. */
3878 static bool
3879 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3880 machine_mode mode ATTRIBUTE_UNUSED,
3881 const_tree type, bool named ATTRIBUTE_UNUSED)
3882 {
3883 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3884 }
3885 \f
3886
3887 /* Var args. */
3888
3889 /* Create and return the va_list datatype.
3890
3891 On SPU, va_list is an array type equivalent to
3892
3893 typedef struct __va_list_tag
3894 {
3895 void *__args __attribute__((__aligned(16)));
3896 void *__skip __attribute__((__aligned(16)));
3897
3898 } va_list[1];
3899
3900 where __args points to the arg that will be returned by the next
3901 va_arg(), and __skip points to the previous stack frame such that
3902 when __args == __skip we should advance __args by 32 bytes. */
3903 static tree
3904 spu_build_builtin_va_list (void)
3905 {
3906 tree f_args, f_skip, record, type_decl;
3907 bool owp;
3908
3909 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3910
3911 type_decl =
3912 build_decl (BUILTINS_LOCATION,
3913 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3914
3915 f_args = build_decl (BUILTINS_LOCATION,
3916 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3917 f_skip = build_decl (BUILTINS_LOCATION,
3918 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3919
3920 DECL_FIELD_CONTEXT (f_args) = record;
3921 SET_DECL_ALIGN (f_args, 128);
3922 DECL_USER_ALIGN (f_args) = 1;
3923
3924 DECL_FIELD_CONTEXT (f_skip) = record;
3925 SET_DECL_ALIGN (f_skip, 128);
3926 DECL_USER_ALIGN (f_skip) = 1;
3927
3928 TYPE_STUB_DECL (record) = type_decl;
3929 TYPE_NAME (record) = type_decl;
3930 TYPE_FIELDS (record) = f_args;
3931 DECL_CHAIN (f_args) = f_skip;
3932
3933 /* We know this is being padded and we want it that way.  It is an
3934 internal type so hide the warnings from the user. */
3935 owp = warn_padded;
3936 warn_padded = false;
3937
3938 layout_type (record);
3939
3940 warn_padded = owp;
3941
3942 /* The correct type is an array type of one element. */
3943 return build_array_type (record, build_index_type (size_zero_node));
3944 }
3945
3946 /* Implement va_start by filling the va_list structure VALIST.
3947 NEXTARG points to the first anonymous stack argument.
3948
3949 The following global variables are used to initialize
3950 the va_list structure:
3951
3952 crtl->args.info;
3953 the CUMULATIVE_ARGS for this function
3954
3955 crtl->args.arg_offset_rtx:
3956 holds the offset of the first anonymous stack argument
3957 (relative to the virtual arg pointer). */
3958
3959 static void
3960 spu_va_start (tree valist, rtx nextarg)
3961 {
3962 tree f_args, f_skip;
3963 tree args, skip, t;
3964
3965 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3966 f_skip = DECL_CHAIN (f_args);
3967
3968 valist = build_simple_mem_ref (valist);
3969 args =
3970 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3971 skip =
3972 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3973
3974 /* Find the __args area. */
3975 t = make_tree (TREE_TYPE (args), nextarg);
3976 if (crtl->args.pretend_args_size > 0)
3977 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3978 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3979 TREE_SIDE_EFFECTS (t) = 1;
3980 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3981
3982 /* Find the __skip area. */
3983 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3984 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3985 - STACK_POINTER_OFFSET));
3986 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3987 TREE_SIDE_EFFECTS (t) = 1;
3988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3989 }
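
/* Roughly, the expansion above leaves the two fields holding
   (illustrative summary only):

       __args = NEXTARG
                - (crtl->args.pretend_args_size > 0 ? STACK_POINTER_OFFSET : 0);
       __skip = <virtual incoming args>
                + crtl->args.pretend_args_size - STACK_POINTER_OFFSET;

   so __args starts at the first anonymous argument and __skip marks the
   point at which va_arg must advance by an extra 32 bytes (see
   spu_gimplify_va_arg_expr below).  */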
3990
3991 /* Gimplify va_arg by updating the va_list structure
3992 VALIST as required to retrieve an argument of type
3993 TYPE, and returning that argument.
3994
3995 ret = va_arg(VALIST, TYPE);
3996
3997 generates code equivalent to:
3998
3999 paddedsize = (sizeof(TYPE) + 15) & -16;
4000 if (VALIST.__args + paddedsize > VALIST.__skip
4001 && VALIST.__args <= VALIST.__skip)
4002 addr = VALIST.__skip + 32;
4003 else
4004 addr = VALIST.__args;
4005 VALIST.__args = addr + paddedsize;
4006 ret = *(TYPE *)addr;
4007 */
4008 static tree
4009 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4010 gimple_seq * post_p ATTRIBUTE_UNUSED)
4011 {
4012 tree f_args, f_skip;
4013 tree args, skip;
4014 HOST_WIDE_INT size, rsize;
4015 tree addr, tmp;
4016 bool pass_by_reference_p;
4017
4018 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4019 f_skip = DECL_CHAIN (f_args);
4020
4021 args =
4022 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4023 skip =
4024 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4025
4026 addr = create_tmp_var (ptr_type_node, "va_arg");
4027
4028 /* If an object is dynamically sized, a pointer to it is passed
4029 instead of the object itself. */
4030 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4031 false);
4032 if (pass_by_reference_p)
4033 type = build_pointer_type (type);
4034 size = int_size_in_bytes (type);
4035 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4036
4037 /* build conditional expression to calculate addr. The expression
4038 will be gimplified later. */
4039 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4040 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4041 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4042 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4043 unshare_expr (skip)));
4044
4045 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4046 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4047 unshare_expr (args));
4048
4049 gimplify_assign (addr, tmp, pre_p);
4050
4051 /* Update VALIST.__args. */
4052 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4053 gimplify_assign (unshare_expr (args), tmp, pre_p);
4054
4055 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4056 addr);
4057
4058 if (pass_by_reference_p)
4059 addr = build_va_arg_indirect_ref (addr);
4060
4061 return build_va_arg_indirect_ref (addr);
4062 }
4063
4064 /* Save parameter registers starting with the register that corresponds
4065 to the first unnamed parameter. If the first unnamed parameter is
4066 in the stack then save no registers. Set pretend_args_size to the
4067 amount of space needed to save the registers. */
4068 static void
4069 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4070 tree type, int *pretend_size, int no_rtl)
4071 {
4072 if (!no_rtl)
4073 {
4074 rtx tmp;
4075 int regno;
4076 int offset;
4077 int ncum = *get_cumulative_args (cum);
4078
4079 /* cum currently points to the last named argument; we want to
4080 start at the next argument. */
4081 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4082
4083 offset = -STACK_POINTER_OFFSET;
4084 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4085 {
4086 tmp = gen_frame_mem (V4SImode,
4087 plus_constant (Pmode, virtual_incoming_args_rtx,
4088 offset));
4089 emit_move_insn (tmp,
4090 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4091 offset += 16;
4092 }
4093 *pretend_size = offset + STACK_POINTER_OFFSET;
4094 }
4095 }
4096 \f
4097 static void
4098 spu_conditional_register_usage (void)
4099 {
4100 if (flag_pic)
4101 {
4102 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4103 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4104 }
4105 }
4106
4107 /* This is called any time we inspect the alignment of a register for
4108 addresses. */
4109 static int
4110 reg_aligned_for_addr (rtx x)
4111 {
4112 int regno =
4113 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4114 return REGNO_POINTER_ALIGN (regno) >= 128;
4115 }
4116
4117 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4118 into its SYMBOL_REF_FLAGS. */
4119 static void
4120 spu_encode_section_info (tree decl, rtx rtl, int first)
4121 {
4122 default_encode_section_info (decl, rtl, first);
4123
4124 /* If a variable has a forced alignment to < 16 bytes, mark it with
4125 SYMBOL_FLAG_ALIGN1. */
4126 if (TREE_CODE (decl) == VAR_DECL
4127 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4128 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4129 }
4130
4131 /* Return TRUE if we are certain the mem refers to a complete object
4132 which is both 16-byte aligned and padded to a 16-byte boundary. This
4133 would make it safe to store with a single instruction.
4134 We guarantee the alignment and padding for static objects by aligning
4135 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4136 FIXME: We currently cannot guarantee this for objects on the stack
4137 because assign_parm_setup_stack calls assign_stack_local with the
4138 alignment of the parameter mode and in that case the alignment never
4139 gets adjusted by LOCAL_ALIGNMENT. */
4140 static int
4141 store_with_one_insn_p (rtx mem)
4142 {
4143 machine_mode mode = GET_MODE (mem);
4144 rtx addr = XEXP (mem, 0);
4145 if (mode == BLKmode)
4146 return 0;
4147 if (GET_MODE_SIZE (mode) >= 16)
4148 return 1;
4149 /* Only static objects. */
4150 if (GET_CODE (addr) == SYMBOL_REF)
4151 {
4152 /* We use the associated declaration to make sure the access is
4153 referring to the whole object.
4154 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4155 if it is necessary. Will there be cases where one exists, and
4156 the other does not? Will there be cases where both exist, but
4157 have different types? */
4158 tree decl = MEM_EXPR (mem);
4159 if (decl
4160 && TREE_CODE (decl) == VAR_DECL
4161 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4162 return 1;
4163 decl = SYMBOL_REF_DECL (addr);
4164 if (decl
4165 && TREE_CODE (decl) == VAR_DECL
4166 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4167 return 1;
4168 }
4169 return 0;
4170 }
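
/* For example (illustrative, assuming the 16-byte static alignment
   described above):

       int g;                          // static: aligned and padded to 16
       void set_g (void) { g = 1; }    // single quadword store is safe

       void set_p (int *p) { *p = 1; } // alignment/padding unknown:
                                       // needs a read-modify-write split

   The first store qualifies because the MEM's decl covers the whole
   padded object; the second does not, so address_needs_split returns 1
   and spu_split_store emits the load/shuffle/store sequence.  */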
4171
4172 /* Return 1 when the address is not valid for a simple load and store as
4173 required by the '_mov*' patterns. We could make this less strict
4174 for loads, but we prefer MEMs to look the same so they are more
4175 likely to be merged. */
4176 static int
4177 address_needs_split (rtx mem)
4178 {
4179 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4180 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4181 || !(store_with_one_insn_p (mem)
4182 || mem_is_padded_component_ref (mem))))
4183 return 1;
4184
4185 return 0;
4186 }
4187
4188 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4189 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4190 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4191
4192 /* MEM is known to be an __ea-qualified memory access. Emit a call to
4193 fetch the PPU memory into local store, and return its address in local
4194 store. */
4195
4196 static void
4197 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4198 {
4199 if (is_store)
4200 {
4201 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4202 if (!cache_fetch_dirty)
4203 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4204 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4205 2, ea_addr, EAmode, ndirty, SImode);
4206 }
4207 else
4208 {
4209 if (!cache_fetch)
4210 cache_fetch = init_one_libfunc ("__cache_fetch");
4211 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4212 1, ea_addr, EAmode);
4213 }
4214 }
4215
4216 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4217 dirty bit marking, inline.
4218
4219 The cache control data structure is an array of
4220
4221 struct __cache_tag_array
4222 {
4223 unsigned int tag_lo[4];
4224 unsigned int tag_hi[4];
4225 void *data_pointer[4];
4226 int reserved[4];
4227 vector unsigned short dirty_bits[4];
4228 } */
4229
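/* A rough scalar model of the inline hit path emitted below, for the
   32-bit __ea case (illustrative only; the generated RTL is branch-free
   vector code and the 64-bit case also checks tag_hi):

       struct __cache_tag_array *set
         = (struct __cache_tag_array *)
           ((char *) __cache_tag_array
            + (ea & (__cache_tag_array_size - 128)));
       for (int way = 0; way < 4; way++)
         if (set->tag_lo[way] == (ea & ~127u))
           return (char *) set->data_pointer[way] + (ea & 127);

   and on a miss fall back to __cache_fetch / __cache_fetch_dirty.  */
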
4230 static void
4231 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4232 {
4233 rtx ea_addr_si;
4234 HOST_WIDE_INT v;
4235 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4236 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4237 rtx index_mask = gen_reg_rtx (SImode);
4238 rtx tag_arr = gen_reg_rtx (Pmode);
4239 rtx splat_mask = gen_reg_rtx (TImode);
4240 rtx splat = gen_reg_rtx (V4SImode);
4241 rtx splat_hi = NULL_RTX;
4242 rtx tag_index = gen_reg_rtx (Pmode);
4243 rtx block_off = gen_reg_rtx (SImode);
4244 rtx tag_addr = gen_reg_rtx (Pmode);
4245 rtx tag = gen_reg_rtx (V4SImode);
4246 rtx cache_tag = gen_reg_rtx (V4SImode);
4247 rtx cache_tag_hi = NULL_RTX;
4248 rtx cache_ptrs = gen_reg_rtx (TImode);
4249 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4250 rtx tag_equal = gen_reg_rtx (V4SImode);
4251 rtx tag_equal_hi = NULL_RTX;
4252 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4253 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4254 rtx eq_index = gen_reg_rtx (SImode);
4255 rtx bcomp, hit_label, hit_ref, cont_label;
4256 rtx_insn *insn;
4257
4258 if (spu_ea_model != 32)
4259 {
4260 splat_hi = gen_reg_rtx (V4SImode);
4261 cache_tag_hi = gen_reg_rtx (V4SImode);
4262 tag_equal_hi = gen_reg_rtx (V4SImode);
4263 }
4264
4265 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4266 emit_move_insn (tag_arr, tag_arr_sym);
4267 v = 0x0001020300010203LL;
4268 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4269 ea_addr_si = ea_addr;
4270 if (spu_ea_model != 32)
4271 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4272
4273 /* tag_index = ea_addr & (tag_array_size - 128) */
4274 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4275
4276 /* splat ea_addr to all 4 slots. */
4277 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4278 /* Similarly for high 32 bits of ea_addr. */
4279 if (spu_ea_model != 32)
4280 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4281
4282 /* block_off = ea_addr & 127 */
4283 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4284
4285 /* tag_addr = tag_arr + tag_index */
4286 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4287
4288 /* Read cache tags. */
4289 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4290 if (spu_ea_model != 32)
4291 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4292 plus_constant (Pmode,
4293 tag_addr, 16)));
4294
4295 /* tag = ea_addr & -128 */
4296 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4297
4298 /* Read all four cache data pointers. */
4299 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4300 plus_constant (Pmode,
4301 tag_addr, 32)));
4302
4303 /* Compare tags. */
4304 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4305 if (spu_ea_model != 32)
4306 {
4307 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4308 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4309 }
4310
4311 /* At most one of the tags compare equal, so tag_equal has one
4312 32-bit slot set to all 1's, with the other slots all zero.
4313 gbb picks off low bit from each byte in the 128-bit registers,
4314 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4315 we have a hit. */
4316 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4317 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4318
4319 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4320 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4321
4322 /* This allows us to rotate the corresponding cache data pointer into
4323 slot 0 (rotating by eq_index mod 16 bytes). */
4324 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4325 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4326
4327 /* Add block offset to form final data address. */
4328 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4329
4330 /* Check that we did hit. */
4331 hit_label = gen_label_rtx ();
4332 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4333 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4334 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4335 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4336 hit_ref, pc_rtx)));
4337 /* Say that this branch is very likely to happen. */
4338 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4339 add_int_reg_note (insn, REG_BR_PROB, v);
4340
4341 ea_load_store (mem, is_store, ea_addr, data_addr);
4342 cont_label = gen_label_rtx ();
4343 emit_jump_insn (gen_jump (cont_label));
4344 emit_barrier ();
4345
4346 emit_label (hit_label);
4347
4348 if (is_store)
4349 {
4350 HOST_WIDE_INT v_hi;
4351 rtx dirty_bits = gen_reg_rtx (TImode);
4352 rtx dirty_off = gen_reg_rtx (SImode);
4353 rtx dirty_128 = gen_reg_rtx (TImode);
4354 rtx neg_block_off = gen_reg_rtx (SImode);
4355
4356 /* Set up mask with one dirty bit per byte of the mem we are
4357 writing, starting from top bit. */
4358 v_hi = v = -1;
4359 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4360 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4361 {
4362 v_hi = v;
4363 v = 0;
4364 }
4365 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4366
4367 /* Form index into cache dirty_bits. eq_index is one of
4368 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4369 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4370 offset to each of the four dirty_bits elements. */
4371 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4372
4373 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4374
4375 /* Rotate bit mask to proper bit. */
4376 emit_insn (gen_negsi2 (neg_block_off, block_off));
4377 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4378 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4379
4380 /* Or in the new dirty bits. */
4381 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4382
4383 /* Store. */
4384 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4385 }
4386
4387 emit_label (cont_label);
4388 }
4389
4390 static rtx
4391 expand_ea_mem (rtx mem, bool is_store)
4392 {
4393 rtx ea_addr;
4394 rtx data_addr = gen_reg_rtx (Pmode);
4395 rtx new_mem;
4396
4397 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4398 if (optimize_size || optimize == 0)
4399 ea_load_store (mem, is_store, ea_addr, data_addr);
4400 else
4401 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4402
4403 if (ea_alias_set == -1)
4404 ea_alias_set = new_alias_set ();
4405
4406 /* We generate a new MEM RTX to refer to the copy of the data
4407 in the cache. We do not copy memory attributes (except the
4408 alignment) from the original MEM, as they may no longer apply
4409 to the cache copy. */
4410 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4411 set_mem_alias_set (new_mem, ea_alias_set);
4412 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4413
4414 return new_mem;
4415 }
4416
4417 int
4418 spu_expand_mov (rtx * ops, machine_mode mode)
4419 {
4420 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4421 {
4422 /* Perform the move in the destination SUBREG's inner mode. */
4423 ops[0] = SUBREG_REG (ops[0]);
4424 mode = GET_MODE (ops[0]);
4425 ops[1] = gen_lowpart_common (mode, ops[1]);
4426 gcc_assert (ops[1]);
4427 }
4428
4429 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4430 {
4431 rtx from = SUBREG_REG (ops[1]);
4432 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4433
4434 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4435 && GET_MODE_CLASS (imode) == MODE_INT
4436 && subreg_lowpart_p (ops[1]));
4437
4438 if (GET_MODE_SIZE (imode) < 4)
4439 imode = SImode;
4440 if (imode != GET_MODE (from))
4441 from = gen_rtx_SUBREG (imode, from, 0);
4442
4443 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4444 {
4445 enum insn_code icode = convert_optab_handler (trunc_optab,
4446 mode, imode);
4447 emit_insn (GEN_FCN (icode) (ops[0], from));
4448 }
4449 else
4450 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4451 return 1;
4452 }
4453
4454 /* At least one of the operands needs to be a register. */
4455 if ((reload_in_progress | reload_completed) == 0
4456 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4457 {
4458 rtx temp = force_reg (mode, ops[1]);
4459 emit_move_insn (ops[0], temp);
4460 return 1;
4461 }
4462 if (reload_in_progress || reload_completed)
4463 {
4464 if (CONSTANT_P (ops[1]))
4465 return spu_split_immediate (ops);
4466 return 0;
4467 }
4468
4469 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4470 extend them. */
4471 if (GET_CODE (ops[1]) == CONST_INT)
4472 {
4473 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4474 if (val != INTVAL (ops[1]))
4475 {
4476 emit_move_insn (ops[0], GEN_INT (val));
4477 return 1;
4478 }
4479 }
4480 if (MEM_P (ops[0]))
4481 {
4482 if (MEM_ADDR_SPACE (ops[0]))
4483 ops[0] = expand_ea_mem (ops[0], true);
4484 return spu_split_store (ops);
4485 }
4486 if (MEM_P (ops[1]))
4487 {
4488 if (MEM_ADDR_SPACE (ops[1]))
4489 ops[1] = expand_ea_mem (ops[1], false);
4490 return spu_split_load (ops);
4491 }
4492
4493 return 0;
4494 }
4495
4496 static void
4497 spu_convert_move (rtx dst, rtx src)
4498 {
4499 machine_mode mode = GET_MODE (dst);
4500 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4501 rtx reg;
4502 gcc_assert (GET_MODE (src) == TImode);
4503 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4504 emit_insn (gen_rtx_SET (reg,
4505 gen_rtx_TRUNCATE (int_mode,
4506 gen_rtx_LSHIFTRT (TImode, src,
4507 GEN_INT (int_mode == DImode ? 64 : 96)))));
4508 if (int_mode != mode)
4509 {
4510 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4511 emit_move_insn (dst, reg);
4512 }
4513 }
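
/* For example (illustrative), with DST of mode SImode the quadword

       SRC = [ b0 b1 b2 b3 | b4 ... b15 ]        (TImode)

   is shifted right by 96 bits and truncated, leaving bytes b0..b3 (the
   preferred slot for 4-byte values) in DST.  DImode uses a 64-bit shift
   (b0..b7); QImode and HImode use the same 96-bit shift and the
   truncation picks up byte 3 or bytes 2..3.  */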
4514
4515 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4516 the address from SRC and SRC+16. Return a REG or CONST_INT that
4517 specifies how many bytes to rotate the loaded registers, plus any
4518 extra from EXTRA_ROTQBY. The address and rotate amounts are
4519 normalized to improve merging of loads and rotate computations. */
4520 static rtx
4521 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4522 {
4523 rtx addr = XEXP (src, 0);
4524 rtx p0, p1, rot, addr0, addr1;
4525 int rot_amt;
4526
4527 rot = 0;
4528 rot_amt = 0;
4529
4530 if (MEM_ALIGN (src) >= 128)
4531 /* Address is already aligned; simply perform a TImode load. */ ;
4532 else if (GET_CODE (addr) == PLUS)
4533 {
4534 /* 8 cases:
4535 aligned reg + aligned reg => lqx
4536 aligned reg + unaligned reg => lqx, rotqby
4537 aligned reg + aligned const => lqd
4538 aligned reg + unaligned const => lqd, rotqbyi
4539 unaligned reg + aligned reg => lqx, rotqby
4540 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4541 unaligned reg + aligned const => lqd, rotqby
4542 unaligned reg + unaligned const -> not allowed by legitimate address
4543 */
4544 p0 = XEXP (addr, 0);
4545 p1 = XEXP (addr, 1);
4546 if (!reg_aligned_for_addr (p0))
4547 {
4548 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4549 {
4550 rot = gen_reg_rtx (SImode);
4551 emit_insn (gen_addsi3 (rot, p0, p1));
4552 }
4553 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4554 {
4555 if (INTVAL (p1) > 0
4556 && REG_POINTER (p0)
4557 && INTVAL (p1) * BITS_PER_UNIT
4558 < REGNO_POINTER_ALIGN (REGNO (p0)))
4559 {
4560 rot = gen_reg_rtx (SImode);
4561 emit_insn (gen_addsi3 (rot, p0, p1));
4562 addr = p0;
4563 }
4564 else
4565 {
4566 rtx x = gen_reg_rtx (SImode);
4567 emit_move_insn (x, p1);
4568 if (!spu_arith_operand (p1, SImode))
4569 p1 = x;
4570 rot = gen_reg_rtx (SImode);
4571 emit_insn (gen_addsi3 (rot, p0, p1));
4572 addr = gen_rtx_PLUS (Pmode, p0, x);
4573 }
4574 }
4575 else
4576 rot = p0;
4577 }
4578 else
4579 {
4580 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4581 {
4582 rot_amt = INTVAL (p1) & 15;
4583 if (INTVAL (p1) & -16)
4584 {
4585 p1 = GEN_INT (INTVAL (p1) & -16);
4586 addr = gen_rtx_PLUS (SImode, p0, p1);
4587 }
4588 else
4589 addr = p0;
4590 }
4591 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4592 rot = p1;
4593 }
4594 }
4595 else if (REG_P (addr))
4596 {
4597 if (!reg_aligned_for_addr (addr))
4598 rot = addr;
4599 }
4600 else if (GET_CODE (addr) == CONST)
4601 {
4602 if (GET_CODE (XEXP (addr, 0)) == PLUS
4603 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4604 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4605 {
4606 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4607 if (rot_amt & -16)
4608 addr = gen_rtx_CONST (Pmode,
4609 gen_rtx_PLUS (Pmode,
4610 XEXP (XEXP (addr, 0), 0),
4611 GEN_INT (rot_amt & -16)));
4612 else
4613 addr = XEXP (XEXP (addr, 0), 0);
4614 }
4615 else
4616 {
4617 rot = gen_reg_rtx (Pmode);
4618 emit_move_insn (rot, addr);
4619 }
4620 }
4621 else if (GET_CODE (addr) == CONST_INT)
4622 {
4623 rot_amt = INTVAL (addr);
4624 addr = GEN_INT (rot_amt & -16);
4625 }
4626 else if (!ALIGNED_SYMBOL_REF_P (addr))
4627 {
4628 rot = gen_reg_rtx (Pmode);
4629 emit_move_insn (rot, addr);
4630 }
4631
4632 rot_amt += extra_rotby;
4633
4634 rot_amt &= 15;
4635
4636 if (rot && rot_amt)
4637 {
4638 rtx x = gen_reg_rtx (SImode);
4639 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4640 rot = x;
4641 rot_amt = 0;
4642 }
4643 if (!rot && rot_amt)
4644 rot = GEN_INT (rot_amt);
4645
4646 addr0 = copy_rtx (addr);
4647 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4648 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4649
4650 if (dst1)
4651 {
4652 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4653 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4654 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4655 }
4656
4657 return rot;
4658 }
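
/* Worked example (illustrative): an SImode load from (plus P 6), where P
   is known to be 16-byte aligned.  This is the "aligned reg + unaligned
   const" case above: the quadword at P is loaded, the function returns
   the constant rotate amount 6 (plus any EXTRA_ROTQBY), and the caller
   rotates the loaded quadword left by 6 bytes so the wanted word lands
   in the preferred slot, roughly:

       lqd      $q, 0($p)
       rotqbyi  $q, $q, 6        (emitted by the caller, e.g. spu_split_load)
*/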
4659
4660 int
4661 spu_split_load (rtx * ops)
4662 {
4663 machine_mode mode = GET_MODE (ops[0]);
4664 rtx addr, load, rot;
4665 int rot_amt;
4666
4667 if (GET_MODE_SIZE (mode) >= 16)
4668 return 0;
4669
4670 addr = XEXP (ops[1], 0);
4671 gcc_assert (GET_CODE (addr) != AND);
4672
4673 if (!address_needs_split (ops[1]))
4674 {
4675 ops[1] = change_address (ops[1], TImode, addr);
4676 load = gen_reg_rtx (TImode);
4677 emit_insn (gen__movti (load, ops[1]));
4678 spu_convert_move (ops[0], load);
4679 return 1;
4680 }
4681
4682 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4683
4684 load = gen_reg_rtx (TImode);
4685 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4686
4687 if (rot)
4688 emit_insn (gen_rotqby_ti (load, load, rot));
4689
4690 spu_convert_move (ops[0], load);
4691 return 1;
4692 }
4693
4694 int
4695 spu_split_store (rtx * ops)
4696 {
4697 machine_mode mode = GET_MODE (ops[0]);
4698 rtx reg;
4699 rtx addr, p0, p1, p1_lo, smem;
4700 int aform;
4701 int scalar;
4702
4703 if (GET_MODE_SIZE (mode) >= 16)
4704 return 0;
4705
4706 addr = XEXP (ops[0], 0);
4707 gcc_assert (GET_CODE (addr) != AND);
4708
4709 if (!address_needs_split (ops[0]))
4710 {
4711 reg = gen_reg_rtx (TImode);
4712 emit_insn (gen_spu_convert (reg, ops[1]));
4713 ops[0] = change_address (ops[0], TImode, addr);
4714 emit_move_insn (ops[0], reg);
4715 return 1;
4716 }
4717
4718 if (GET_CODE (addr) == PLUS)
4719 {
4720 /* 8 cases:
4721 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4722 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4723 aligned reg + aligned const => lqd, c?d, shuf, stqx
4724 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4725 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4726 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4727 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4728 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4729 */
4730 aform = 0;
4731 p0 = XEXP (addr, 0);
4732 p1 = p1_lo = XEXP (addr, 1);
4733 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4734 {
4735 p1_lo = GEN_INT (INTVAL (p1) & 15);
4736 if (reg_aligned_for_addr (p0))
4737 {
4738 p1 = GEN_INT (INTVAL (p1) & -16);
4739 if (p1 == const0_rtx)
4740 addr = p0;
4741 else
4742 addr = gen_rtx_PLUS (SImode, p0, p1);
4743 }
4744 else
4745 {
4746 rtx x = gen_reg_rtx (SImode);
4747 emit_move_insn (x, p1);
4748 addr = gen_rtx_PLUS (SImode, p0, x);
4749 }
4750 }
4751 }
4752 else if (REG_P (addr))
4753 {
4754 aform = 0;
4755 p0 = addr;
4756 p1 = p1_lo = const0_rtx;
4757 }
4758 else
4759 {
4760 aform = 1;
4761 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4762 p1 = 0; /* aform doesn't use p1 */
4763 p1_lo = addr;
4764 if (ALIGNED_SYMBOL_REF_P (addr))
4765 p1_lo = const0_rtx;
4766 else if (GET_CODE (addr) == CONST
4767 && GET_CODE (XEXP (addr, 0)) == PLUS
4768 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4769 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4770 {
4771 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4772 if ((v & -16) != 0)
4773 addr = gen_rtx_CONST (Pmode,
4774 gen_rtx_PLUS (Pmode,
4775 XEXP (XEXP (addr, 0), 0),
4776 GEN_INT (v & -16)));
4777 else
4778 addr = XEXP (XEXP (addr, 0), 0);
4779 p1_lo = GEN_INT (v & 15);
4780 }
4781 else if (GET_CODE (addr) == CONST_INT)
4782 {
4783 p1_lo = GEN_INT (INTVAL (addr) & 15);
4784 addr = GEN_INT (INTVAL (addr) & -16);
4785 }
4786 else
4787 {
4788 p1_lo = gen_reg_rtx (SImode);
4789 emit_move_insn (p1_lo, addr);
4790 }
4791 }
4792
4793 gcc_assert (aform == 0 || aform == 1);
4794 reg = gen_reg_rtx (TImode);
4795
4796 scalar = store_with_one_insn_p (ops[0]);
4797 if (!scalar)
4798 {
4799 /* We could copy the flags from the ops[0] MEM to lmem here.
4800 We don't because we want this load to be optimized away if
4801 possible, and copying the flags would prevent that in certain
4802 cases, e.g. consider the volatile flag. */
4803
4804 rtx pat = gen_reg_rtx (TImode);
4805 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4806 set_mem_alias_set (lmem, 0);
4807 emit_insn (gen_movti (reg, lmem));
4808
4809 if (!p0 || reg_aligned_for_addr (p0))
4810 p0 = stack_pointer_rtx;
4811 if (!p1_lo)
4812 p1_lo = const0_rtx;
4813
4814 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4815 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4816 }
4817 else
4818 {
4819 if (GET_CODE (ops[1]) == REG)
4820 emit_insn (gen_spu_convert (reg, ops[1]));
4821 else if (GET_CODE (ops[1]) == SUBREG)
4822 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4823 else
4824 abort ();
4825 }
4826
4827 if (GET_MODE_SIZE (mode) < 4 && scalar)
4828 emit_insn (gen_ashlti3
4829 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4830
4831 smem = change_address (ops[0], TImode, copy_rtx (addr));
4832 /* We can't use the previous alias set because the memory has changed
4833 size and can potentially overlap objects of other types. */
4834 set_mem_alias_set (smem, 0);
4835
4836 emit_insn (gen_movti (smem, reg));
4837 return 1;
4838 }
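
/* Worked example (illustrative): storing an SImode value V to (plus P 4)
   with P 16-byte aligned ("aligned reg + aligned const" above) and no
   single-instruction guarantee, the code built here is roughly

       lqd    $q, 0($p)              load the containing quadword
       cwd    $pat, 4($sp)           word-insertion controls for offset 4
       shufb  $q, $v, $q, $pat       merge V into bytes 4..7
       stqd   $q, 0($p)              store the quadword back

   (mnemonics approximate; the actual insns come from the movti, cpat and
   shufb patterns used above).  */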
4839
4840 /* Return TRUE if X is MEM which is a struct member reference
4841 and the member can safely be loaded and stored with a single
4842 instruction because it is padded. */
4843 static int
4844 mem_is_padded_component_ref (rtx x)
4845 {
4846 tree t = MEM_EXPR (x);
4847 tree r;
4848 if (!t || TREE_CODE (t) != COMPONENT_REF)
4849 return 0;
4850 t = TREE_OPERAND (t, 1);
4851 if (!t || TREE_CODE (t) != FIELD_DECL
4852 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4853 return 0;
4854 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4855 r = DECL_FIELD_CONTEXT (t);
4856 if (!r || TREE_CODE (r) != RECORD_TYPE)
4857 return 0;
4858 /* Make sure they are the same mode */
4859 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4860 return 0;
4861 /* If there are no following fields, then the field's alignment ensures
4862 the structure is padded to that alignment, which means this field is
4863 padded too. */
4864 if (TREE_CHAIN (t) == 0)
4865 return 1;
4866 /* If the following field is also aligned then this field will be
4867 padded. */
4868 t = TREE_CHAIN (t);
4869 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4870 return 1;
4871 return 0;
4872 }
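
/* For example (illustrative):

       struct padded
       {
         int a __attribute__ ((aligned (16)));
         int b __attribute__ ((aligned (16)));
       };

   An SImode reference to field 'a' passes the checks above: the field is
   16-byte aligned and the next field is also 16-byte aligned (and 'b',
   being the last field, is covered by the structure's own padding), so a
   whole-quadword store cannot clobber live data.  */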
4873
4874 /* Parse the -mfixed-range= option string. */
4875 static void
4876 fix_range (const char *const_str)
4877 {
4878 int i, first, last;
4879 char *str, *dash, *comma;
4880
4881 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4882 REG2 are either register names or register numbers. The effect
4883 of this option is to mark the registers in the range from REG1 to
4884 REG2 as ``fixed'' so they won't be used by the compiler. */
4885
4886 i = strlen (const_str);
4887 str = (char *) alloca (i + 1);
4888 memcpy (str, const_str, i + 1);
4889
4890 while (1)
4891 {
4892 dash = strchr (str, '-');
4893 if (!dash)
4894 {
4895 warning (0, "value of -mfixed-range must have form REG1-REG2");
4896 return;
4897 }
4898 *dash = '\0';
4899 comma = strchr (dash + 1, ',');
4900 if (comma)
4901 *comma = '\0';
4902
4903 first = decode_reg_name (str);
4904 if (first < 0)
4905 {
4906 warning (0, "unknown register name: %s", str);
4907 return;
4908 }
4909
4910 last = decode_reg_name (dash + 1);
4911 if (last < 0)
4912 {
4913 warning (0, "unknown register name: %s", dash + 1);
4914 return;
4915 }
4916
4917 *dash = '-';
4918
4919 if (first > last)
4920 {
4921 warning (0, "%s-%s is an empty range", str, dash + 1);
4922 return;
4923 }
4924
4925 for (i = first; i <= last; ++i)
4926 fixed_regs[i] = call_used_regs[i] = 1;
4927
4928 if (!comma)
4929 break;
4930
4931 *comma = ',';
4932 str = comma + 1;
4933 }
4934 }
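
/* For example (illustrative), -mfixed-range=80-85,100-101 marks registers
   80..85 and 100..101 as fixed and call-used so the register allocator
   never assigns them; names accepted by decode_reg_name may be used in
   place of the numbers.  */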
4935
4936 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4937 can be generated using the fsmbi instruction. */
4938 int
4939 fsmbi_const_p (rtx x)
4940 {
4941 if (CONSTANT_P (x))
4942 {
4943 /* We can always choose TImode for CONST_INT because the high bits
4944 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4945 enum immediate_class c = classify_immediate (x, TImode);
4946 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4947 }
4948 return 0;
4949 }
4950
4951 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4952 can be generated using the cbd, chd, cwd or cdd instruction. */
4953 int
4954 cpat_const_p (rtx x, machine_mode mode)
4955 {
4956 if (CONSTANT_P (x))
4957 {
4958 enum immediate_class c = classify_immediate (x, mode);
4959 return c == IC_CPAT;
4960 }
4961 return 0;
4962 }
4963
4964 rtx
4965 gen_cpat_const (rtx * ops)
4966 {
4967 unsigned char dst[16];
4968 int i, offset, shift, isize;
4969 if (GET_CODE (ops[3]) != CONST_INT
4970 || GET_CODE (ops[2]) != CONST_INT
4971 || (GET_CODE (ops[1]) != CONST_INT
4972 && GET_CODE (ops[1]) != REG))
4973 return 0;
4974 if (GET_CODE (ops[1]) == REG
4975 && (!REG_POINTER (ops[1])
4976 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4977 return 0;
4978
4979 for (i = 0; i < 16; i++)
4980 dst[i] = i + 16;
4981 isize = INTVAL (ops[3]);
4982 if (isize == 1)
4983 shift = 3;
4984 else if (isize == 2)
4985 shift = 2;
4986 else
4987 shift = 0;
4988 offset = (INTVAL (ops[2]) +
4989 (GET_CODE (ops[1]) ==
4990 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4991 for (i = 0; i < isize; i++)
4992 dst[offset + i] = i + shift;
4993 return array_to_constant (TImode, dst);
4994 }
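
/* Worked example (illustrative): with ops[1] a register known to be
   16-byte aligned, ops[2] == 4 and ops[3] == 4 (a word insert at byte
   offset 4), the array built above is

       { 16,17,18,19,  0, 1, 2, 3, 24,25,26,27, 28,29,30,31 }

   i.e. a shuffle control whose entries >= 16 keep the corresponding
   bytes of the old quadword while entries 0..3 take the preferred-slot
   bytes of the value being inserted (see spu_split_store and
   spu_builtin_insert).  */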
4995
4996 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4997 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4998 than 16 bytes, the value is repeated across the rest of the array. */
4999 void
5000 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5001 {
5002 HOST_WIDE_INT val;
5003 int i, j, first;
5004
5005 memset (arr, 0, 16);
5006 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5007 if (GET_CODE (x) == CONST_INT
5008 || (GET_CODE (x) == CONST_DOUBLE
5009 && (mode == SFmode || mode == DFmode)))
5010 {
5011 gcc_assert (mode != VOIDmode && mode != BLKmode);
5012
5013 if (GET_CODE (x) == CONST_DOUBLE)
5014 val = const_double_to_hwint (x);
5015 else
5016 val = INTVAL (x);
5017 first = GET_MODE_SIZE (mode) - 1;
5018 for (i = first; i >= 0; i--)
5019 {
5020 arr[i] = val & 0xff;
5021 val >>= 8;
5022 }
5023 /* Splat the constant across the whole array. */
5024 for (j = 0, i = first + 1; i < 16; i++)
5025 {
5026 arr[i] = arr[j];
5027 j = (j == first) ? 0 : j + 1;
5028 }
5029 }
5030 else if (GET_CODE (x) == CONST_DOUBLE)
5031 {
5032 val = CONST_DOUBLE_LOW (x);
5033 for (i = 15; i >= 8; i--)
5034 {
5035 arr[i] = val & 0xff;
5036 val >>= 8;
5037 }
5038 val = CONST_DOUBLE_HIGH (x);
5039 for (i = 7; i >= 0; i--)
5040 {
5041 arr[i] = val & 0xff;
5042 val >>= 8;
5043 }
5044 }
5045 else if (GET_CODE (x) == CONST_VECTOR)
5046 {
5047 int units;
5048 rtx elt;
5049 mode = GET_MODE_INNER (mode);
5050 units = CONST_VECTOR_NUNITS (x);
5051 for (i = 0; i < units; i++)
5052 {
5053 elt = CONST_VECTOR_ELT (x, i);
5054 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5055 {
5056 if (GET_CODE (elt) == CONST_DOUBLE)
5057 val = const_double_to_hwint (elt);
5058 else
5059 val = INTVAL (elt);
5060 first = GET_MODE_SIZE (mode) - 1;
5061 if (first + i * GET_MODE_SIZE (mode) > 16)
5062 abort ();
5063 for (j = first; j >= 0; j--)
5064 {
5065 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5066 val >>= 8;
5067 }
5068 }
5069 }
5070 }
5071 else
5072 gcc_unreachable();
5073 }
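
/* For example (illustrative), an SImode CONST_INT of 0x01020304 produces

       arr = { 01 02 03 04  01 02 03 04  01 02 03 04  01 02 03 04 }

   i.e. the 4-byte big-endian image of the value splatted across all 16
   bytes; a CONST_VECTOR instead fills GET_MODE_SIZE (inner mode) bytes
   per element, most significant byte first.  */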
5074
5075 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5076 smaller than 16 bytes, use the bytes that would represent that value
5077 in a register, e.g., for QImode return the value of arr[3]. */
5078 rtx
5079 array_to_constant (machine_mode mode, const unsigned char arr[16])
5080 {
5081 machine_mode inner_mode;
5082 rtvec v;
5083 int units, size, i, j, k;
5084 HOST_WIDE_INT val;
5085
5086 if (GET_MODE_CLASS (mode) == MODE_INT
5087 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5088 {
5089 j = GET_MODE_SIZE (mode);
5090 i = j < 4 ? 4 - j : 0;
5091 for (val = 0; i < j; i++)
5092 val = (val << 8) | arr[i];
5093 val = trunc_int_for_mode (val, mode);
5094 return GEN_INT (val);
5095 }
5096
5097 if (mode == TImode)
5098 {
5099 HOST_WIDE_INT high;
5100 for (i = high = 0; i < 8; i++)
5101 high = (high << 8) | arr[i];
5102 for (i = 8, val = 0; i < 16; i++)
5103 val = (val << 8) | arr[i];
5104 return immed_double_const (val, high, TImode);
5105 }
5106 if (mode == SFmode)
5107 {
5108 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5109 val = trunc_int_for_mode (val, SImode);
5110 return hwint_to_const_double (SFmode, val);
5111 }
5112 if (mode == DFmode)
5113 {
5114 for (i = 0, val = 0; i < 8; i++)
5115 val = (val << 8) | arr[i];
5116 return hwint_to_const_double (DFmode, val);
5117 }
5118
5119 if (!VECTOR_MODE_P (mode))
5120 abort ();
5121
5122 units = GET_MODE_NUNITS (mode);
5123 size = GET_MODE_UNIT_SIZE (mode);
5124 inner_mode = GET_MODE_INNER (mode);
5125 v = rtvec_alloc (units);
5126
5127 for (k = i = 0; i < units; ++i)
5128 {
5129 val = 0;
5130 for (j = 0; j < size; j++, k++)
5131 val = (val << 8) | arr[k];
5132
5133 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5134 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5135 else
5136 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5137 }
5138 if (k > 16)
5139 abort ();
5140
5141 return gen_rtx_CONST_VECTOR (mode, v);
5142 }
5143
5144 static void
5145 reloc_diagnostic (rtx x)
5146 {
5147 tree decl = 0;
5148 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5149 return;
5150
5151 if (GET_CODE (x) == SYMBOL_REF)
5152 decl = SYMBOL_REF_DECL (x);
5153 else if (GET_CODE (x) == CONST
5154 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5155 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5156
5157 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5158 if (decl && !DECL_P (decl))
5159 decl = 0;
5160
5161 /* The decl could be a string constant. */
5162 if (decl && DECL_P (decl))
5163 {
5164 location_t loc;
5165 /* We use last_assemble_variable_decl to get line information. It's
5166 not always going to be right and might not even be close, but will
5167 be right for the more common cases. */
5168 if (!last_assemble_variable_decl || in_section == ctors_section)
5169 loc = DECL_SOURCE_LOCATION (decl);
5170 else
5171 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5172
5173 if (TARGET_WARN_RELOC)
5174 warning_at (loc, 0,
5175 "creating run-time relocation for %qD", decl);
5176 else
5177 error_at (loc,
5178 "creating run-time relocation for %qD", decl);
5179 }
5180 else
5181 {
5182 if (TARGET_WARN_RELOC)
5183 warning_at (input_location, 0, "creating run-time relocation");
5184 else
5185 error_at (input_location, "creating run-time relocation");
5186 }
5187 }
5188
5189 /* Hook into assemble_integer so we can generate an error for run-time
5190 relocations. The SPU ABI disallows them. */
5191 static bool
5192 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5193 {
5194 /* By default run-time relocations aren't supported, but we allow them
5195 in case users support them in their own run-time loader, and we provide
5196 a warning for those users who don't. */
5197 if ((GET_CODE (x) == SYMBOL_REF)
5198 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5199 reloc_diagnostic (x);
5200
5201 return default_assemble_integer (x, size, aligned_p);
5202 }
5203
5204 static void
5205 spu_asm_globalize_label (FILE * file, const char *name)
5206 {
5207 fputs ("\t.global\t", file);
5208 assemble_name (file, name);
5209 fputs ("\n", file);
5210 }
5211
5212 static bool
5213 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5214 int opno ATTRIBUTE_UNUSED, int *total,
5215 bool speed ATTRIBUTE_UNUSED)
5216 {
5217 int code = GET_CODE (x);
5218 int cost = COSTS_N_INSNS (2);
5219
5220 /* Folding to a CONST_VECTOR will use extra space but there might
5221 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5222 only if it allows us to fold away multiple insns. Changing the cost
5223 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5224 because this cost will only be compared against a single insn.
5225 if (code == CONST_VECTOR)
5226 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5227 */
5228
5229 /* Use defaults for float operations. Not accurate but good enough. */
5230 if (mode == DFmode)
5231 {
5232 *total = COSTS_N_INSNS (13);
5233 return true;
5234 }
5235 if (mode == SFmode)
5236 {
5237 *total = COSTS_N_INSNS (6);
5238 return true;
5239 }
5240 switch (code)
5241 {
5242 case CONST_INT:
5243 if (satisfies_constraint_K (x))
5244 *total = 0;
5245 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5246 *total = COSTS_N_INSNS (1);
5247 else
5248 *total = COSTS_N_INSNS (3);
5249 return true;
5250
5251 case CONST:
5252 *total = COSTS_N_INSNS (3);
5253 return true;
5254
5255 case LABEL_REF:
5256 case SYMBOL_REF:
5257 *total = COSTS_N_INSNS (0);
5258 return true;
5259
5260 case CONST_DOUBLE:
5261 *total = COSTS_N_INSNS (5);
5262 return true;
5263
5264 case FLOAT_EXTEND:
5265 case FLOAT_TRUNCATE:
5266 case FLOAT:
5267 case UNSIGNED_FLOAT:
5268 case FIX:
5269 case UNSIGNED_FIX:
5270 *total = COSTS_N_INSNS (7);
5271 return true;
5272
5273 case PLUS:
5274 if (mode == TImode)
5275 {
5276 *total = COSTS_N_INSNS (9);
5277 return true;
5278 }
5279 break;
5280
5281 case MULT:
5282 cost =
5283 GET_CODE (XEXP (x, 0)) ==
5284 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5285 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5286 {
5287 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5288 {
5289 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5290 cost = COSTS_N_INSNS (14);
5291 if ((val & 0xffff) == 0)
5292 cost = COSTS_N_INSNS (9);
5293 else if (val > 0 && val < 0x10000)
5294 cost = COSTS_N_INSNS (11);
5295 }
5296 }
5297 *total = cost;
5298 return true;
5299 case DIV:
5300 case UDIV:
5301 case MOD:
5302 case UMOD:
5303 *total = COSTS_N_INSNS (20);
5304 return true;
5305 case ROTATE:
5306 case ROTATERT:
5307 case ASHIFT:
5308 case ASHIFTRT:
5309 case LSHIFTRT:
5310 *total = COSTS_N_INSNS (4);
5311 return true;
5312 case UNSPEC:
5313 if (XINT (x, 1) == UNSPEC_CONVERT)
5314 *total = COSTS_N_INSNS (0);
5315 else
5316 *total = COSTS_N_INSNS (4);
5317 return true;
5318 }
5319 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5320 if (GET_MODE_CLASS (mode) == MODE_INT
5321 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5322 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5323 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5324 *total = cost;
5325 return true;
5326 }
5327
5328 static machine_mode
5329 spu_unwind_word_mode (void)
5330 {
5331 return SImode;
5332 }
5333
5334 /* Decide whether we can make a sibling call to a function. DECL is the
5335 declaration of the function being targeted by the call and EXP is the
5336 CALL_EXPR representing the call. */
5337 static bool
5338 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5339 {
5340 return decl && !TARGET_LARGE_MEM;
5341 }
5342
5343 /* We need to correctly update the back chain pointer and the Available
5344 Stack Size (which is in the second slot of the sp register). */
5345 void
5346 spu_allocate_stack (rtx op0, rtx op1)
5347 {
5348 HOST_WIDE_INT v;
5349 rtx chain = gen_reg_rtx (V4SImode);
5350 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5351 rtx sp = gen_reg_rtx (V4SImode);
5352 rtx splatted = gen_reg_rtx (V4SImode);
5353 rtx pat = gen_reg_rtx (TImode);
5354
5355 /* Copy the back chain so we can save it back again. */
5356 emit_move_insn (chain, stack_bot);
5357
5358 op1 = force_reg (SImode, op1);
5359
5360 v = 0x1020300010203ll;
5361 emit_move_insn (pat, immed_double_const (v, v, TImode));
5362 emit_insn (gen_shufb (splatted, op1, op1, pat));
5363
5364 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5365 emit_insn (gen_subv4si3 (sp, sp, splatted));
5366
5367 if (flag_stack_check)
5368 {
5369 rtx avail = gen_reg_rtx(SImode);
5370 rtx result = gen_reg_rtx(SImode);
5371 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5372 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5373 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5374 }
5375
5376 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5377
5378 emit_move_insn (stack_bot, chain);
5379
5380 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5381 }
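
/* A rough scalar model of the RTL above (illustrative only):

       quadword saved_chain = *(quadword *) $sp;   back chain at old bottom
       $sp.slot0 -= n;                             the stack pointer itself
       $sp.slot1 -= n;                             the Available Stack Size
       *(quadword *) $sp = saved_chain;            re-link at the new bottom
       op0 = virtual_stack_dynamic_rtx;            address of the new space

   The subtraction is done on the whole V4SI sp register at once, which is
   why op1 is first splatted across all four word slots.  */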
5382
5383 void
5384 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5385 {
5386 static unsigned char arr[16] =
5387 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5388 rtx temp = gen_reg_rtx (SImode);
5389 rtx temp2 = gen_reg_rtx (SImode);
5390 rtx temp3 = gen_reg_rtx (V4SImode);
5391 rtx temp4 = gen_reg_rtx (V4SImode);
5392 rtx pat = gen_reg_rtx (TImode);
5393 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5394
5395 /* Restore the backchain from the first word, sp from the second. */
5396 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5397 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5398
5399 emit_move_insn (pat, array_to_constant (TImode, arr));
5400
5401 /* Compute Available Stack Size for sp */
5402 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5403 emit_insn (gen_shufb (temp3, temp, temp, pat));
5404
5405 /* Compute Available Stack Size for back chain */
5406 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5407 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5408 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5409
5410 emit_insn (gen_addv4si3 (sp, sp, temp3));
5411 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5412 }
5413
5414 static void
5415 spu_init_libfuncs (void)
5416 {
5417 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5418 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5419 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5420 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5421 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5422 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5423 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5424 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5425 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5426 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5427 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5428 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5429
5430 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5431 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5432
5433 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5434 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5435 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5436 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5437 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5438 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5439 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5440 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5441 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5442 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5443 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5444 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5445
5446 set_optab_libfunc (smul_optab, TImode, "__multi3");
5447 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5448 set_optab_libfunc (smod_optab, TImode, "__modti3");
5449 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5450 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5451 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5452 }
5453
5454 /* Make a subreg, stripping any existing subreg. We could possibly just
5455 call simplify_subreg, but in this case we know what we want. */
5456 rtx
5457 spu_gen_subreg (machine_mode mode, rtx x)
5458 {
5459 if (GET_CODE (x) == SUBREG)
5460 x = SUBREG_REG (x);
5461 if (GET_MODE (x) == mode)
5462 return x;
5463 return gen_rtx_SUBREG (mode, x, 0);
5464 }
5465
5466 static bool
5467 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5468 {
5469 return (TYPE_MODE (type) == BLKmode
5470 && ((type) == 0
5471 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5472 || int_size_in_bytes (type) >
5473 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5474 }
5475 \f
5476 /* Create the built-in types and functions */
5477
5478 enum spu_function_code
5479 {
5480 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5481 #include "spu-builtins.def"
5482 #undef DEF_BUILTIN
5483 NUM_SPU_BUILTINS
5484 };
5485
5486 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5487
5488 struct spu_builtin_description spu_builtins[] = {
5489 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5490 {fcode, icode, name, type, params},
5491 #include "spu-builtins.def"
5492 #undef DEF_BUILTIN
5493 };
5494
5495 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5496
5497 /* Returns the spu builtin decl for CODE. */
5498
5499 static tree
5500 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5501 {
5502 if (code >= NUM_SPU_BUILTINS)
5503 return error_mark_node;
5504
5505 return spu_builtin_decls[code];
5506 }
5507
5508
5509 static void
5510 spu_init_builtins (void)
5511 {
5512 struct spu_builtin_description *d;
5513 unsigned int i;
5514
5515 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5516 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5517 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5518 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5519 V4SF_type_node = build_vector_type (float_type_node, 4);
5520 V2DF_type_node = build_vector_type (double_type_node, 2);
5521
5522 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5523 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5524 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5525 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5526
5527 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5528
5529 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5530 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5531 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5532 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5533 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5534 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5535 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5536 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5538 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5539 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5540 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5541
5542 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5543 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5544 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5545 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5546 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5547 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5548 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5550
5551 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5552 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5553
5554 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5555
5556 spu_builtin_types[SPU_BTI_PTR] =
5557 build_pointer_type (build_qualified_type
5558 (void_type_node,
5559 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5560
5561 /* For each builtin we build a new prototype. The tree code will make
5562 sure nodes are shared. */
5563 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5564 {
5565 tree p;
5566 char name[64]; /* build_function will make a copy. */
5567 int parm;
5568
5569 if (d->name == 0)
5570 continue;
5571
5572 /* Find last parm. */
5573 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5574 ;
5575
5576 p = void_list_node;
5577 while (parm > 1)
5578 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5579
5580 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5581
5582 sprintf (name, "__builtin_%s", d->name);
5583 spu_builtin_decls[i] =
5584 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5585 if (d->fcode == SPU_MASK_FOR_LOAD)
5586 TREE_READONLY (spu_builtin_decls[i]) = 1;
5587
5588 /* These builtins don't throw. */
5589 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5590 }
5591 }
5592
5593 void
5594 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5595 {
5596 static unsigned char arr[16] =
5597 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5598
5599 rtx temp = gen_reg_rtx (Pmode);
5600 rtx temp2 = gen_reg_rtx (V4SImode);
5601 rtx temp3 = gen_reg_rtx (V4SImode);
5602 rtx pat = gen_reg_rtx (TImode);
5603 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5604
5605 emit_move_insn (pat, array_to_constant (TImode, arr));
5606
5607 /* Restore the sp. */
5608 emit_move_insn (temp, op1);
5609 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5610
5611 /* Compute available stack size for sp. */
5612 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5613 emit_insn (gen_shufb (temp3, temp, temp, pat));
5614
5615 emit_insn (gen_addv4si3 (sp, sp, temp3));
5616 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5617 }
5618
5619 int
5620 spu_safe_dma (HOST_WIDE_INT channel)
5621 {
5622 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5623 }
5624
5625 void
5626 spu_builtin_splats (rtx ops[])
5627 {
5628 machine_mode mode = GET_MODE (ops[0]);
5629 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5630 {
5631 unsigned char arr[16];
5632 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5633 emit_move_insn (ops[0], array_to_constant (mode, arr));
5634 }
5635 else
5636 {
5637 rtx reg = gen_reg_rtx (TImode);
5638 rtx shuf;
5639 if (GET_CODE (ops[1]) != REG
5640 && GET_CODE (ops[1]) != SUBREG)
5641 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5642 switch (mode)
5643 {
5644 case V2DImode:
5645 case V2DFmode:
5646 shuf =
5647 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5648 TImode);
5649 break;
5650 case V4SImode:
5651 case V4SFmode:
5652 shuf =
5653 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5654 TImode);
5655 break;
5656 case V8HImode:
5657 shuf =
5658 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5659 TImode);
5660 break;
5661 case V16QImode:
5662 shuf =
5663 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5664 TImode);
5665 break;
5666 default:
5667 abort ();
5668 }
5669 emit_move_insn (reg, shuf);
5670 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5671 }
5672 }
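
/* For example (illustrative), splatting a non-constant SImode value into a
   V4SI result uses the shuffle control bytes

       { 00 01 02 03  00 01 02 03  00 01 02 03  00 01 02 03 }

   which copy the preferred-slot word of ops[1] into every element; the
   V8HI and V16QI controls repeat bytes 2..3 and byte 3, matching the
   preferred slot of the narrower element types.  */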
5673
5674 void
5675 spu_builtin_extract (rtx ops[])
5676 {
5677 machine_mode mode;
5678 rtx rot, from, tmp;
5679
5680 mode = GET_MODE (ops[1]);
5681
5682 if (GET_CODE (ops[2]) == CONST_INT)
5683 {
5684 switch (mode)
5685 {
5686 case V16QImode:
5687 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5688 break;
5689 case V8HImode:
5690 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5691 break;
5692 case V4SFmode:
5693 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5694 break;
5695 case V4SImode:
5696 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5697 break;
5698 case V2DImode:
5699 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5700 break;
5701 case V2DFmode:
5702 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5703 break;
5704 default:
5705 abort ();
5706 }
5707 return;
5708 }
5709
5710 from = spu_gen_subreg (TImode, ops[1]);
5711 rot = gen_reg_rtx (TImode);
5712 tmp = gen_reg_rtx (SImode);
5713
5714 switch (mode)
5715 {
5716 case V16QImode:
5717 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5718 break;
5719 case V8HImode:
5720 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5721 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5722 break;
5723 case V4SFmode:
5724 case V4SImode:
5725 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5726 break;
5727 case V2DImode:
5728 case V2DFmode:
5729 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5730 break;
5731 default:
5732 abort ();
5733 }
5734 emit_insn (gen_rotqby_ti (rot, from, tmp));
5735
5736 emit_insn (gen_spu_convert (ops[0], rot));
5737 }
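
/* For a variable index the rotate amounts computed above are, in bytes,

       V16QI:  i - 3        V8HI:       2*i - 2
       V4SI/V4SF:  4*i      V2DI/V2DF:  8*i

   each chosen so that element i lands in the preferred slot of its type
   (byte 3, bytes 2..3, bytes 0..3, or bytes 0..7) before spu_convert is
   used to extract it.  (Illustrative summary.)  */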
5738
5739 void
5740 spu_builtin_insert (rtx ops[])
5741 {
5742 machine_mode mode = GET_MODE (ops[0]);
5743 machine_mode imode = GET_MODE_INNER (mode);
5744 rtx mask = gen_reg_rtx (TImode);
5745 rtx offset;
5746
5747 if (GET_CODE (ops[3]) == CONST_INT)
5748 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5749 else
5750 {
5751 offset = gen_reg_rtx (SImode);
5752 emit_insn (gen_mulsi3
5753 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5754 }
5755 emit_insn (gen_cpat
5756 (mask, stack_pointer_rtx, offset,
5757 GEN_INT (GET_MODE_SIZE (imode))));
5758 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5759 }
5760
5761 void
5762 spu_builtin_promote (rtx ops[])
5763 {
5764 machine_mode mode, imode;
5765 rtx rot, from, offset;
5766 HOST_WIDE_INT pos;
5767
5768 mode = GET_MODE (ops[0]);
5769 imode = GET_MODE_INNER (mode);
5770
5771 from = gen_reg_rtx (TImode);
5772 rot = spu_gen_subreg (TImode, ops[0]);
5773
5774 emit_insn (gen_spu_convert (from, ops[1]));
5775
5776 if (GET_CODE (ops[2]) == CONST_INT)
5777 {
5778 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5779 if (GET_MODE_SIZE (imode) < 4)
5780 pos += 4 - GET_MODE_SIZE (imode);
5781 offset = GEN_INT (pos & 15);
5782 }
5783 else
5784 {
5785 offset = gen_reg_rtx (SImode);
5786 switch (mode)
5787 {
5788 case V16QImode:
5789 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5790 break;
5791 case V8HImode:
5792 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5793 emit_insn (gen_addsi3 (offset, offset, offset));
5794 break;
5795 case V4SFmode:
5796 case V4SImode:
5797 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5798 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5799 break;
5800 case V2DImode:
5801 case V2DFmode:
5802 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5803 break;
5804 default:
5805 abort ();
5806 }
5807 }
5808 emit_insn (gen_rotqby_ti (rot, from, offset));
5809 }
5810
5811 static void
5812 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5813 {
5814 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5815 rtx shuf = gen_reg_rtx (V4SImode);
5816 rtx insn = gen_reg_rtx (V4SImode);
5817 rtx shufc;
5818 rtx insnc;
5819 rtx mem;
5820
5821 fnaddr = force_reg (SImode, fnaddr);
5822 cxt = force_reg (SImode, cxt);
5823
5824 if (TARGET_LARGE_MEM)
5825 {
5826 rtx rotl = gen_reg_rtx (V4SImode);
5827 rtx mask = gen_reg_rtx (V4SImode);
5828 rtx bi = gen_reg_rtx (SImode);
5829 static unsigned char const shufa[16] = {
5830 2, 3, 0, 1, 18, 19, 16, 17,
5831 0, 1, 2, 3, 16, 17, 18, 19
5832 };
5833 static unsigned char const insna[16] = {
5834 0x41, 0, 0, 79,
5835 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5836 0x60, 0x80, 0, 79,
5837 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5838 };
5839
5840 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5841 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5842
5843 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5844 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5845 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5846 emit_insn (gen_selb (insn, insnc, rotl, mask));
5847
5848 mem = adjust_address (m_tramp, V4SImode, 0);
5849 emit_move_insn (mem, insn);
5850
5851 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5852 mem = adjust_address (m_tramp, Pmode, 16);
5853 emit_move_insn (mem, bi);
5854 }
5855 else
5856 {
5857 rtx scxt = gen_reg_rtx (SImode);
5858 rtx sfnaddr = gen_reg_rtx (SImode);
5859 static unsigned char const insna[16] = {
5860 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5861 0x30, 0, 0, 0,
5862 0, 0, 0, 0,
5863 0, 0, 0, 0
5864 };
5865
5866 shufc = gen_reg_rtx (TImode);
5867 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5868
5869 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5870 fits 18 bits and the last 4 are zeros. This will be true if
5871      the stack pointer is initialized to 0x3fff0 at program start;
5872 otherwise the ila instruction will be garbage. */
5873
5874 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5875 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5876 emit_insn (gen_cpat
5877 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5878 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5879 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5880
5881 mem = adjust_address (m_tramp, V4SImode, 0);
5882 emit_move_insn (mem, insn);
5883 }
5884 emit_insn (gen_sync ());
5885 }
5886
5887 static bool
5888 spu_warn_func_return (tree decl)
5889 {
5890 /* Naked functions are implemented entirely in assembly, including the
5891 return sequence, so suppress warnings about this. */
5892 return !spu_naked_function_p (decl);
5893 }
5894
5895 void
5896 spu_expand_sign_extend (rtx ops[])
5897 {
5898 unsigned char arr[16];
5899 rtx pat = gen_reg_rtx (TImode);
5900 rtx sign, c;
5901 int i, last;
5902 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
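  /* arr is built as a shufb pattern: selector bytes 0x00-0x0f pick bytes
     of ops[1] and 0x10-0x1f pick bytes of `sign'.  The result keeps the
     original value in its low-order bytes and fills everything above it
     with copies of the sign byte.  */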
5903 if (GET_MODE (ops[1]) == QImode)
5904 {
5905 sign = gen_reg_rtx (HImode);
5906 emit_insn (gen_extendqihi2 (sign, ops[1]));
5907 for (i = 0; i < 16; i++)
5908 arr[i] = 0x12;
5909 arr[last] = 0x13;
5910 }
5911 else
5912 {
5913 for (i = 0; i < 16; i++)
5914 arr[i] = 0x10;
5915 switch (GET_MODE (ops[1]))
5916 {
5917 case HImode:
5918 sign = gen_reg_rtx (SImode);
5919 emit_insn (gen_extendhisi2 (sign, ops[1]));
5920 arr[last] = 0x03;
5921 arr[last - 1] = 0x02;
5922 break;
5923 case SImode:
5924 sign = gen_reg_rtx (SImode);
5925 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5926 for (i = 0; i < 4; i++)
5927 arr[last - i] = 3 - i;
5928 break;
5929 case DImode:
5930 sign = gen_reg_rtx (SImode);
5931 c = gen_reg_rtx (SImode);
5932 emit_insn (gen_spu_convert (c, ops[1]));
5933 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5934 for (i = 0; i < 8; i++)
5935 arr[last - i] = 7 - i;
5936 break;
5937 default:
5938 abort ();
5939 }
5940 }
5941 emit_move_insn (pat, array_to_constant (TImode, arr));
5942 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5943 }
5944
5945 /* Expand vector initialization.  If there are any constant parts,
5946    load the constant parts first, then load any non-constant parts.  */
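/* For instance, initializing a V4SImode vector with { 1, x, 2, y } first
   loads the constant vector { 1, 1, 2, 1 } -- the variable slots are
   filled with the first constant so the recursive call can still use
   splats where possible -- and then inserts x into element 1 and y into
   element 3.  */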
5947 void
5948 spu_expand_vector_init (rtx target, rtx vals)
5949 {
5950 machine_mode mode = GET_MODE (target);
5951 int n_elts = GET_MODE_NUNITS (mode);
5952 int n_var = 0;
5953 bool all_same = true;
5954 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5955 int i;
5956
5957 first = XVECEXP (vals, 0, 0);
5958 for (i = 0; i < n_elts; ++i)
5959 {
5960 x = XVECEXP (vals, 0, i);
5961 if (!(CONST_INT_P (x)
5962 || GET_CODE (x) == CONST_DOUBLE
5963 || GET_CODE (x) == CONST_FIXED))
5964 ++n_var;
5965 else
5966 {
5967 if (first_constant == NULL_RTX)
5968 first_constant = x;
5969 }
5970 if (i > 0 && !rtx_equal_p (x, first))
5971 all_same = false;
5972 }
5973
5974 /* if all elements are the same, use splats to repeat elements */
5975 if (all_same)
5976 {
5977 if (!CONSTANT_P (first)
5978 && !register_operand (first, GET_MODE (x)))
5979 first = force_reg (GET_MODE (first), first);
5980 emit_insn (gen_spu_splats (target, first));
5981 return;
5982 }
5983
5984 /* load constant parts */
5985 if (n_var != n_elts)
5986 {
5987 if (n_var == 0)
5988 {
5989 emit_move_insn (target,
5990 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5991 }
5992 else
5993 {
5994 rtx constant_parts_rtx = copy_rtx (vals);
5995
5996 gcc_assert (first_constant != NULL_RTX);
5997 	  /* Fill empty slots with the first constant; this increases
5998 	     our chance of using splats in the recursive call below.  */
5999 for (i = 0; i < n_elts; ++i)
6000 {
6001 x = XVECEXP (constant_parts_rtx, 0, i);
6002 if (!(CONST_INT_P (x)
6003 || GET_CODE (x) == CONST_DOUBLE
6004 || GET_CODE (x) == CONST_FIXED))
6005 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6006 }
6007
6008 spu_expand_vector_init (target, constant_parts_rtx);
6009 }
6010 }
6011
6012 /* load variable parts */
6013 if (n_var != 0)
6014 {
6015 rtx insert_operands[4];
6016
6017 insert_operands[0] = target;
6018 insert_operands[2] = target;
6019 for (i = 0; i < n_elts; ++i)
6020 {
6021 x = XVECEXP (vals, 0, i);
6022 if (!(CONST_INT_P (x)
6023 || GET_CODE (x) == CONST_DOUBLE
6024 || GET_CODE (x) == CONST_FIXED))
6025 {
6026 if (!register_operand (x, GET_MODE (x)))
6027 x = force_reg (GET_MODE (x), x);
6028 insert_operands[1] = x;
6029 insert_operands[3] = GEN_INT (i);
6030 spu_builtin_insert (insert_operands);
6031 }
6032 }
6033 }
6034 }
6035
6036 /* Return the insn index of the vector compare instruction for the given
6037    CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6038
6039 static int
6040 get_vec_cmp_insn (enum rtx_code code,
6041 machine_mode dest_mode,
6042 machine_mode op_mode)
6043
6044 {
6045 switch (code)
6046 {
6047 case EQ:
6048 if (dest_mode == V16QImode && op_mode == V16QImode)
6049 return CODE_FOR_ceq_v16qi;
6050 if (dest_mode == V8HImode && op_mode == V8HImode)
6051 return CODE_FOR_ceq_v8hi;
6052 if (dest_mode == V4SImode && op_mode == V4SImode)
6053 return CODE_FOR_ceq_v4si;
6054 if (dest_mode == V4SImode && op_mode == V4SFmode)
6055 return CODE_FOR_ceq_v4sf;
6056 if (dest_mode == V2DImode && op_mode == V2DFmode)
6057 return CODE_FOR_ceq_v2df;
6058 break;
6059 case GT:
6060 if (dest_mode == V16QImode && op_mode == V16QImode)
6061 return CODE_FOR_cgt_v16qi;
6062 if (dest_mode == V8HImode && op_mode == V8HImode)
6063 return CODE_FOR_cgt_v8hi;
6064 if (dest_mode == V4SImode && op_mode == V4SImode)
6065 return CODE_FOR_cgt_v4si;
6066 if (dest_mode == V4SImode && op_mode == V4SFmode)
6067 return CODE_FOR_cgt_v4sf;
6068 if (dest_mode == V2DImode && op_mode == V2DFmode)
6069 return CODE_FOR_cgt_v2df;
6070 break;
6071 case GTU:
6072 if (dest_mode == V16QImode && op_mode == V16QImode)
6073 return CODE_FOR_clgt_v16qi;
6074 if (dest_mode == V8HImode && op_mode == V8HImode)
6075 return CODE_FOR_clgt_v8hi;
6076 if (dest_mode == V4SImode && op_mode == V4SImode)
6077 return CODE_FOR_clgt_v4si;
6078 break;
6079 default:
6080 break;
6081 }
6082 return -1;
6083 }
6084
6085 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6086    DMODE is the expected destination mode.  This is a recursive function.  */
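/* Comparison codes with no matching instruction are rewritten in terms
   of the ones that do exist, for example:

     LT  (a, b)    ->  GT  (b, a)
     LTU (a, b)    ->  GTU (b, a)
     NE  (a, b)    ->  ~EQ (a, b)
     GE  (a, b)    ->  GT (a, b) | EQ (a, b)
     LTGT (a, b)   ->  LT (a, b) | GT (a, b)
     ORDERED (a, b) -> EQ (a, a) & EQ (b, b)  */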
6087
6088 static rtx
6089 spu_emit_vector_compare (enum rtx_code rcode,
6090 rtx op0, rtx op1,
6091 machine_mode dmode)
6092 {
6093 int vec_cmp_insn;
6094 rtx mask;
6095 machine_mode dest_mode;
6096 machine_mode op_mode = GET_MODE (op1);
6097
6098 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6099
6100   /* Single-precision floating point vector compare instructions use a
6101      V4SImode destination; double-precision ones use a V2DImode destination.
6102      Move the result to the appropriate mode later.  */
6103 if (dmode == V4SFmode)
6104 dest_mode = V4SImode;
6105 else if (dmode == V2DFmode)
6106 dest_mode = V2DImode;
6107 else
6108 dest_mode = dmode;
6109
6110 mask = gen_reg_rtx (dest_mode);
6111 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6112
6113 if (vec_cmp_insn == -1)
6114 {
6115 bool swap_operands = false;
6116 bool try_again = false;
6117 switch (rcode)
6118 {
6119 case LT:
6120 rcode = GT;
6121 swap_operands = true;
6122 try_again = true;
6123 break;
6124 case LTU:
6125 rcode = GTU;
6126 swap_operands = true;
6127 try_again = true;
6128 break;
6129 case NE:
6130 case UNEQ:
6131 case UNLE:
6132 case UNLT:
6133 case UNGE:
6134 case UNGT:
6135 case UNORDERED:
6136 /* Treat A != B as ~(A==B). */
6137 {
6138 enum rtx_code rev_code;
6139 enum insn_code nor_code;
6140 rtx rev_mask;
6141
6142 rev_code = reverse_condition_maybe_unordered (rcode);
6143 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6144
6145 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6146 gcc_assert (nor_code != CODE_FOR_nothing);
6147 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6148 if (dmode != dest_mode)
6149 {
6150 rtx temp = gen_reg_rtx (dest_mode);
6151 convert_move (temp, mask, 0);
6152 return temp;
6153 }
6154 return mask;
6155 }
6156 break;
6157 case GE:
6158 case GEU:
6159 case LE:
6160 case LEU:
6161 /* Try GT/GTU/LT/LTU OR EQ */
6162 {
6163 rtx c_rtx, eq_rtx;
6164 enum insn_code ior_code;
6165 enum rtx_code new_code;
6166
6167 switch (rcode)
6168 {
6169 case GE: new_code = GT; break;
6170 case GEU: new_code = GTU; break;
6171 case LE: new_code = LT; break;
6172 case LEU: new_code = LTU; break;
6173 default:
6174 gcc_unreachable ();
6175 }
6176
6177 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6178 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6179
6180 ior_code = optab_handler (ior_optab, dest_mode);
6181 gcc_assert (ior_code != CODE_FOR_nothing);
6182 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6183 if (dmode != dest_mode)
6184 {
6185 rtx temp = gen_reg_rtx (dest_mode);
6186 convert_move (temp, mask, 0);
6187 return temp;
6188 }
6189 return mask;
6190 }
6191 break;
6192 case LTGT:
6193 /* Try LT OR GT */
6194 {
6195 rtx lt_rtx, gt_rtx;
6196 enum insn_code ior_code;
6197
6198 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6199 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6200
6201 ior_code = optab_handler (ior_optab, dest_mode);
6202 gcc_assert (ior_code != CODE_FOR_nothing);
6203 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6204 if (dmode != dest_mode)
6205 {
6206 rtx temp = gen_reg_rtx (dest_mode);
6207 convert_move (temp, mask, 0);
6208 return temp;
6209 }
6210 return mask;
6211 }
6212 break;
6213 case ORDERED:
6214 /* Implement as (A==A) & (B==B) */
6215 {
6216 rtx a_rtx, b_rtx;
6217 enum insn_code and_code;
6218
6219 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6220 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6221
6222 and_code = optab_handler (and_optab, dest_mode);
6223 gcc_assert (and_code != CODE_FOR_nothing);
6224 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6225 if (dmode != dest_mode)
6226 {
6227 rtx temp = gen_reg_rtx (dest_mode);
6228 convert_move (temp, mask, 0);
6229 return temp;
6230 }
6231 return mask;
6232 }
6233 break;
6234 default:
6235 gcc_unreachable ();
6236 }
6237
6238 /* You only get two chances. */
6239 if (try_again)
6240 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6241
6242 gcc_assert (vec_cmp_insn != -1);
6243
6244 if (swap_operands)
6245 {
6246 rtx tmp;
6247 tmp = op0;
6248 op0 = op1;
6249 op1 = tmp;
6250 }
6251 }
6252
6253 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6254 if (dmode != dest_mode)
6255 {
6256 rtx temp = gen_reg_rtx (dest_mode);
6257 convert_move (temp, mask, 0);
6258 return temp;
6259 }
6260 return mask;
6261 }
6262
6263
6264 /* Emit vector conditional expression.
6265    DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
6266 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
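/* The mask computed below has all bits set in the elements for which the
   comparison holds, so the selb amounts to the element-wise select

     dest[i] = (cc_op0[i] COND cc_op1[i]) ? op1[i] : op2[i];

   which matches the VEC_COND_EXPR semantics.  */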
6267
6268 int
6269 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6270 rtx cond, rtx cc_op0, rtx cc_op1)
6271 {
6272 machine_mode dest_mode = GET_MODE (dest);
6273 enum rtx_code rcode = GET_CODE (cond);
6274 rtx mask;
6275
6276 /* Get the vector mask for the given relational operations. */
6277 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6278
6279 emit_insn(gen_selb (dest, op2, op1, mask));
6280
6281 return 1;
6282 }
6283
6284 static rtx
6285 spu_force_reg (machine_mode mode, rtx op)
6286 {
6287 rtx x, r;
6288 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6289 {
6290 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6291 || GET_MODE (op) == BLKmode)
6292 return force_reg (mode, convert_to_mode (mode, op, 0));
6293 abort ();
6294 }
6295
6296 r = force_reg (GET_MODE (op), op);
6297 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6298 {
6299 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6300 if (x)
6301 return x;
6302 }
6303
6304 x = gen_reg_rtx (mode);
6305 emit_insn (gen_spu_convert (x, r));
6306 return x;
6307 }
6308
6309 static void
6310 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6311 {
6312 HOST_WIDE_INT v = 0;
6313 int lsbits;
6314 /* Check the range of immediate operands. */
6315 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6316 {
6317 int range = p - SPU_BTI_7;
6318
6319 if (!CONSTANT_P (op))
6320 error ("%s expects an integer literal in the range [%d, %d]",
6321 d->name,
6322 spu_builtin_range[range].low, spu_builtin_range[range].high);
6323
6324 if (GET_CODE (op) == CONST
6325 && (GET_CODE (XEXP (op, 0)) == PLUS
6326 || GET_CODE (XEXP (op, 0)) == MINUS))
6327 {
6328 v = INTVAL (XEXP (XEXP (op, 0), 1));
6329 op = XEXP (XEXP (op, 0), 0);
6330 }
6331 else if (GET_CODE (op) == CONST_INT)
6332 v = INTVAL (op);
6333 else if (GET_CODE (op) == CONST_VECTOR
6334 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6335 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6336
6337 /* The default for v is 0 which is valid in every range. */
6338 if (v < spu_builtin_range[range].low
6339 || v > spu_builtin_range[range].high)
6340 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6341 d->name,
6342 spu_builtin_range[range].low, spu_builtin_range[range].high,
6343 v);
6344
6345 switch (p)
6346 {
6347 case SPU_BTI_S10_4:
6348 lsbits = 4;
6349 break;
6350 case SPU_BTI_U16_2:
6351 	  /* This is only used in lqa and stqa.  Even though the insns
6352 	     encode 16 bits of the address (all but the 2 least
6353 	     significant), only 14 bits are used because the address is
6354 	     masked to be 16-byte aligned.  */
6355 lsbits = 4;
6356 break;
6357 case SPU_BTI_S16_2:
6358 /* This is used for lqr and stqr. */
6359 lsbits = 2;
6360 break;
6361 default:
6362 lsbits = 0;
6363 }
6364
6365 if (GET_CODE (op) == LABEL_REF
6366 || (GET_CODE (op) == SYMBOL_REF
6367 && SYMBOL_REF_FUNCTION_P (op))
6368 || (v & ((1 << lsbits) - 1)) != 0)
6369 warning (0, "%d least significant bits of %s are ignored", lsbits,
6370 d->name);
6371 }
6372 }
6373
6374
6375 static int
6376 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6377 rtx target, rtx ops[])
6378 {
6379 enum insn_code icode = (enum insn_code) d->icode;
6380 int i = 0, a;
6381
6382 /* Expand the arguments into rtl. */
6383
6384 if (d->parm[0] != SPU_BTI_VOID)
6385 ops[i++] = target;
6386
6387 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6388 {
6389 tree arg = CALL_EXPR_ARG (exp, a);
6390 if (arg == 0)
6391 abort ();
6392 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6393 }
6394
6395 gcc_assert (i == insn_data[icode].n_generator_args);
6396 return i;
6397 }
6398
6399 static rtx
6400 spu_expand_builtin_1 (struct spu_builtin_description *d,
6401 tree exp, rtx target)
6402 {
6403 rtx pat;
6404 rtx ops[8];
6405 enum insn_code icode = (enum insn_code) d->icode;
6406 machine_mode mode, tmode;
6407 int i, p;
6408 int n_operands;
6409 tree return_type;
6410
6411 /* Set up ops[] with values from arglist. */
6412 n_operands = expand_builtin_args (d, exp, target, ops);
6413
6414 /* Handle the target operand which must be operand 0. */
6415 i = 0;
6416 if (d->parm[0] != SPU_BTI_VOID)
6417 {
6418
6419       /* We prefer the mode specified for the match_operand; otherwise
6420 	  use the mode from the builtin function prototype.  */
6421 tmode = insn_data[d->icode].operand[0].mode;
6422 if (tmode == VOIDmode)
6423 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6424
6425       /* Try to use target, because not using it can lead to extra copies,
6426 	  and when all of the registers are in use, extra copies lead
6427 	  to extra spills.  */
6428 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6429 ops[0] = target;
6430 else
6431 target = ops[0] = gen_reg_rtx (tmode);
6432
6433 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6434 abort ();
6435
6436 i++;
6437 }
6438
6439 if (d->fcode == SPU_MASK_FOR_LOAD)
6440 {
6441 machine_mode mode = insn_data[icode].operand[1].mode;
6442 tree arg;
6443 rtx addr, op, pat;
6444
6445 /* get addr */
6446 arg = CALL_EXPR_ARG (exp, 0);
6447 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6448 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6449 addr = memory_address (mode, op);
6450
6451 /* negate addr */
6452 op = gen_reg_rtx (GET_MODE (addr));
6453 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6454 op = gen_rtx_MEM (mode, op);
6455
6456 pat = GEN_FCN (icode) (target, op);
6457 if (!pat)
6458 return 0;
6459 emit_insn (pat);
6460 return target;
6461 }
6462
6463   /* Ignore align_hint, but still expand its arguments in case they have
6464      side effects.  */
6465 if (icode == CODE_FOR_spu_align_hint)
6466 return 0;
6467
6468 /* Handle the rest of the operands. */
6469 for (p = 1; i < n_operands; i++, p++)
6470 {
6471 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6472 mode = insn_data[d->icode].operand[i].mode;
6473 else
6474 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6475
6476 /* mode can be VOIDmode here for labels */
6477
6478 /* For specific intrinsics with an immediate operand, e.g.,
6479 si_ai(), we sometimes need to convert the scalar argument to a
6480 vector argument by splatting the scalar. */
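      /* For example, a CONST_INT 5 against a V4SImode operand becomes the
	 constant vector { 5, 5, 5, 5 }; a non-constant scalar is converted
	 to the element mode if necessary and splatted at run time with
	 spu_splats.  */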
6481 if (VECTOR_MODE_P (mode)
6482 && (GET_CODE (ops[i]) == CONST_INT
6483 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6484 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6485 {
6486 if (GET_CODE (ops[i]) == CONST_INT)
6487 ops[i] = spu_const (mode, INTVAL (ops[i]));
6488 else
6489 {
6490 rtx reg = gen_reg_rtx (mode);
6491 machine_mode imode = GET_MODE_INNER (mode);
6492 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6493 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6494 if (imode != GET_MODE (ops[i]))
6495 ops[i] = convert_to_mode (imode, ops[i],
6496 TYPE_UNSIGNED (spu_builtin_types
6497 [d->parm[i]]));
6498 emit_insn (gen_spu_splats (reg, ops[i]));
6499 ops[i] = reg;
6500 }
6501 }
6502
6503 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6504
6505 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6506 ops[i] = spu_force_reg (mode, ops[i]);
6507 }
6508
6509 switch (n_operands)
6510 {
6511 case 0:
6512 pat = GEN_FCN (icode) (0);
6513 break;
6514 case 1:
6515 pat = GEN_FCN (icode) (ops[0]);
6516 break;
6517 case 2:
6518 pat = GEN_FCN (icode) (ops[0], ops[1]);
6519 break;
6520 case 3:
6521 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6522 break;
6523 case 4:
6524 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6525 break;
6526 case 5:
6527 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6528 break;
6529 case 6:
6530 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6531 break;
6532 default:
6533 abort ();
6534 }
6535
6536 if (!pat)
6537 abort ();
6538
6539 if (d->type == B_CALL || d->type == B_BISLED)
6540 emit_call_insn (pat);
6541 else if (d->type == B_JUMP)
6542 {
6543 emit_jump_insn (pat);
6544 emit_barrier ();
6545 }
6546 else
6547 emit_insn (pat);
6548
6549 return_type = spu_builtin_types[d->parm[0]];
6550 if (d->parm[0] != SPU_BTI_VOID
6551 && GET_MODE (target) != TYPE_MODE (return_type))
6552 {
6553       /* target is the return value.  It should always have the mode of
6554 	  the builtin function prototype.  */
6555 target = spu_force_reg (TYPE_MODE (return_type), target);
6556 }
6557
6558 return target;
6559 }
6560
6561 rtx
6562 spu_expand_builtin (tree exp,
6563 rtx target,
6564 rtx subtarget ATTRIBUTE_UNUSED,
6565 machine_mode mode ATTRIBUTE_UNUSED,
6566 int ignore ATTRIBUTE_UNUSED)
6567 {
6568 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6569 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6570 struct spu_builtin_description *d;
6571
6572 if (fcode < NUM_SPU_BUILTINS)
6573 {
6574 d = &spu_builtins[fcode];
6575
6576 return spu_expand_builtin_1 (d, exp, target);
6577 }
6578 abort ();
6579 }
6580
6581 /* Implement targetm.vectorize.builtin_mask_for_load. */
6582 static tree
6583 spu_builtin_mask_for_load (void)
6584 {
6585 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6586 }
6587
6588 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6589 static int
6590 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6591 tree vectype,
6592 int misalign ATTRIBUTE_UNUSED)
6593 {
6594 unsigned elements;
6595
6596 switch (type_of_cost)
6597 {
6598 case scalar_stmt:
6599 case vector_stmt:
6600 case vector_load:
6601 case vector_store:
6602 case vec_to_scalar:
6603 case scalar_to_vec:
6604 case cond_branch_not_taken:
6605 case vec_perm:
6606 case vec_promote_demote:
6607 return 1;
6608
6609 case scalar_store:
6610 return 10;
6611
6612 case scalar_load:
6613 /* Load + rotate. */
6614 return 2;
6615
6616 case unaligned_load:
6617 return 2;
6618
6619 case cond_branch_taken:
6620 return 6;
6621
6622 case vec_construct:
6623 elements = TYPE_VECTOR_SUBPARTS (vectype);
6624 return elements / 2 + 1;
6625
6626 default:
6627 gcc_unreachable ();
6628 }
6629 }
6630
6631 /* Implement targetm.vectorize.init_cost. */
6632
6633 static void *
6634 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6635 {
6636 unsigned *cost = XNEWVEC (unsigned, 3);
6637 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6638 return cost;
6639 }
6640
6641 /* Implement targetm.vectorize.add_stmt_cost. */
6642
6643 static unsigned
6644 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6645 struct _stmt_vec_info *stmt_info, int misalign,
6646 enum vect_cost_model_location where)
6647 {
6648 unsigned *cost = (unsigned *) data;
6649 unsigned retval = 0;
6650
6651 if (flag_vect_cost_model)
6652 {
6653 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6654 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6655
6656 /* Statements in an inner loop relative to the loop being
6657 vectorized are weighted more heavily. The value here is
6658 arbitrary and could potentially be improved with analysis. */
6659 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6660 count *= 50; /* FIXME. */
6661
6662 retval = (unsigned) (count * stmt_cost);
6663 cost[where] += retval;
6664 }
6665
6666 return retval;
6667 }
6668
6669 /* Implement targetm.vectorize.finish_cost. */
6670
6671 static void
6672 spu_finish_cost (void *data, unsigned *prologue_cost,
6673 unsigned *body_cost, unsigned *epilogue_cost)
6674 {
6675 unsigned *cost = (unsigned *) data;
6676 *prologue_cost = cost[vect_prologue];
6677 *body_cost = cost[vect_body];
6678 *epilogue_cost = cost[vect_epilogue];
6679 }
6680
6681 /* Implement targetm.vectorize.destroy_cost_data. */
6682
6683 static void
6684 spu_destroy_cost_data (void *data)
6685 {
6686 free (data);
6687 }
6688
6689 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6690    after applying N iterations.  This routine does not determine how many
6691    iterations are required to reach the desired alignment.  */
6692
6693 static bool
6694 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6695 {
6696 if (is_packed)
6697 return false;
6698
6699 /* All other types are naturally aligned. */
6700 return true;
6701 }
6702
6703 /* Return the appropriate mode for a named address pointer. */
6704 static machine_mode
6705 spu_addr_space_pointer_mode (addr_space_t addrspace)
6706 {
6707 switch (addrspace)
6708 {
6709 case ADDR_SPACE_GENERIC:
6710 return ptr_mode;
6711 case ADDR_SPACE_EA:
6712 return EAmode;
6713 default:
6714 gcc_unreachable ();
6715 }
6716 }
6717
6718 /* Return the appropriate mode for a named address address. */
6719 static machine_mode
6720 spu_addr_space_address_mode (addr_space_t addrspace)
6721 {
6722 switch (addrspace)
6723 {
6724 case ADDR_SPACE_GENERIC:
6725 return Pmode;
6726 case ADDR_SPACE_EA:
6727 return EAmode;
6728 default:
6729 gcc_unreachable ();
6730 }
6731 }
6732
6733 /* Determine if one named address space is a subset of another. */
6734
6735 static bool
6736 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6737 {
6738 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6739 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6740
6741 if (subset == superset)
6742 return true;
6743
6744 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6745 being subsets but instead as disjoint address spaces. */
6746 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6747 return false;
6748
6749 else
6750 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6751 }
6752
6753 /* Convert from one address space to another. */
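/* In C terms the two directions below amount to (a sketch only;
   __ea_local_store is a 64-bit variable holding the effective address of
   the local store):

     generic = (ea  == 0) ? 0 : ea  - __ea_local_store;
     ea      = (ptr == 0) ? 0 : ptr + __ea_local_store;

   The emit_conditional_move calls implement the NULL checks so that a
   null pointer converts to a null pointer.  */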
6754 static rtx
6755 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6756 {
6757 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6758 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6759
6760 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6761 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6762
6763 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6764 {
6765 rtx result, ls;
6766
6767 ls = gen_const_mem (DImode,
6768 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6769 set_mem_align (ls, 128);
6770
6771 result = gen_reg_rtx (Pmode);
6772 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6773 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6774 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6775 ls, const0_rtx, Pmode, 1);
6776
6777 emit_insn (gen_subsi3 (result, op, ls));
6778
6779 return result;
6780 }
6781
6782 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6783 {
6784 rtx result, ls;
6785
6786 ls = gen_const_mem (DImode,
6787 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6788 set_mem_align (ls, 128);
6789
6790 result = gen_reg_rtx (EAmode);
6791 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6792 op = force_reg (Pmode, op);
6793 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6794 ls, const0_rtx, EAmode, 1);
6795 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6796
6797 if (EAmode == SImode)
6798 emit_insn (gen_addsi3 (result, op, ls));
6799 else
6800 emit_insn (gen_adddi3 (result, op, ls));
6801
6802 return result;
6803 }
6804
6805 else
6806 gcc_unreachable ();
6807 }
6808
6809
6810 /* Count the total number of instructions in each pipe and return the
6811    maximum, which is used as the Minimum Iteration Interval (MII)
6812    in the modulo scheduler.  get_pipe () will return -2, -1, 0, or 1;
6813    -2 means the instruction can go in either pipe0 or pipe1.  */
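/* For example, with 4 instructions that can go in either pipe (t[0]),
   3 that need pipe0 (t[2]) and 2 that need pipe1 (t[3]), the result is
   MAX ((4 + 3 + 2 + 1) / 2, MAX (3, 2)) = 5.  */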
6814 static int
6815 spu_sms_res_mii (struct ddg *g)
6816 {
6817 int i;
6818 unsigned t[4] = {0, 0, 0, 0};
6819
6820 for (i = 0; i < g->num_nodes; i++)
6821 {
6822 rtx_insn *insn = g->nodes[i].insn;
6823 int p = get_pipe (insn) + 2;
6824
6825 gcc_assert (p >= 0);
6826 gcc_assert (p < 4);
6827
6828 t[p]++;
6829 if (dump_file && INSN_P (insn))
6830 fprintf (dump_file, "i%d %s %d %d\n",
6831 INSN_UID (insn),
6832 insn_data[INSN_CODE(insn)].name,
6833 p, t[p]);
6834 }
6835 if (dump_file)
6836 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6837
6838 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6839 }
6840
6841
6842 void
6843 spu_init_expanders (void)
6844 {
6845 if (cfun)
6846 {
6847 rtx r0, r1;
6848       /* The hard frame pointer is only 128-bit aligned when
6849 	 frame_pointer_needed is true.  We don't know that until we're
6850 expanding the prologue. */
6851 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6852
6853 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6854 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6855 to be treated as aligned, so generate them here. */
6856 r0 = gen_reg_rtx (SImode);
6857 r1 = gen_reg_rtx (SImode);
6858 mark_reg_pointer (r0, 128);
6859 mark_reg_pointer (r1, 128);
6860 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6861 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6862 }
6863 }
6864
6865 static machine_mode
6866 spu_libgcc_cmp_return_mode (void)
6867 {
6868
6869   /* On SPU the word mode is TImode, so it is better to use SImode
6870      for compare returns.  */
6871 return SImode;
6872 }
6873
6874 static machine_mode
6875 spu_libgcc_shift_count_mode (void)
6876 {
6877   /* On SPU the word mode is TImode, so it is better to use SImode
6878      for shift counts.  */
6879 return SImode;
6880 }
6881
6882 /* Implement targetm.section_type_flags. */
6883 static unsigned int
6884 spu_section_type_flags (tree decl, const char *name, int reloc)
6885 {
6886 /* .toe needs to have type @nobits. */
6887 if (strcmp (name, ".toe") == 0)
6888 return SECTION_BSS;
6889 /* Don't load _ea into the current address space. */
6890 if (strcmp (name, "._ea") == 0)
6891 return SECTION_WRITE | SECTION_DEBUG;
6892 return default_section_type_flags (decl, name, reloc);
6893 }
6894
6895 /* Implement targetm.select_section. */
6896 static section *
6897 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6898 {
6899 /* Variables and constants defined in the __ea address space
6900 go into a special section named "._ea". */
6901 if (TREE_TYPE (decl) != error_mark_node
6902 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6903 {
6904 /* We might get called with string constants, but get_named_section
6905 doesn't like them as they are not DECLs. Also, we need to set
6906 flags in that case. */
6907 if (!DECL_P (decl))
6908 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6909
6910 return get_named_section (decl, "._ea", reloc);
6911 }
6912
6913 return default_elf_select_section (decl, reloc, align);
6914 }
6915
6916 /* Implement targetm.unique_section. */
6917 static void
6918 spu_unique_section (tree decl, int reloc)
6919 {
6920 /* We don't support unique section names in the __ea address
6921 space for now. */
6922 if (TREE_TYPE (decl) != error_mark_node
6923 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6924 return;
6925
6926 default_unique_section (decl, reloc);
6927 }
6928
6929 /* Generate a constant or register which contains 2^SCALE. We assume
6930 the result is valid for MODE. Currently, MODE must be V4SFmode and
6931 SCALE must be SImode. */
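/* For a constant SCALE the bytes below spell out the IEEE single-precision
   encoding of 2^SCALE replicated into every word; e.g. SCALE = 3 gives
   exp = 130 and each word becomes 0x41000000, i.e. 8.0f.  */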
6932 rtx
6933 spu_gen_exp2 (machine_mode mode, rtx scale)
6934 {
6935 gcc_assert (mode == V4SFmode);
6936 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6937 if (GET_CODE (scale) != CONST_INT)
6938 {
6939 /* unsigned int exp = (127 + scale) << 23;
6940 __vector float m = (__vector float) spu_splats (exp); */
6941 rtx reg = force_reg (SImode, scale);
6942 rtx exp = gen_reg_rtx (SImode);
6943 rtx mul = gen_reg_rtx (mode);
6944 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6945 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6946 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6947 return mul;
6948 }
6949 else
6950 {
6951 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6952 unsigned char arr[16];
6953 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6954 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6955 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6956 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6957 return array_to_constant (mode, arr);
6958 }
6959 }
6960
6961 /* After reload, just change the convert into a move instruction
6962 or a dead instruction. */
6963 void
6964 spu_split_convert (rtx ops[])
6965 {
6966 if (REGNO (ops[0]) == REGNO (ops[1]))
6967 emit_note (NOTE_INSN_DELETED);
6968 else
6969 {
6970 /* Use TImode always as this might help hard reg copyprop. */
6971 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6972 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6973 emit_insn (gen_move_insn (op0, op1));
6974 }
6975 }
6976
6977 void
6978 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6979 {
6980 fprintf (file, "# profile\n");
6981 fprintf (file, "brsl $75, _mcount\n");
6982 }
6983
6984 /* Implement targetm.ref_may_alias_errno. */
6985 static bool
6986 spu_ref_may_alias_errno (ao_ref *ref)
6987 {
6988 tree base = ao_ref_base (ref);
6989
6990 /* With SPU newlib, errno is defined as something like
6991 _impure_data._errno
6992 The default implementation of this target macro does not
6993      recognize such expressions, so special-case it here.  */
6994
6995 if (TREE_CODE (base) == VAR_DECL
6996 && !TREE_STATIC (base)
6997 && DECL_EXTERNAL (base)
6998 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
6999 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7000 "_impure_data") == 0
7001 /* _errno is the first member of _impure_data. */
7002 && ref->offset == 0)
7003 return true;
7004
7005 return default_ref_may_alias_errno (ref);
7006 }
7007
7008 /* Output thunk to FILE that implements a C++ virtual function call (with
7009 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7010 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7011 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7012 relative to the resulting this pointer. */
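/* For instance, when DELTA fits in 10 bits and VCALL_OFFSET is zero, the
   body of the emitted thunk is just

	ai	%1,%1,%2	#  this += delta
	br	%0		#  tail-jump to FUNCTION

   using the operand numbering set up below.  */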
7013
7014 static void
7015 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7016 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7017 tree function)
7018 {
7019 rtx op[8];
7020
7021 /* Make sure unwind info is emitted for the thunk if needed. */
7022 final_start_function (emit_barrier (), file, 1);
7023
7024 /* Operand 0 is the target function. */
7025 op[0] = XEXP (DECL_RTL (function), 0);
7026
7027 /* Operand 1 is the 'this' pointer. */
7028 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7029 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7030 else
7031 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7032
7033 /* Operands 2/3 are the low/high halfwords of delta. */
7034 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7035 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7036
7037 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7038 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7039 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7040
7041 /* Operands 6/7 are temporary registers. */
7042 op[6] = gen_rtx_REG (Pmode, 79);
7043 op[7] = gen_rtx_REG (Pmode, 78);
7044
7045 /* Add DELTA to this pointer. */
7046 if (delta)
7047 {
7048 if (delta >= -0x200 && delta < 0x200)
7049 output_asm_insn ("ai\t%1,%1,%2", op);
7050 else if (delta >= -0x8000 && delta < 0x8000)
7051 {
7052 output_asm_insn ("il\t%6,%2", op);
7053 output_asm_insn ("a\t%1,%1,%6", op);
7054 }
7055 else
7056 {
7057 output_asm_insn ("ilhu\t%6,%3", op);
7058 output_asm_insn ("iohl\t%6,%2", op);
7059 output_asm_insn ("a\t%1,%1,%6", op);
7060 }
7061 }
7062
7063 /* Perform vcall adjustment. */
7064 if (vcall_offset)
7065 {
7066 output_asm_insn ("lqd\t%7,0(%1)", op);
7067 output_asm_insn ("rotqby\t%7,%7,%1", op);
7068
7069 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7070 output_asm_insn ("ai\t%7,%7,%4", op);
7071 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7072 {
7073 output_asm_insn ("il\t%6,%4", op);
7074 output_asm_insn ("a\t%7,%7,%6", op);
7075 }
7076 else
7077 {
7078 output_asm_insn ("ilhu\t%6,%5", op);
7079 output_asm_insn ("iohl\t%6,%4", op);
7080 output_asm_insn ("a\t%7,%7,%6", op);
7081 }
7082
7083 output_asm_insn ("lqd\t%6,0(%7)", op);
7084 output_asm_insn ("rotqby\t%6,%6,%7", op);
7085 output_asm_insn ("a\t%1,%1,%6", op);
7086 }
7087
7088 /* Jump to target. */
7089 output_asm_insn ("br\t%0", op);
7090
7091 final_end_function ();
7092 }
7093
7094 /* Canonicalize a comparison from one we don't have to one we do have. */
7095 static void
7096 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7097 bool op0_preserve_value)
7098 {
7099 if (!op0_preserve_value
7100 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7101 {
7102 rtx tem = *op0;
7103 *op0 = *op1;
7104 *op1 = tem;
7105 *code = (int)swap_condition ((enum rtx_code)*code);
7106 }
7107 }
7108
7109 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7110 to perform. MEM is the memory on which to operate. VAL is the second
7111 operand of the binary operator. BEFORE and AFTER are optional locations to
7112    return the value of MEM either before or after the operation.  */
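/* In outline:

     before = *mem;
     after  = (code == MULT) ? ~(before & val)	(i.e. NAND)
			     : before <code> val;
     *mem   = after;

   with BEFORE and AFTER, when supplied, receiving those two values.  */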
7113 void
7114 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7115 rtx orig_before, rtx orig_after)
7116 {
7117 machine_mode mode = GET_MODE (mem);
7118 rtx before = orig_before, after = orig_after;
7119
7120 if (before == NULL_RTX)
7121 before = gen_reg_rtx (mode);
7122
7123 emit_move_insn (before, mem);
7124
7125 if (code == MULT) /* NAND operation */
7126 {
7127 rtx x = expand_simple_binop (mode, AND, before, val,
7128 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7129 after = expand_simple_unop (mode, NOT, x, after, 1);
7130 }
7131 else
7132 {
7133 after = expand_simple_binop (mode, code, before, val,
7134 after, 1, OPTAB_LIB_WIDEN);
7135 }
7136
7137 emit_move_insn (mem, after);
7138
7139 if (orig_after && after != orig_after)
7140 emit_move_insn (orig_after, after);
7141 }
7142
7143 \f
7144 /* Table of machine attributes. */
7145 static const struct attribute_spec spu_attribute_table[] =
7146 {
7147 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7148 affects_type_identity } */
7149 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7150 false },
7151 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7152 false },
7153 { NULL, 0, 0, false, false, false, NULL, false }
7154 };
7155
7156 /* TARGET overrides. */
7157
7158 #undef TARGET_LRA_P
7159 #define TARGET_LRA_P hook_bool_void_false
7160
7161 #undef TARGET_ADDR_SPACE_POINTER_MODE
7162 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7163
7164 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7165 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7166
7167 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7168 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7169 spu_addr_space_legitimate_address_p
7170
7171 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7172 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7173
7174 #undef TARGET_ADDR_SPACE_SUBSET_P
7175 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7176
7177 #undef TARGET_ADDR_SPACE_CONVERT
7178 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7179
7180 #undef TARGET_INIT_BUILTINS
7181 #define TARGET_INIT_BUILTINS spu_init_builtins
7182 #undef TARGET_BUILTIN_DECL
7183 #define TARGET_BUILTIN_DECL spu_builtin_decl
7184
7185 #undef TARGET_EXPAND_BUILTIN
7186 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7187
7188 #undef TARGET_UNWIND_WORD_MODE
7189 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7190
7191 #undef TARGET_LEGITIMIZE_ADDRESS
7192 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7193
7194 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7195 and .quad for the debugger. When it is known that the assembler is fixed,
7196 these can be removed. */
7197 #undef TARGET_ASM_UNALIGNED_SI_OP
7198 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7199
7200 #undef TARGET_ASM_ALIGNED_DI_OP
7201 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7202
7203 /* The .8byte directive doesn't seem to work well for a 32 bit
7204 architecture. */
7205 #undef TARGET_ASM_UNALIGNED_DI_OP
7206 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7207
7208 #undef TARGET_RTX_COSTS
7209 #define TARGET_RTX_COSTS spu_rtx_costs
7210
7211 #undef TARGET_ADDRESS_COST
7212 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7213
7214 #undef TARGET_SCHED_ISSUE_RATE
7215 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7216
7217 #undef TARGET_SCHED_INIT_GLOBAL
7218 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7219
7220 #undef TARGET_SCHED_INIT
7221 #define TARGET_SCHED_INIT spu_sched_init
7222
7223 #undef TARGET_SCHED_VARIABLE_ISSUE
7224 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7225
7226 #undef TARGET_SCHED_REORDER
7227 #define TARGET_SCHED_REORDER spu_sched_reorder
7228
7229 #undef TARGET_SCHED_REORDER2
7230 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7231
7232 #undef TARGET_SCHED_ADJUST_COST
7233 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7234
7235 #undef TARGET_ATTRIBUTE_TABLE
7236 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7237
7238 #undef TARGET_ASM_INTEGER
7239 #define TARGET_ASM_INTEGER spu_assemble_integer
7240
7241 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7242 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7243
7244 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7245 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7246
7247 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7248 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7249
7250 #undef TARGET_ASM_GLOBALIZE_LABEL
7251 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7252
7253 #undef TARGET_PASS_BY_REFERENCE
7254 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7255
7256 #undef TARGET_FUNCTION_ARG
7257 #define TARGET_FUNCTION_ARG spu_function_arg
7258
7259 #undef TARGET_FUNCTION_ARG_ADVANCE
7260 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7261
7262 #undef TARGET_MUST_PASS_IN_STACK
7263 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7264
7265 #undef TARGET_BUILD_BUILTIN_VA_LIST
7266 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7267
7268 #undef TARGET_EXPAND_BUILTIN_VA_START
7269 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7270
7271 #undef TARGET_SETUP_INCOMING_VARARGS
7272 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7273
7274 #undef TARGET_MACHINE_DEPENDENT_REORG
7275 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7276
7277 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7278 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7279
7280 #undef TARGET_INIT_LIBFUNCS
7281 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7282
7283 #undef TARGET_RETURN_IN_MEMORY
7284 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7285
7286 #undef TARGET_ENCODE_SECTION_INFO
7287 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7288
7289 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7290 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7291
7292 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7293 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7294
7295 #undef TARGET_VECTORIZE_INIT_COST
7296 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7297
7298 #undef TARGET_VECTORIZE_ADD_STMT_COST
7299 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7300
7301 #undef TARGET_VECTORIZE_FINISH_COST
7302 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7303
7304 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7305 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7306
7307 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7308 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7309
7310 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7311 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7312
7313 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7314 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7315
7316 #undef TARGET_SCHED_SMS_RES_MII
7317 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7318
7319 #undef TARGET_SECTION_TYPE_FLAGS
7320 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7321
7322 #undef TARGET_ASM_SELECT_SECTION
7323 #define TARGET_ASM_SELECT_SECTION spu_select_section
7324
7325 #undef TARGET_ASM_UNIQUE_SECTION
7326 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7327
7328 #undef TARGET_LEGITIMATE_ADDRESS_P
7329 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7330
7331 #undef TARGET_LEGITIMATE_CONSTANT_P
7332 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7333
7334 #undef TARGET_TRAMPOLINE_INIT
7335 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7336
7337 #undef TARGET_WARN_FUNC_RETURN
7338 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7339
7340 #undef TARGET_OPTION_OVERRIDE
7341 #define TARGET_OPTION_OVERRIDE spu_option_override
7342
7343 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7344 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7345
7346 #undef TARGET_REF_MAY_ALIAS_ERRNO
7347 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7348
7349 #undef TARGET_ASM_OUTPUT_MI_THUNK
7350 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7351 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7352 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7353
7354 /* Variable tracking should be run after all optimizations which
7355 change order of insns. It also needs a valid CFG. */
7356 #undef TARGET_DELAY_VARTRACK
7357 #define TARGET_DELAY_VARTRACK true
7358
7359 #undef TARGET_CANONICALIZE_COMPARISON
7360 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7361
7362 #undef TARGET_CAN_USE_DOLOOP_P
7363 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7364
7365 struct gcc_target targetm = TARGET_INITIALIZER;
7366
7367 #include "gt-spu.h"