1 /* Copyright (C) 2006-2013 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "tree.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "except.h"
34 #include "function.h"
35 #include "output.h"
36 #include "basic-block.h"
37 #include "diagnostic-core.h"
38 #include "ggc.h"
39 #include "hashtab.h"
40 #include "tm_p.h"
41 #include "target.h"
42 #include "target-def.h"
43 #include "langhooks.h"
44 #include "reload.h"
45 #include "sched-int.h"
46 #include "params.h"
47 #include "machmode.h"
48 #include "gimple.h"
49 #include "tm-constrs.h"
50 #include "ddg.h"
51 #include "sbitmap.h"
52 #include "timevar.h"
53 #include "df.h"
54 #include "dumpfile.h"
55 #include "cfgloop.h"
56
57 /* Builtin types, data and prototypes. */
58
59 enum spu_builtin_type_index
60 {
61 SPU_BTI_END_OF_PARAMS,
62
63 /* We create new type nodes for these. */
64 SPU_BTI_V16QI,
65 SPU_BTI_V8HI,
66 SPU_BTI_V4SI,
67 SPU_BTI_V2DI,
68 SPU_BTI_V4SF,
69 SPU_BTI_V2DF,
70 SPU_BTI_UV16QI,
71 SPU_BTI_UV8HI,
72 SPU_BTI_UV4SI,
73 SPU_BTI_UV2DI,
74
75 /* A 16-byte type. (Implemented with V16QI_type_node) */
76 SPU_BTI_QUADWORD,
77
78 /* These all correspond to intSI_type_node */
79 SPU_BTI_7,
80 SPU_BTI_S7,
81 SPU_BTI_U7,
82 SPU_BTI_S10,
83 SPU_BTI_S10_4,
84 SPU_BTI_U14,
85 SPU_BTI_16,
86 SPU_BTI_S16,
87 SPU_BTI_S16_2,
88 SPU_BTI_U16,
89 SPU_BTI_U16_2,
90 SPU_BTI_U18,
91
92 /* These correspond to the standard types */
93 SPU_BTI_INTQI,
94 SPU_BTI_INTHI,
95 SPU_BTI_INTSI,
96 SPU_BTI_INTDI,
97
98 SPU_BTI_UINTQI,
99 SPU_BTI_UINTHI,
100 SPU_BTI_UINTSI,
101 SPU_BTI_UINTDI,
102
103 SPU_BTI_FLOAT,
104 SPU_BTI_DOUBLE,
105
106 SPU_BTI_VOID,
107 SPU_BTI_PTR,
108
109 SPU_BTI_MAX
110 };
111
112 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
113 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
114 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
115 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
116 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
117 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
118 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
119 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
120 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
121 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
122
123 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
124
125 struct spu_builtin_range
126 {
127 int low, high;
128 };
129
130 static struct spu_builtin_range spu_builtin_range[] = {
131 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
132 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
133 {0ll, 0x7fll}, /* SPU_BTI_U7 */
134 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
135 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
136 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
137 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
138 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
139 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
140 {0ll, 0xffffll}, /* SPU_BTI_U16 */
141 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
143 };
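/* Roughly speaking (an informal reading, not taken from the ISA manual):
   each entry above is the [low, high] range accepted for the corresponding
   SPU_BTI_* immediate.  For example, SPU_BTI_S10 (-0x200 .. 0x1ff) matches
   a 10-bit signed field such as the one used by ai/cgti, SPU_BTI_S10_4
   (-0x2000 .. 0x1fff) appears to be the same field scaled by 16 as used for
   quadword d-form offsets, and SPU_BTI_16 deliberately accepts both the
   signed and the unsigned 16-bit range.  */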
144
145 \f
146 /* Target specific attribute specifications. */
147 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
148
149 /* Prototypes and external defs. */
150 static int get_pipe (rtx insn);
151 static int spu_naked_function_p (tree func);
152 static int mem_is_padded_component_ref (rtx x);
153 static void fix_range (const char *);
154 static rtx spu_expand_load (rtx, rtx, rtx, int);
155
156 /* Which instruction set architecture to use. */
157 int spu_arch;
158 /* Which cpu are we tuning for. */
159 int spu_tune;
160
161 /* The hardware requires 8 insns between a hint and the branch it
162 affects. This variable describes how many rtl instructions the
163 compiler needs to see before inserting a hint, and then the compiler
164 will insert enough nops to make it at least 8 insns. The default is
165 for the compiler to allow up to 2 nops to be emitted. The nops are
166 inserted in pairs, so we round down. */
167 int spu_hint_dist = (8*4) - (2*4);
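/* In other words: 8 insns * 4 bytes = 32 bytes must separate the hint from
   the branch, and with the default allowance of 2 nops (8 bytes) the
   compiler only needs to see 32 - 8 = 24 bytes of real instructions.  */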
168
169 enum spu_immediate {
170 SPU_NONE,
171 SPU_IL,
172 SPU_ILA,
173 SPU_ILH,
174 SPU_ILHU,
175 SPU_ORI,
176 SPU_ORHI,
177 SPU_ORBI,
178 SPU_IOHL
179 };
180 enum immediate_class
181 {
182 IC_POOL, /* constant pool */
183 IC_IL1, /* one il* instruction */
184 IC_IL2, /* both ilhu and iohl instructions */
185 IC_IL1s, /* one il* instruction */
186 IC_IL2s, /* both ilhu and iohl instructions */
187 IC_FSMBI, /* the fsmbi instruction */
188 IC_CPAT, /* one of the c*d instructions */
189 IC_FSMBI2 /* fsmbi plus 1 other instruction */
190 };
191
192 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
193 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
194 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
195 static enum immediate_class classify_immediate (rtx op,
196 enum machine_mode mode);
197
198 /* Pointer mode for __ea references. */
199 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
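/* So with the 64-bit __ea address model (spu_ea_model == 64) an __ea
   pointer is kept in DImode, while the 32-bit model treats it as an
   ordinary SImode local-store pointer.  */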
200
201 \f
202 /* Define the structure for the machine field in struct function. */
203 struct GTY(()) machine_function
204 {
205 /* Register to use for PIC accesses. */
206 rtx pic_reg;
207 };
208
209 /* How to allocate a 'struct machine_function'. */
210 static struct machine_function *
211 spu_init_machine_status (void)
212 {
213 return ggc_alloc_cleared_machine_function ();
214 }
215
216 /* Implement TARGET_OPTION_OVERRIDE. */
217 static void
218 spu_option_override (void)
219 {
220 /* Set up function hooks. */
221 init_machine_status = spu_init_machine_status;
222
223 /* Small loops will be unrolled at -O3. For SPU it is more important
224 to keep code small by default. */
225 if (!flag_unroll_loops && !flag_peel_loops)
226 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
227 global_options.x_param_values,
228 global_options_set.x_param_values);
229
230 flag_omit_frame_pointer = 1;
231
232 /* Functions must be 8 byte aligned so we correctly handle dual issue */
233 if (align_functions < 8)
234 align_functions = 8;
235
236 spu_hint_dist = 8*4 - spu_max_nops*4;
237 if (spu_hint_dist < 0)
238 spu_hint_dist = 0;
239
240 if (spu_fixed_range_string)
241 fix_range (spu_fixed_range_string);
242
243 /* Determine processor architectural level. */
244 if (spu_arch_string)
245 {
246 if (strcmp (&spu_arch_string[0], "cell") == 0)
247 spu_arch = PROCESSOR_CELL;
248 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
249 spu_arch = PROCESSOR_CELLEDP;
250 else
251 error ("bad value (%s) for -march= switch", spu_arch_string);
252 }
253
254 /* Determine processor to tune for. */
255 if (spu_tune_string)
256 {
257 if (strcmp (&spu_tune_string[0], "cell") == 0)
258 spu_tune = PROCESSOR_CELL;
259 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
260 spu_tune = PROCESSOR_CELLEDP;
261 else
262 error ("bad value (%s) for -mtune= switch", spu_tune_string);
263 }
264
265 /* Change defaults according to the processor architecture. */
266 if (spu_arch == PROCESSOR_CELLEDP)
267 {
268 /* If no command line option has been otherwise specified, change
269 the default to -mno-safe-hints on celledp -- only the original
270 Cell/B.E. processors require this workaround. */
271 if (!(target_flags_explicit & MASK_SAFE_HINTS))
272 target_flags &= ~MASK_SAFE_HINTS;
273 }
274
275 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
276 }
277 \f
278 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
279 struct attribute_spec.handler. */
280
281 /* True if MODE is valid for the target. By "valid", we mean able to
282 be manipulated in non-trivial ways. In particular, this means all
283 the arithmetic is supported. */
284 static bool
285 spu_scalar_mode_supported_p (enum machine_mode mode)
286 {
287 switch (mode)
288 {
289 case QImode:
290 case HImode:
291 case SImode:
292 case SFmode:
293 case DImode:
294 case TImode:
295 case DFmode:
296 return true;
297
298 default:
299 return false;
300 }
301 }
302
303 /* Similarly for vector modes. "Supported" here is less strict. At
304 least some operations are supported; need to check optabs or builtins
305 for further details. */
306 static bool
307 spu_vector_mode_supported_p (enum machine_mode mode)
308 {
309 switch (mode)
310 {
311 case V16QImode:
312 case V8HImode:
313 case V4SImode:
314 case V2DImode:
315 case V4SFmode:
316 case V2DFmode:
317 return true;
318
319 default:
320 return false;
321 }
322 }
323
324 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
325 least significant bytes of the outer mode. This function returns
326 TRUE for the SUBREG's where this is correct. */
327 int
328 valid_subreg (rtx op)
329 {
330 enum machine_mode om = GET_MODE (op);
331 enum machine_mode im = GET_MODE (SUBREG_REG (op));
332 return om != VOIDmode && im != VOIDmode
333 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
334 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
335 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
336 }
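/* For example, (subreg:SI (reg:QI)) is accepted (both sizes <= 4 bytes),
   (subreg:TI (reg:V4SI)) is accepted (equal 16-byte sizes), but
   (subreg:SI (reg:DI)) fails all three tests and is rejected.  */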
337
338 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
339 and adjust the start offset. */
340 static rtx
341 adjust_operand (rtx op, HOST_WIDE_INT * start)
342 {
343 enum machine_mode mode;
344 int op_size;
345 /* Strip any paradoxical SUBREG. */
346 if (GET_CODE (op) == SUBREG
347 && (GET_MODE_BITSIZE (GET_MODE (op))
348 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
349 {
350 if (start)
351 *start -=
352 GET_MODE_BITSIZE (GET_MODE (op)) -
353 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
354 op = SUBREG_REG (op);
355 }
356 /* If it is smaller than SI, ensure it ends up in an SImode SUBREG. */
357 op_size = GET_MODE_BITSIZE (GET_MODE (op));
358 if (op_size < 32)
359 {
360 if (start)
361 *start += 32 - op_size;
362 op_size = 32;
363 }
364 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
365 mode = mode_for_size (op_size, MODE_INT, 0);
366 if (mode != GET_MODE (op))
367 op = gen_rtx_SUBREG (mode, op, 0);
368 return op;
369 }
370
371 void
372 spu_expand_extv (rtx ops[], int unsignedp)
373 {
374 rtx dst = ops[0], src = ops[1];
375 HOST_WIDE_INT width = INTVAL (ops[2]);
376 HOST_WIDE_INT start = INTVAL (ops[3]);
377 HOST_WIDE_INT align_mask;
378 rtx s0, s1, mask, r0;
379
380 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
381
382 if (MEM_P (src))
383 {
384 /* First, determine if we need 1 TImode load or 2. We need only 1
385 if the bits being extracted do not cross the alignment boundary
386 as determined by the MEM and its address. */
387
388 align_mask = -MEM_ALIGN (src);
389 if ((start & align_mask) == ((start + width - 1) & align_mask))
390 {
391 /* Alignment is sufficient for 1 load. */
392 s0 = gen_reg_rtx (TImode);
393 r0 = spu_expand_load (s0, 0, src, start / 8);
394 start &= 7;
395 if (r0)
396 emit_insn (gen_rotqby_ti (s0, s0, r0));
397 }
398 else
399 {
400 /* Need 2 loads. */
401 s0 = gen_reg_rtx (TImode);
402 s1 = gen_reg_rtx (TImode);
403 r0 = spu_expand_load (s0, s1, src, start / 8);
404 start &= 7;
405
406 gcc_assert (start + width <= 128);
407 if (r0)
408 {
409 rtx r1 = gen_reg_rtx (SImode);
410 mask = gen_reg_rtx (TImode);
411 emit_move_insn (mask, GEN_INT (-1));
412 emit_insn (gen_rotqby_ti (s0, s0, r0));
413 emit_insn (gen_rotqby_ti (s1, s1, r0));
414 if (GET_CODE (r0) == CONST_INT)
415 r1 = GEN_INT (INTVAL (r0) & 15);
416 else
417 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
418 emit_insn (gen_shlqby_ti (mask, mask, r1));
419 emit_insn (gen_selb (s0, s1, s0, mask));
420 }
421 }
422
423 }
424 else if (GET_CODE (src) == SUBREG)
425 {
426 rtx r = SUBREG_REG (src);
427 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
428 s0 = gen_reg_rtx (TImode);
429 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
430 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
431 else
432 emit_move_insn (s0, src);
433 }
434 else
435 {
436 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
437 s0 = gen_reg_rtx (TImode);
438 emit_move_insn (s0, src);
439 }
440
441 /* Now s0 is TImode and contains the bits to extract at start. */
442
443 if (start)
444 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
445
446 if (128 - width)
447 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
448
449 emit_move_insn (dst, s0);
450 }
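/* A sketch of the extraction above: the WIDTH-bit field at bit offset START
   (big-endian, i.e. counted from the most significant end) is first rotated
   to the top of the quadword (rotlti3 by START) and then right-shifted by
   128 - WIDTH, arithmetically or logically depending on UNSIGNEDP.  For
   instance START = 24, WIDTH = 16 becomes a rotate by 24 followed by a
   112-bit right shift.  */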
451
452 void
453 spu_expand_insv (rtx ops[])
454 {
455 HOST_WIDE_INT width = INTVAL (ops[1]);
456 HOST_WIDE_INT start = INTVAL (ops[2]);
457 HOST_WIDE_INT maskbits;
458 enum machine_mode dst_mode;
459 rtx dst = ops[0], src = ops[3];
460 int dst_size;
461 rtx mask;
462 rtx shift_reg;
463 int shift;
464
465
466 if (GET_CODE (ops[0]) == MEM)
467 dst = gen_reg_rtx (TImode);
468 else
469 dst = adjust_operand (dst, &start);
470 dst_mode = GET_MODE (dst);
471 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
472
473 if (CONSTANT_P (src))
474 {
475 enum machine_mode m =
476 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
477 src = force_reg (m, convert_to_mode (m, src, 0));
478 }
479 src = adjust_operand (src, 0);
480
481 mask = gen_reg_rtx (dst_mode);
482 shift_reg = gen_reg_rtx (dst_mode);
483 shift = dst_size - start - width;
484
485 /* It's not safe to use subreg here because the compiler assumes
486 that the SUBREG_REG is right justified in the SUBREG. */
487 convert_move (shift_reg, src, 1);
488
489 if (shift > 0)
490 {
491 switch (dst_mode)
492 {
493 case SImode:
494 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
495 break;
496 case DImode:
497 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
498 break;
499 case TImode:
500 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
501 break;
502 default:
503 abort ();
504 }
505 }
506 else if (shift < 0)
507 abort ();
508
509 switch (dst_size)
510 {
511 case 32:
512 maskbits = (-1ll << (32 - width - start));
513 if (start)
514 maskbits += (1ll << (32 - start));
515 emit_move_insn (mask, GEN_INT (maskbits));
516 break;
517 case 64:
518 maskbits = (-1ll << (64 - width - start));
519 if (start)
520 maskbits += (1ll << (64 - start));
521 emit_move_insn (mask, GEN_INT (maskbits));
522 break;
523 case 128:
524 {
525 unsigned char arr[16];
526 int i = start / 8;
527 memset (arr, 0, sizeof (arr));
528 arr[i] = 0xff >> (start & 7);
529 for (i++; i <= (start + width - 1) / 8; i++)
530 arr[i] = 0xff;
531 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
532 emit_move_insn (mask, array_to_constant (TImode, arr));
533 }
534 break;
535 default:
536 abort ();
537 }
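/* Worked example for the 32-bit case, assuming the usual 64-bit
   HOST_WIDE_INT: START = 8, WIDTH = 8 gives
   maskbits = (-1ll << 16) + (1ll << 24) = 0x00ff0000,
   i.e. ones exactly in bit positions START .. START+WIDTH-1 counted from
   the most significant bit.  */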
538 if (GET_CODE (ops[0]) == MEM)
539 {
540 rtx low = gen_reg_rtx (SImode);
541 rtx rotl = gen_reg_rtx (SImode);
542 rtx mask0 = gen_reg_rtx (TImode);
543 rtx addr;
544 rtx addr0;
545 rtx addr1;
546 rtx mem;
547
548 addr = force_reg (Pmode, XEXP (ops[0], 0));
549 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
550 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
551 emit_insn (gen_negsi2 (rotl, low));
552 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
553 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
554 mem = change_address (ops[0], TImode, addr0);
555 set_mem_alias_set (mem, 0);
556 emit_move_insn (dst, mem);
557 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
558 if (start + width > MEM_ALIGN (ops[0]))
559 {
560 rtx shl = gen_reg_rtx (SImode);
561 rtx mask1 = gen_reg_rtx (TImode);
562 rtx dst1 = gen_reg_rtx (TImode);
563 rtx mem1;
564 addr1 = plus_constant (Pmode, addr, 16);
565 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
566 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
567 emit_insn (gen_shlqby_ti (mask1, mask, shl));
568 mem1 = change_address (ops[0], TImode, addr1);
569 set_mem_alias_set (mem1, 0);
570 emit_move_insn (dst1, mem1);
571 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
572 emit_move_insn (mem1, dst1);
573 }
574 emit_move_insn (mem, dst);
575 }
576 else
577 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
578 }
579
580
581 int
582 spu_expand_block_move (rtx ops[])
583 {
584 HOST_WIDE_INT bytes, align, offset;
585 rtx src, dst, sreg, dreg, target;
586 int i;
587 if (GET_CODE (ops[2]) != CONST_INT
588 || GET_CODE (ops[3]) != CONST_INT
589 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
590 return 0;
591
592 bytes = INTVAL (ops[2]);
593 align = INTVAL (ops[3]);
594
595 if (bytes <= 0)
596 return 1;
597
598 dst = ops[0];
599 src = ops[1];
600
601 if (align == 16)
602 {
603 for (offset = 0; offset + 16 <= bytes; offset += 16)
604 {
605 dst = adjust_address (ops[0], V16QImode, offset);
606 src = adjust_address (ops[1], V16QImode, offset);
607 emit_move_insn (dst, src);
608 }
609 if (offset < bytes)
610 {
611 rtx mask;
612 unsigned char arr[16] = { 0 };
613 for (i = 0; i < bytes - offset; i++)
614 arr[i] = 0xff;
615 dst = adjust_address (ops[0], V16QImode, offset);
616 src = adjust_address (ops[1], V16QImode, offset);
617 mask = gen_reg_rtx (V16QImode);
618 sreg = gen_reg_rtx (V16QImode);
619 dreg = gen_reg_rtx (V16QImode);
620 target = gen_reg_rtx (V16QImode);
621 emit_move_insn (mask, array_to_constant (V16QImode, arr));
622 emit_move_insn (dreg, dst);
623 emit_move_insn (sreg, src);
624 emit_insn (gen_selb (target, dreg, sreg, mask));
625 emit_move_insn (dst, target);
626 }
627 return 1;
628 }
629 return 0;
630 }
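/* Only the fully 16-byte aligned case is expanded here; anything else
   returns 0 so the caller falls back to the generic block-move code.  For a
   20-byte copy this emits one whole-quadword V16QI move plus a
   read-modify-write of the last quadword, using selb with a 4-byte 0xff
   mask so only the tail bytes are replaced.  */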
631
632 enum spu_comp_code
633 { SPU_EQ, SPU_GT, SPU_GTU };
634
635 int spu_comp_icode[12][3] = {
636 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
637 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
638 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
639 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
640 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
641 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
642 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
643 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
644 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
645 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
646 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
647 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
648 };
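/* Rows correspond to the mode index computed in spu_emit_branch_or_set
   below (0 = QImode ... 11 = V2DFmode); columns follow enum spu_comp_code.
   The zero entries mark combinations with no direct instruction, e.g.
   there is no unsigned compare for the floating-point modes.  */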
649
650 /* Generate a compare for CODE. Return a brand-new rtx that represents
651 the result of the compare. GCC can figure this out too if we don't
652 provide all variations of compares, but since GCC always wants to use
653 WORD_MODE, we can generate better code in most cases if we do it
654 ourselves. */
655 void
656 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
657 {
658 int reverse_compare = 0;
659 int reverse_test = 0;
660 rtx compare_result, eq_result;
661 rtx comp_rtx, eq_rtx;
662 enum machine_mode comp_mode;
663 enum machine_mode op_mode;
664 enum spu_comp_code scode, eq_code;
665 enum insn_code ior_code;
666 enum rtx_code code = GET_CODE (cmp);
667 rtx op0 = XEXP (cmp, 0);
668 rtx op1 = XEXP (cmp, 1);
669 int index;
670 int eq_test = 0;
671
672 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
673 and so on, to keep the constant in operand 1. */
674 if (GET_CODE (op1) == CONST_INT)
675 {
676 HOST_WIDE_INT val = INTVAL (op1) - 1;
677 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
678 switch (code)
679 {
680 case GE:
681 op1 = GEN_INT (val);
682 code = GT;
683 break;
684 case LT:
685 op1 = GEN_INT (val);
686 code = LE;
687 break;
688 case GEU:
689 op1 = GEN_INT (val);
690 code = GTU;
691 break;
692 case LTU:
693 op1 = GEN_INT (val);
694 code = LEU;
695 break;
696 default:
697 break;
698 }
699 }
700
701 /* However, if we generate an integer result, performing a reverse test
702 would require an extra negation, so avoid that where possible. */
703 if (GET_CODE (op1) == CONST_INT && is_set == 1)
704 {
705 HOST_WIDE_INT val = INTVAL (op1) + 1;
706 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
707 switch (code)
708 {
709 case LE:
710 op1 = GEN_INT (val);
711 code = LT;
712 break;
713 case LEU:
714 op1 = GEN_INT (val);
715 code = LTU;
716 break;
717 default:
718 break;
719 }
720 }
721
722 comp_mode = SImode;
723 op_mode = GET_MODE (op0);
724
725 switch (code)
726 {
727 case GE:
728 scode = SPU_GT;
729 if (HONOR_NANS (op_mode))
730 {
731 reverse_compare = 0;
732 reverse_test = 0;
733 eq_test = 1;
734 eq_code = SPU_EQ;
735 }
736 else
737 {
738 reverse_compare = 1;
739 reverse_test = 1;
740 }
741 break;
742 case LE:
743 scode = SPU_GT;
744 if (HONOR_NANS (op_mode))
745 {
746 reverse_compare = 1;
747 reverse_test = 0;
748 eq_test = 1;
749 eq_code = SPU_EQ;
750 }
751 else
752 {
753 reverse_compare = 0;
754 reverse_test = 1;
755 }
756 break;
757 case LT:
758 reverse_compare = 1;
759 reverse_test = 0;
760 scode = SPU_GT;
761 break;
762 case GEU:
763 reverse_compare = 1;
764 reverse_test = 1;
765 scode = SPU_GTU;
766 break;
767 case LEU:
768 reverse_compare = 0;
769 reverse_test = 1;
770 scode = SPU_GTU;
771 break;
772 case LTU:
773 reverse_compare = 1;
774 reverse_test = 0;
775 scode = SPU_GTU;
776 break;
777 case NE:
778 reverse_compare = 0;
779 reverse_test = 1;
780 scode = SPU_EQ;
781 break;
782
783 case EQ:
784 scode = SPU_EQ;
785 break;
786 case GT:
787 scode = SPU_GT;
788 break;
789 case GTU:
790 scode = SPU_GTU;
791 break;
792 default:
793 scode = SPU_EQ;
794 break;
795 }
796
797 switch (op_mode)
798 {
799 case QImode:
800 index = 0;
801 comp_mode = QImode;
802 break;
803 case HImode:
804 index = 1;
805 comp_mode = HImode;
806 break;
807 case SImode:
808 index = 2;
809 break;
810 case DImode:
811 index = 3;
812 break;
813 case TImode:
814 index = 4;
815 break;
816 case SFmode:
817 index = 5;
818 break;
819 case DFmode:
820 index = 6;
821 break;
822 case V16QImode:
823 index = 7;
824 comp_mode = op_mode;
825 break;
826 case V8HImode:
827 index = 8;
828 comp_mode = op_mode;
829 break;
830 case V4SImode:
831 index = 9;
832 comp_mode = op_mode;
833 break;
834 case V4SFmode:
835 index = 10;
836 comp_mode = V4SImode;
837 break;
838 case V2DFmode:
839 index = 11;
840 comp_mode = V2DImode;
841 break;
842 case V2DImode:
843 default:
844 abort ();
845 }
846
847 if (GET_MODE (op1) == DFmode
848 && (scode != SPU_GT && scode != SPU_EQ))
849 abort ();
850
851 if (is_set == 0 && op1 == const0_rtx
852 && (GET_MODE (op0) == SImode
853 || GET_MODE (op0) == HImode
854 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
855 {
856 /* Don't need to set a register with the result when we are
857 comparing against zero and branching. */
858 reverse_test = !reverse_test;
859 compare_result = op0;
860 }
861 else
862 {
863 compare_result = gen_reg_rtx (comp_mode);
864
865 if (reverse_compare)
866 {
867 rtx t = op1;
868 op1 = op0;
869 op0 = t;
870 }
871
872 if (spu_comp_icode[index][scode] == 0)
873 abort ();
874
875 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
876 (op0, op_mode))
877 op0 = force_reg (op_mode, op0);
878 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
879 (op1, op_mode))
880 op1 = force_reg (op_mode, op1);
881 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
882 op0, op1);
883 if (comp_rtx == 0)
884 abort ();
885 emit_insn (comp_rtx);
886
887 if (eq_test)
888 {
889 eq_result = gen_reg_rtx (comp_mode);
890 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
891 op0, op1);
892 if (eq_rtx == 0)
893 abort ();
894 emit_insn (eq_rtx);
895 ior_code = optab_handler (ior_optab, comp_mode);
896 gcc_assert (ior_code != CODE_FOR_nothing);
897 emit_insn (GEN_FCN (ior_code)
898 (compare_result, compare_result, eq_result));
899 }
900 }
901
902 if (is_set == 0)
903 {
904 rtx bcomp;
905 rtx loc_ref;
906
907 /* We don't have branch on QI compare insns, so we convert the
908 QI compare result to a HI result. */
909 if (comp_mode == QImode)
910 {
911 rtx old_res = compare_result;
912 compare_result = gen_reg_rtx (HImode);
913 comp_mode = HImode;
914 emit_insn (gen_extendqihi2 (compare_result, old_res));
915 }
916
917 if (reverse_test)
918 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
919 else
920 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
921
922 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
923 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
924 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
925 loc_ref, pc_rtx)));
926 }
927 else if (is_set == 2)
928 {
929 rtx target = operands[0];
930 int compare_size = GET_MODE_BITSIZE (comp_mode);
931 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
932 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
933 rtx select_mask;
934 rtx op_t = operands[2];
935 rtx op_f = operands[3];
936
937 /* The result of the comparison can be SI, HI or QI mode. Create a
938 mask based on that result. */
939 if (target_size > compare_size)
940 {
941 select_mask = gen_reg_rtx (mode);
942 emit_insn (gen_extend_compare (select_mask, compare_result));
943 }
944 else if (target_size < compare_size)
945 select_mask =
946 gen_rtx_SUBREG (mode, compare_result,
947 (compare_size - target_size) / BITS_PER_UNIT);
948 else if (comp_mode != mode)
949 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
950 else
951 select_mask = compare_result;
952
953 if (GET_MODE (target) != GET_MODE (op_t)
954 || GET_MODE (target) != GET_MODE (op_f))
955 abort ();
956
957 if (reverse_test)
958 emit_insn (gen_selb (target, op_t, op_f, select_mask));
959 else
960 emit_insn (gen_selb (target, op_f, op_t, select_mask));
961 }
962 else
963 {
964 rtx target = operands[0];
965 if (reverse_test)
966 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
967 gen_rtx_NOT (comp_mode, compare_result)));
968 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
969 emit_insn (gen_extendhisi2 (target, compare_result));
970 else if (GET_MODE (target) == SImode
971 && GET_MODE (compare_result) == QImode)
972 emit_insn (gen_extend_compare (target, compare_result));
973 else
974 emit_move_insn (target, compare_result);
975 }
976 }
977
978 HOST_WIDE_INT
979 const_double_to_hwint (rtx x)
980 {
981 HOST_WIDE_INT val;
982 REAL_VALUE_TYPE rv;
983 if (GET_MODE (x) == SFmode)
984 {
985 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
986 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
987 }
988 else if (GET_MODE (x) == DFmode)
989 {
990 long l[2];
991 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
992 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
993 val = l[0];
994 val = (val << 32) | (l[1] & 0xffffffff);
995 }
996 else
997 abort ();
998 return val;
999 }
1000
1001 rtx
1002 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1003 {
1004 long tv[2];
1005 REAL_VALUE_TYPE rv;
1006 gcc_assert (mode == SFmode || mode == DFmode);
1007
1008 if (mode == SFmode)
1009 tv[0] = (v << 32) >> 32;
1010 else if (mode == DFmode)
1011 {
1012 tv[1] = (v << 32) >> 32;
1013 tv[0] = v >> 32;
1014 }
1015 real_from_target (&rv, tv, mode);
1016 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1017 }
1018
1019 void
1020 print_operand_address (FILE * file, register rtx addr)
1021 {
1022 rtx reg;
1023 rtx offset;
1024
1025 if (GET_CODE (addr) == AND
1026 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1027 && INTVAL (XEXP (addr, 1)) == -16)
1028 addr = XEXP (addr, 0);
1029
1030 switch (GET_CODE (addr))
1031 {
1032 case REG:
1033 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1034 break;
1035
1036 case PLUS:
1037 reg = XEXP (addr, 0);
1038 offset = XEXP (addr, 1);
1039 if (GET_CODE (offset) == REG)
1040 {
1041 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1042 reg_names[REGNO (offset)]);
1043 }
1044 else if (GET_CODE (offset) == CONST_INT)
1045 {
1046 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1047 INTVAL (offset), reg_names[REGNO (reg)]);
1048 }
1049 else
1050 abort ();
1051 break;
1052
1053 case CONST:
1054 case LABEL_REF:
1055 case SYMBOL_REF:
1056 case CONST_INT:
1057 output_addr_const (file, addr);
1058 break;
1059
1060 default:
1061 debug_rtx (addr);
1062 abort ();
1063 }
1064 }
1065
1066 void
1067 print_operand (FILE * file, rtx x, int code)
1068 {
1069 enum machine_mode mode = GET_MODE (x);
1070 HOST_WIDE_INT val;
1071 unsigned char arr[16];
1072 int xcode = GET_CODE (x);
1073 int i, info;
1074 if (GET_MODE (x) == VOIDmode)
1075 switch (code)
1076 {
1077 case 'L': /* 128 bits, signed */
1078 case 'm': /* 128 bits, signed */
1079 case 'T': /* 128 bits, signed */
1080 case 't': /* 128 bits, signed */
1081 mode = TImode;
1082 break;
1083 case 'K': /* 64 bits, signed */
1084 case 'k': /* 64 bits, signed */
1085 case 'D': /* 64 bits, signed */
1086 case 'd': /* 64 bits, signed */
1087 mode = DImode;
1088 break;
1089 case 'J': /* 32 bits, signed */
1090 case 'j': /* 32 bits, signed */
1091 case 's': /* 32 bits, signed */
1092 case 'S': /* 32 bits, signed */
1093 mode = SImode;
1094 break;
1095 }
1096 switch (code)
1097 {
1098
1099 case 'j': /* 32 bits, signed */
1100 case 'k': /* 64 bits, signed */
1101 case 'm': /* 128 bits, signed */
1102 if (xcode == CONST_INT
1103 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1104 {
1105 gcc_assert (logical_immediate_p (x, mode));
1106 constant_to_array (mode, x, arr);
1107 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1108 val = trunc_int_for_mode (val, SImode);
1109 switch (which_logical_immediate (val))
1110 {
1111 case SPU_ORI:
1112 break;
1113 case SPU_ORHI:
1114 fprintf (file, "h");
1115 break;
1116 case SPU_ORBI:
1117 fprintf (file, "b");
1118 break;
1119 default:
1120 gcc_unreachable();
1121 }
1122 }
1123 else
1124 gcc_unreachable();
1125 return;
1126
1127 case 'J': /* 32 bits, signed */
1128 case 'K': /* 64 bits, signed */
1129 case 'L': /* 128 bits, signed */
1130 if (xcode == CONST_INT
1131 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1132 {
1133 gcc_assert (logical_immediate_p (x, mode)
1134 || iohl_immediate_p (x, mode));
1135 constant_to_array (mode, x, arr);
1136 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1137 val = trunc_int_for_mode (val, SImode);
1138 switch (which_logical_immediate (val))
1139 {
1140 case SPU_ORI:
1141 case SPU_IOHL:
1142 break;
1143 case SPU_ORHI:
1144 val = trunc_int_for_mode (val, HImode);
1145 break;
1146 case SPU_ORBI:
1147 val = trunc_int_for_mode (val, QImode);
1148 break;
1149 default:
1150 gcc_unreachable();
1151 }
1152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1153 }
1154 else
1155 gcc_unreachable();
1156 return;
1157
1158 case 't': /* 128 bits, signed */
1159 case 'd': /* 64 bits, signed */
1160 case 's': /* 32 bits, signed */
1161 if (CONSTANT_P (x))
1162 {
1163 enum immediate_class c = classify_immediate (x, mode);
1164 switch (c)
1165 {
1166 case IC_IL1:
1167 constant_to_array (mode, x, arr);
1168 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1169 val = trunc_int_for_mode (val, SImode);
1170 switch (which_immediate_load (val))
1171 {
1172 case SPU_IL:
1173 break;
1174 case SPU_ILA:
1175 fprintf (file, "a");
1176 break;
1177 case SPU_ILH:
1178 fprintf (file, "h");
1179 break;
1180 case SPU_ILHU:
1181 fprintf (file, "hu");
1182 break;
1183 default:
1184 gcc_unreachable ();
1185 }
1186 break;
1187 case IC_CPAT:
1188 constant_to_array (mode, x, arr);
1189 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1190 if (info == 1)
1191 fprintf (file, "b");
1192 else if (info == 2)
1193 fprintf (file, "h");
1194 else if (info == 4)
1195 fprintf (file, "w");
1196 else if (info == 8)
1197 fprintf (file, "d");
1198 break;
1199 case IC_IL1s:
1200 if (xcode == CONST_VECTOR)
1201 {
1202 x = CONST_VECTOR_ELT (x, 0);
1203 xcode = GET_CODE (x);
1204 }
1205 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1206 fprintf (file, "a");
1207 else if (xcode == HIGH)
1208 fprintf (file, "hu");
1209 break;
1210 case IC_FSMBI:
1211 case IC_FSMBI2:
1212 case IC_IL2:
1213 case IC_IL2s:
1214 case IC_POOL:
1215 abort ();
1216 }
1217 }
1218 else
1219 gcc_unreachable ();
1220 return;
1221
1222 case 'T': /* 128 bits, signed */
1223 case 'D': /* 64 bits, signed */
1224 case 'S': /* 32 bits, signed */
1225 if (CONSTANT_P (x))
1226 {
1227 enum immediate_class c = classify_immediate (x, mode);
1228 switch (c)
1229 {
1230 case IC_IL1:
1231 constant_to_array (mode, x, arr);
1232 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1233 val = trunc_int_for_mode (val, SImode);
1234 switch (which_immediate_load (val))
1235 {
1236 case SPU_IL:
1237 case SPU_ILA:
1238 break;
1239 case SPU_ILH:
1240 case SPU_ILHU:
1241 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1242 break;
1243 default:
1244 gcc_unreachable ();
1245 }
1246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1247 break;
1248 case IC_FSMBI:
1249 constant_to_array (mode, x, arr);
1250 val = 0;
1251 for (i = 0; i < 16; i++)
1252 {
1253 val <<= 1;
1254 val |= arr[i] & 1;
1255 }
1256 print_operand (file, GEN_INT (val), 0);
1257 break;
1258 case IC_CPAT:
1259 constant_to_array (mode, x, arr);
1260 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1262 break;
1263 case IC_IL1s:
1264 if (xcode == HIGH)
1265 x = XEXP (x, 0);
1266 if (GET_CODE (x) == CONST_VECTOR)
1267 x = CONST_VECTOR_ELT (x, 0);
1268 output_addr_const (file, x);
1269 if (xcode == HIGH)
1270 fprintf (file, "@h");
1271 break;
1272 case IC_IL2:
1273 case IC_IL2s:
1274 case IC_FSMBI2:
1275 case IC_POOL:
1276 abort ();
1277 }
1278 }
1279 else
1280 gcc_unreachable ();
1281 return;
1282
1283 case 'C':
1284 if (xcode == CONST_INT)
1285 {
1286 /* Only the 4 least significant bits are relevant for generating
1287 control word instructions. */
1288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1289 return;
1290 }
1291 break;
1292
1293 case 'M': /* print code for c*d */
1294 if (GET_CODE (x) == CONST_INT)
1295 switch (INTVAL (x))
1296 {
1297 case 1:
1298 fprintf (file, "b");
1299 break;
1300 case 2:
1301 fprintf (file, "h");
1302 break;
1303 case 4:
1304 fprintf (file, "w");
1305 break;
1306 case 8:
1307 fprintf (file, "d");
1308 break;
1309 default:
1310 gcc_unreachable();
1311 }
1312 else
1313 gcc_unreachable();
1314 return;
1315
1316 case 'N': /* Negate the operand */
1317 if (xcode == CONST_INT)
1318 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1319 else if (xcode == CONST_VECTOR)
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1321 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1322 return;
1323
1324 case 'I': /* enable/disable interrupts */
1325 if (xcode == CONST_INT)
1326 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1327 return;
1328
1329 case 'b': /* branch modifiers */
1330 if (xcode == REG)
1331 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1332 else if (COMPARISON_P (x))
1333 fprintf (file, "%s", xcode == NE ? "n" : "");
1334 return;
1335
1336 case 'i': /* indirect call */
1337 if (xcode == MEM)
1338 {
1339 if (GET_CODE (XEXP (x, 0)) == REG)
1340 /* Used in indirect function calls. */
1341 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1342 else
1343 output_address (XEXP (x, 0));
1344 }
1345 return;
1346
1347 case 'p': /* load/store */
1348 if (xcode == MEM)
1349 {
1350 x = XEXP (x, 0);
1351 xcode = GET_CODE (x);
1352 }
1353 if (xcode == AND)
1354 {
1355 x = XEXP (x, 0);
1356 xcode = GET_CODE (x);
1357 }
1358 if (xcode == REG)
1359 fprintf (file, "d");
1360 else if (xcode == CONST_INT)
1361 fprintf (file, "a");
1362 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1363 fprintf (file, "r");
1364 else if (xcode == PLUS || xcode == LO_SUM)
1365 {
1366 if (GET_CODE (XEXP (x, 1)) == REG)
1367 fprintf (file, "x");
1368 else
1369 fprintf (file, "d");
1370 }
1371 return;
1372
1373 case 'e':
1374 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1375 val &= 0x7;
1376 output_addr_const (file, GEN_INT (val));
1377 return;
1378
1379 case 'f':
1380 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1381 val &= 0x1f;
1382 output_addr_const (file, GEN_INT (val));
1383 return;
1384
1385 case 'g':
1386 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1387 val &= 0x3f;
1388 output_addr_const (file, GEN_INT (val));
1389 return;
1390
1391 case 'h':
1392 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1393 val = (val >> 3) & 0x1f;
1394 output_addr_const (file, GEN_INT (val));
1395 return;
1396
1397 case 'E':
1398 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1399 val = -val;
1400 val &= 0x7;
1401 output_addr_const (file, GEN_INT (val));
1402 return;
1403
1404 case 'F':
1405 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1406 val = -val;
1407 val &= 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'G':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val = -val;
1414 val &= 0x3f;
1415 output_addr_const (file, GEN_INT (val));
1416 return;
1417
1418 case 'H':
1419 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420 val = -(val & -8ll);
1421 val = (val >> 3) & 0x1f;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1424
1425 case 'v':
1426 case 'w':
1427 constant_to_array (mode, x, arr);
1428 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1429 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1430 return;
1431
1432 case 0:
1433 if (xcode == REG)
1434 fprintf (file, "%s", reg_names[REGNO (x)]);
1435 else if (xcode == MEM)
1436 output_address (XEXP (x, 0));
1437 else if (xcode == CONST_VECTOR)
1438 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1439 else
1440 output_addr_const (file, x);
1441 return;
1442
1443 /* unused letters
1444 o qr u yz
1445 AB OPQR UVWXYZ */
1446 default:
1447 output_operand_lossage ("invalid %%xn code");
1448 }
1449 gcc_unreachable ();
1450 }
1451
1452 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1453 caller saved register. For leaf functions it is more efficient to
1454 use a volatile register because we won't need to save and restore the
1455 pic register. This routine is only valid after register allocation
1456 is completed, so we can pick an unused register. */
1457 static rtx
1458 get_pic_reg (void)
1459 {
1460 if (!reload_completed && !reload_in_progress)
1461 abort ();
1462
1463 /* If we've already made the decision, we need to keep with it. Once we've
1464 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1465 return true since the register is now live; this should not cause us to
1466 "switch back" to using pic_offset_table_rtx. */
1467 if (!cfun->machine->pic_reg)
1468 {
1469 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1470 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1471 else
1472 cfun->machine->pic_reg = pic_offset_table_rtx;
1473 }
1474
1475 return cfun->machine->pic_reg;
1476 }
1477
1478 /* Split constant addresses to handle cases that are too large.
1479 Add in the pic register when in PIC mode.
1480 Split immediates that require more than 1 instruction. */
1481 int
1482 spu_split_immediate (rtx * ops)
1483 {
1484 enum machine_mode mode = GET_MODE (ops[0]);
1485 enum immediate_class c = classify_immediate (ops[1], mode);
1486
1487 switch (c)
1488 {
1489 case IC_IL2:
1490 {
1491 unsigned char arrhi[16];
1492 unsigned char arrlo[16];
1493 rtx to, temp, hi, lo;
1494 int i;
1495 enum machine_mode imode = mode;
1496 /* We need to do reals as ints because the constant used in the
1497 IOR might not be a legitimate real constant. */
1498 imode = int_mode_for_mode (mode);
1499 constant_to_array (mode, ops[1], arrhi);
1500 if (imode != mode)
1501 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1502 else
1503 to = ops[0];
1504 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1505 for (i = 0; i < 16; i += 4)
1506 {
1507 arrlo[i + 2] = arrhi[i + 2];
1508 arrlo[i + 3] = arrhi[i + 3];
1509 arrlo[i + 0] = arrlo[i + 1] = 0;
1510 arrhi[i + 2] = arrhi[i + 3] = 0;
1511 }
1512 hi = array_to_constant (imode, arrhi);
1513 lo = array_to_constant (imode, arrlo);
1514 emit_move_insn (temp, hi);
1515 emit_insn (gen_rtx_SET
1516 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1517 return 1;
1518 }
1519 case IC_FSMBI2:
1520 {
1521 unsigned char arr_fsmbi[16];
1522 unsigned char arr_andbi[16];
1523 rtx to, reg_fsmbi, reg_and;
1524 int i;
1525 enum machine_mode imode = mode;
1526 /* We need to do reals as ints because the constant used in the
1527 * AND might not be a legitimate real constant. */
1528 imode = int_mode_for_mode (mode);
1529 constant_to_array (mode, ops[1], arr_fsmbi);
1530 if (imode != mode)
1531 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1532 else
1533 to = ops[0];
1534 for (i = 0; i < 16; i++)
1535 if (arr_fsmbi[i] != 0)
1536 {
1537 arr_andbi[0] = arr_fsmbi[i];
1538 arr_fsmbi[i] = 0xff;
1539 }
1540 for (i = 1; i < 16; i++)
1541 arr_andbi[i] = arr_andbi[0];
1542 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1543 reg_and = array_to_constant (imode, arr_andbi);
1544 emit_move_insn (to, reg_fsmbi);
1545 emit_insn (gen_rtx_SET
1546 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1547 return 1;
1548 }
1549 case IC_POOL:
1550 if (reload_in_progress || reload_completed)
1551 {
1552 rtx mem = force_const_mem (mode, ops[1]);
1553 if (TARGET_LARGE_MEM)
1554 {
1555 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1556 emit_move_insn (addr, XEXP (mem, 0));
1557 mem = replace_equiv_address (mem, addr);
1558 }
1559 emit_move_insn (ops[0], mem);
1560 return 1;
1561 }
1562 break;
1563 case IC_IL1s:
1564 case IC_IL2s:
1565 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1566 {
1567 if (c == IC_IL2s)
1568 {
1569 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1570 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1571 }
1572 else if (flag_pic)
1573 emit_insn (gen_pic (ops[0], ops[1]));
1574 if (flag_pic)
1575 {
1576 rtx pic_reg = get_pic_reg ();
1577 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1578 }
1579 return flag_pic || c == IC_IL2s;
1580 }
1581 break;
1582 case IC_IL1:
1583 case IC_FSMBI:
1584 case IC_CPAT:
1585 break;
1586 }
1587 return 0;
1588 }
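/* Informal example of the IC_IL2 case above: a constant like 0x12345678
   that no single il* instruction can load is built as an ilhu of the high
   halfword (0x1234) followed by an IOR of the low halfword (0x5678), which
   matches the iohl pattern; the arrhi/arrlo byte shuffling sets up exactly
   those two halves.  */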
1589
1590 /* SAVING is TRUE when we are generating the actual load and store
1591 instructions for REGNO. When determining the size of the stack
1592 needed for saving registers we must allocate enough space for the
1593 worst case, because we don't always have the information early enough
1594 to not allocate it. But we can at least eliminate the actual loads
1595 and stores during the prologue/epilogue. */
1596 static int
1597 need_to_save_reg (int regno, int saving)
1598 {
1599 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1600 return 1;
1601 if (flag_pic
1602 && regno == PIC_OFFSET_TABLE_REGNUM
1603 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1604 return 1;
1605 return 0;
1606 }
1607
1608 /* This function is only correct starting with local register
1609 allocation */
1610 int
1611 spu_saved_regs_size (void)
1612 {
1613 int reg_save_size = 0;
1614 int regno;
1615
1616 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1617 if (need_to_save_reg (regno, 0))
1618 reg_save_size += 0x10;
1619 return reg_save_size;
1620 }
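/* Each saved register costs 0x10 (16) bytes because SPU registers are 128
   bits wide and are saved and restored as whole quadwords; see
   frame_emit_store/frame_emit_load below, which use V4SImode.  */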
1621
1622 static rtx
1623 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1624 {
1625 rtx reg = gen_rtx_REG (V4SImode, regno);
1626 rtx mem =
1627 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1628 return emit_insn (gen_movv4si (mem, reg));
1629 }
1630
1631 static rtx
1632 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1633 {
1634 rtx reg = gen_rtx_REG (V4SImode, regno);
1635 rtx mem =
1636 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1637 return emit_insn (gen_movv4si (reg, mem));
1638 }
1639
1640 /* This happens after reload, so we need to expand it. */
1641 static rtx
1642 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1643 {
1644 rtx insn;
1645 if (satisfies_constraint_K (GEN_INT (imm)))
1646 {
1647 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1648 }
1649 else
1650 {
1651 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1652 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1653 if (REGNO (src) == REGNO (scratch))
1654 abort ();
1655 }
1656 return insn;
1657 }
1658
1659 /* Return nonzero if this function is known to have a null epilogue. */
1660
1661 int
1662 direct_return (void)
1663 {
1664 if (reload_completed)
1665 {
1666 if (cfun->static_chain_decl == 0
1667 && (spu_saved_regs_size ()
1668 + get_frame_size ()
1669 + crtl->outgoing_args_size
1670 + crtl->args.pretend_args_size == 0)
1671 && crtl->is_leaf)
1672 return 1;
1673 }
1674 return 0;
1675 }
1676
1677 /*
1678 The stack frame looks like this:
1679 +-------------+
1680 | incoming |
1681 | args |
1682 AP -> +-------------+
1683 | $lr save |
1684 +-------------+
1685 prev SP | back chain |
1686 +-------------+
1687 | var args |
1688 | reg save | crtl->args.pretend_args_size bytes
1689 +-------------+
1690 | ... |
1691 | saved regs | spu_saved_regs_size() bytes
1692 FP -> +-------------+
1693 | ... |
1694 | vars | get_frame_size() bytes
1695 HFP -> +-------------+
1696 | ... |
1697 | outgoing |
1698 | args | crtl->outgoing_args_size bytes
1699 +-------------+
1700 | $lr of next |
1701 | frame |
1702 +-------------+
1703 | back chain |
1704 SP -> +-------------+
1705
1706 */
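/* In terms of the code below, the frame size is
   total_size = get_frame_size () + spu_saved_regs_size ()
   + crtl->outgoing_args_size + crtl->args.pretend_args_size,
   plus STACK_POINTER_OFFSET except for a leaf function that does not call
   alloca and has an otherwise empty frame.  */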
1707 void
1708 spu_expand_prologue (void)
1709 {
1710 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1711 HOST_WIDE_INT total_size;
1712 HOST_WIDE_INT saved_regs_size;
1713 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1714 rtx scratch_reg_0, scratch_reg_1;
1715 rtx insn, real;
1716
1717 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1718 cfun->machine->pic_reg = pic_offset_table_rtx;
1719
1720 if (spu_naked_function_p (current_function_decl))
1721 return;
1722
1723 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1724 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1725
1726 saved_regs_size = spu_saved_regs_size ();
1727 total_size = size + saved_regs_size
1728 + crtl->outgoing_args_size
1729 + crtl->args.pretend_args_size;
1730
1731 if (!crtl->is_leaf
1732 || cfun->calls_alloca || total_size > 0)
1733 total_size += STACK_POINTER_OFFSET;
1734
1735 /* Save this first because code after this might use the link
1736 register as a scratch register. */
1737 if (!crtl->is_leaf)
1738 {
1739 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1740 RTX_FRAME_RELATED_P (insn) = 1;
1741 }
1742
1743 if (total_size > 0)
1744 {
1745 offset = -crtl->args.pretend_args_size;
1746 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1747 if (need_to_save_reg (regno, 1))
1748 {
1749 offset -= 16;
1750 insn = frame_emit_store (regno, sp_reg, offset);
1751 RTX_FRAME_RELATED_P (insn) = 1;
1752 }
1753 }
1754
1755 if (flag_pic && cfun->machine->pic_reg)
1756 {
1757 rtx pic_reg = cfun->machine->pic_reg;
1758 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1759 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1760 }
1761
1762 if (total_size > 0)
1763 {
1764 if (flag_stack_check)
1765 {
1766 /* We compare against total_size-1 because
1767 ($sp >= total_size) <=> ($sp > total_size-1) */
1768 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1769 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1770 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1771 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1772 {
1773 emit_move_insn (scratch_v4si, size_v4si);
1774 size_v4si = scratch_v4si;
1775 }
1776 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1777 emit_insn (gen_vec_extractv4si
1778 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1779 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1780 }
1781
1782 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1783 the value of the previous $sp because we save it as the back
1784 chain. */
1785 if (total_size <= 2000)
1786 {
1787 /* In this case we save the back chain first. */
1788 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1789 insn =
1790 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1791 }
1792 else
1793 {
1794 insn = emit_move_insn (scratch_reg_0, sp_reg);
1795 insn =
1796 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1797 }
1798 RTX_FRAME_RELATED_P (insn) = 1;
1799 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1801
1802 if (total_size > 2000)
1803 {
1804 /* Save the back chain ptr */
1805 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1806 }
1807
1808 if (frame_pointer_needed)
1809 {
1810 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1811 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1812 + crtl->outgoing_args_size;
1813 /* Set the new frame_pointer */
1814 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1815 RTX_FRAME_RELATED_P (insn) = 1;
1816 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1817 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1818 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1819 }
1820 }
1821
1822 if (flag_stack_usage_info)
1823 current_function_static_stack_size = total_size;
1824 }
1825
1826 void
1827 spu_expand_epilogue (bool sibcall_p)
1828 {
1829 int size = get_frame_size (), offset, regno;
1830 HOST_WIDE_INT saved_regs_size, total_size;
1831 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1832 rtx scratch_reg_0;
1833
1834 if (spu_naked_function_p (current_function_decl))
1835 return;
1836
1837 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1838
1839 saved_regs_size = spu_saved_regs_size ();
1840 total_size = size + saved_regs_size
1841 + crtl->outgoing_args_size
1842 + crtl->args.pretend_args_size;
1843
1844 if (!crtl->is_leaf
1845 || cfun->calls_alloca || total_size > 0)
1846 total_size += STACK_POINTER_OFFSET;
1847
1848 if (total_size > 0)
1849 {
1850 if (cfun->calls_alloca)
1851 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1852 else
1853 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1854
1855
1856 if (saved_regs_size > 0)
1857 {
1858 offset = -crtl->args.pretend_args_size;
1859 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1860 if (need_to_save_reg (regno, 1))
1861 {
1862 offset -= 0x10;
1863 frame_emit_load (regno, sp_reg, offset);
1864 }
1865 }
1866 }
1867
1868 if (!crtl->is_leaf)
1869 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1870
1871 if (!sibcall_p)
1872 {
1873 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1874 emit_jump_insn (gen__return ());
1875 }
1876 }
1877
1878 rtx
1879 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1880 {
1881 if (count != 0)
1882 return 0;
1883 /* This is inefficient because it ends up copying to a save-register
1884 which then gets saved even though $lr has already been saved. But
1885 it does generate better code for leaf functions and we don't need
1886 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1887 used for __builtin_return_address anyway, so maybe we don't care if
1888 it's inefficient. */
1889 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1890 }
1891 \f
1892
1893 /* Given VAL, generate a constant appropriate for MODE.
1894 If MODE is a vector mode, every element will be VAL.
1895 For TImode, VAL will be zero extended to 128 bits. */
1896 rtx
1897 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1898 {
1899 rtx inner;
1900 rtvec v;
1901 int units, i;
1902
1903 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1904 || GET_MODE_CLASS (mode) == MODE_FLOAT
1905 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1906 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1907
1908 if (GET_MODE_CLASS (mode) == MODE_INT)
1909 return immed_double_const (val, 0, mode);
1910
1911 /* val is the bit representation of the float */
1912 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1913 return hwint_to_const_double (mode, val);
1914
1915 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1916 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1917 else
1918 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1919
1920 units = GET_MODE_NUNITS (mode);
1921
1922 v = rtvec_alloc (units);
1923
1924 for (i = 0; i < units; ++i)
1925 RTVEC_ELT (v, i) = inner;
1926
1927 return gen_rtx_CONST_VECTOR (mode, v);
1928 }
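/* For instance, spu_const (V4SImode, 1) builds the vector constant
   {1, 1, 1, 1}, and spu_const (SFmode, 0x3f800000) reinterprets those bits
   as the float 1.0f.  */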
1929
1930 /* Create a MODE vector constant from 4 ints. */
1931 rtx
1932 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1933 {
1934 unsigned char arr[16];
1935 arr[0] = (a >> 24) & 0xff;
1936 arr[1] = (a >> 16) & 0xff;
1937 arr[2] = (a >> 8) & 0xff;
1938 arr[3] = (a >> 0) & 0xff;
1939 arr[4] = (b >> 24) & 0xff;
1940 arr[5] = (b >> 16) & 0xff;
1941 arr[6] = (b >> 8) & 0xff;
1942 arr[7] = (b >> 0) & 0xff;
1943 arr[8] = (c >> 24) & 0xff;
1944 arr[9] = (c >> 16) & 0xff;
1945 arr[10] = (c >> 8) & 0xff;
1946 arr[11] = (c >> 0) & 0xff;
1947 arr[12] = (d >> 24) & 0xff;
1948 arr[13] = (d >> 16) & 0xff;
1949 arr[14] = (d >> 8) & 0xff;
1950 arr[15] = (d >> 0) & 0xff;
1951 return array_to_constant(mode, arr);
1952 }
1953 \f
1954 /* branch hint stuff */
1955
1956 /* An array of these is used to propagate hints to predecessor blocks. */
1957 struct spu_bb_info
1958 {
1959 rtx prop_jump; /* propagated from another block */
1960 int bb_index; /* the original block. */
1961 };
1962 static struct spu_bb_info *spu_bb_info;
1963
1964 #define STOP_HINT_P(INSN) \
1965 (CALL_P(INSN) \
1966 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1967 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1968
1969 /* 1 when RTX is a hinted branch or its target. We keep track of
1970 what has been hinted so the safe-hint code can test it easily. */
1971 #define HINTED_P(RTX) \
1972 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1973
1974 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1975 #define SCHED_ON_EVEN_P(RTX) \
1976 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1977
1978 /* Emit a nop for INSN such that the two will dual issue. This assumes
1979 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1980 We check for TImode to handle a MULTI1 insn which has dual issued its
1981 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1982 static void
1983 emit_nop_for_insn (rtx insn)
1984 {
1985 int p;
1986 rtx new_insn;
1987
1988 /* We need to handle JUMP_TABLE_DATA separately. */
1989 if (JUMP_TABLE_DATA_P (insn))
1990 {
1991 new_insn = emit_insn_after (gen_lnop(), insn);
1992 recog_memoized (new_insn);
1993 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1994 return;
1995 }
1996
1997 p = get_pipe (insn);
1998 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1999 new_insn = emit_insn_after (gen_lnop (), insn);
2000 else if (p == 1 && GET_MODE (insn) == TImode)
2001 {
2002 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2003 PUT_MODE (new_insn, TImode);
2004 PUT_MODE (insn, VOIDmode);
2005 }
2006 else
2007 new_insn = emit_insn_after (gen_lnop (), insn);
2008 recog_memoized (new_insn);
2009 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2010 }
2011
2012 /* Insert nops in basic blocks to meet dual issue alignment
2013 requirements. Also make sure hbrp and hint instructions are at least
2014 one cycle apart, possibly inserting a nop. */
2015 static void
2016 pad_bb(void)
2017 {
2018 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2019 int length;
2020 int addr;
2021
2022 /* This sets up INSN_ADDRESSES. */
2023 shorten_branches (get_insns ());
2024
2025 /* Keep track of length added by nops. */
2026 length = 0;
2027
2028 prev_insn = 0;
2029 insn = get_insns ();
2030 if (!active_insn_p (insn))
2031 insn = next_active_insn (insn);
2032 for (; insn; insn = next_insn)
2033 {
2034 next_insn = next_active_insn (insn);
2035 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2036 || INSN_CODE (insn) == CODE_FOR_hbr)
2037 {
2038 if (hbr_insn)
2039 {
2040 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2041 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2042 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2043 || (a1 - a0 == 4))
2044 {
2045 prev_insn = emit_insn_before (gen_lnop (), insn);
2046 PUT_MODE (prev_insn, GET_MODE (insn));
2047 PUT_MODE (insn, TImode);
2048 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2049 length += 4;
2050 }
2051 }
2052 hbr_insn = insn;
2053 }
2054 if (INSN_CODE (insn) == CODE_FOR_blockage)
2055 {
2056 if (GET_MODE (insn) == TImode)
2057 PUT_MODE (next_insn, TImode);
2058 insn = next_insn;
2059 next_insn = next_active_insn (insn);
2060 }
2061 addr = INSN_ADDRESSES (INSN_UID (insn));
2062 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2063 {
2064 if (((addr + length) & 7) != 0)
2065 {
2066 emit_nop_for_insn (prev_insn);
2067 length += 4;
2068 }
2069 }
2070 else if (GET_MODE (insn) == TImode
2071 && ((next_insn && GET_MODE (next_insn) != TImode)
2072 || get_attr_type (insn) == TYPE_MULTI0)
2073 && ((addr + length) & 7) != 0)
2074 {
2075 /* prev_insn will always be set because the first insn is
2076 always 8-byte aligned. */
2077 emit_nop_for_insn (prev_insn);
2078 length += 4;
2079 }
2080 prev_insn = insn;
2081 }
2082 }
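/* Editorial aside, not part of the original sources: a minimal sketch of
   the alignment test pad_bb applies.  SPU instructions are 4 bytes and
   dual issue happens on 8-byte fetch boundaries, so an insn that must
   start a pair needs a nop in front of it whenever its address, adjusted
   for the nops already added, is not a multiple of 8.

     static int
     needs_alignment_nop (int insn_addr, int nop_bytes_added_so_far)
     {
       return ((insn_addr + nop_bytes_added_so_far) & 7) != 0;
     }

   Each inserted nop or lnop adds 4 to the running adjustment, which is the
   "length += 4" bookkeeping in pad_bb above.  */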
2083
2084 \f
2085 /* Routines for branch hints. */
2086
2087 static void
2088 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2089 int distance, sbitmap blocks)
2090 {
2091 rtx branch_label = 0;
2092 rtx hint;
2093 rtx insn;
2094 rtx table;
2095
2096 if (before == 0 || branch == 0 || target == 0)
2097 return;
2098
2099 /* While scheduling we require hints to be no further than 600 bytes
2100 from the branch, so we need to enforce that here too. */
2101 if (distance > 600)
2102 return;
2103
2104 /* If BEFORE is a basic block note, emit the hint after the note. */
2105 if (NOTE_INSN_BASIC_BLOCK_P (before))
2106 before = NEXT_INSN (before);
2107
2108 branch_label = gen_label_rtx ();
2109 LABEL_NUSES (branch_label)++;
2110 LABEL_PRESERVE_P (branch_label) = 1;
2111 insn = emit_label_before (branch_label, branch);
2112 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2113 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2114
2115 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2116 recog_memoized (hint);
2117 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2118 HINTED_P (branch) = 1;
2119
2120 if (GET_CODE (target) == LABEL_REF)
2121 HINTED_P (XEXP (target, 0)) = 1;
2122 else if (tablejump_p (branch, 0, &table))
2123 {
2124 rtvec vec;
2125 int j;
2126 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2127 vec = XVEC (PATTERN (table), 0);
2128 else
2129 vec = XVEC (PATTERN (table), 1);
2130 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2131 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2132 }
2133
2134 if (distance >= 588)
2135 {
2136 /* Make sure the hint isn't scheduled any earlier than this point,
2137 which could make it too far for the branch offset to fit. */
2138 insn = emit_insn_before (gen_blockage (), hint);
2139 recog_memoized (insn);
2140 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2141 }
2142 else if (distance <= 8 * 4)
2143 {
2144 /* To guarantee at least 8 insns between the hint and branch we
2145 insert nops. */
2146 int d;
2147 for (d = distance; d < 8 * 4; d += 4)
2148 {
2149 insn =
2150 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2151 recog_memoized (insn);
2152 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2153 }
2154
2155 /* Make sure any nops inserted aren't scheduled before the hint. */
2156 insn = emit_insn_after (gen_blockage (), hint);
2157 recog_memoized (insn);
2158 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2159
2160 /* Make sure any nops inserted aren't scheduled after the call. */
2161 if (CALL_P (branch) && distance < 8 * 4)
2162 {
2163 insn = emit_insn_before (gen_blockage (), branch);
2164 recog_memoized (insn);
2165 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2166 }
2167 }
2168 }
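/* Editorial aside, not part of the original sources: the nop-padding loop
   above works in bytes (every SPU insn is 4 bytes), so a hint emitted
   DISTANCE bytes ahead of its branch receives (8 * 4 - DISTANCE) / 4 filler
   nops, guaranteeing at least 8 instructions between the hbr and the
   branch.  A minimal sketch of that count:

     static int
     hint_filler_nops (int distance_in_bytes)
     {
       if (distance_in_bytes >= 8 * 4)
         return 0;
       return (8 * 4 - distance_in_bytes) / 4;
     }

   For example, a hint placed 12 bytes (3 insns) before its branch gets
   (32 - 12) / 4 == 5 nops, matching the loop over d above.  */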
2169
2170 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2171 the rtx for the branch target. */
2172 static rtx
2173 get_branch_target (rtx branch)
2174 {
2175 if (JUMP_P (branch))
2176 {
2177 rtx set, src;
2178
2179 /* Return statements */
2180 if (GET_CODE (PATTERN (branch)) == RETURN)
2181 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2182
2183 /* ASM GOTOs. */
2184 if (extract_asm_operands (PATTERN (branch)) != NULL)
2185 return NULL;
2186
2187 set = single_set (branch);
2188 src = SET_SRC (set);
2189 if (GET_CODE (SET_DEST (set)) != PC)
2190 abort ();
2191
2192 if (GET_CODE (src) == IF_THEN_ELSE)
2193 {
2194 rtx lab = 0;
2195 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2196 if (note)
2197 {
2198 /* If the more probable case is not a fall through, then
2199 try a branch hint. */
2200 int prob = XINT (note, 0);
2201 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2202 && GET_CODE (XEXP (src, 1)) != PC)
2203 lab = XEXP (src, 1);
2204 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2205 && GET_CODE (XEXP (src, 2)) != PC)
2206 lab = XEXP (src, 2);
2207 }
2208 if (lab)
2209 {
2210 if (GET_CODE (lab) == RETURN)
2211 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2212 return lab;
2213 }
2214 return 0;
2215 }
2216
2217 return src;
2218 }
2219 else if (CALL_P (branch))
2220 {
2221 rtx call;
2222 /* All of our call patterns are in a PARALLEL and the CALL is
2223 the first pattern in the PARALLEL. */
2224 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2225 abort ();
2226 call = XVECEXP (PATTERN (branch), 0, 0);
2227 if (GET_CODE (call) == SET)
2228 call = SET_SRC (call);
2229 if (GET_CODE (call) != CALL)
2230 abort ();
2231 return XEXP (XEXP (call, 0), 0);
2232 }
2233 return 0;
2234 }
2235
2236 /* The special $hbr register is used to prevent the insn scheduler from
2237 moving hbr insns across instructions which invalidate them. It
2238 should only be used in a clobber, and this function searches for
2239 insns which clobber it. */
2240 static bool
2241 insn_clobbers_hbr (rtx insn)
2242 {
2243 if (INSN_P (insn)
2244 && GET_CODE (PATTERN (insn)) == PARALLEL)
2245 {
2246 rtx parallel = PATTERN (insn);
2247 rtx clobber;
2248 int j;
2249 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2250 {
2251 clobber = XVECEXP (parallel, 0, j);
2252 if (GET_CODE (clobber) == CLOBBER
2253 && GET_CODE (XEXP (clobber, 0)) == REG
2254 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2255 return 1;
2256 }
2257 }
2258 return 0;
2259 }
2260
2261 /* Search up to 32 insns starting at FIRST:
2262 - at any kind of hinted branch, just return
2263 - at any unconditional branch in the first 15 insns, just return
2264 - at a call or indirect branch, after the first 15 insns, force it to
2265 an even address and return
2266 - at any unconditional branch, after the first 15 insns, force it to
2267 an even address.
2268 At the end of the search, insert an hbrp within 4 insns of FIRST,
2269 and an hbrp within 16 instructions of FIRST.
2270 */
2271 static void
2272 insert_hbrp_for_ilb_runout (rtx first)
2273 {
2274 rtx insn, before_4 = 0, before_16 = 0;
2275 int addr = 0, length, first_addr = -1;
2276 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2277 int insert_lnop_after = 0;
2278 for (insn = first; insn; insn = NEXT_INSN (insn))
2279 if (INSN_P (insn))
2280 {
2281 if (first_addr == -1)
2282 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2283 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2284 length = get_attr_length (insn);
2285
2286 if (before_4 == 0 && addr + length >= 4 * 4)
2287 before_4 = insn;
2288 /* We test for 14 instructions because the first hbrp will add
2289 up to 2 instructions. */
2290 if (before_16 == 0 && addr + length >= 14 * 4)
2291 before_16 = insn;
2292
2293 if (INSN_CODE (insn) == CODE_FOR_hbr)
2294 {
2295 /* Make sure an hbrp is at least 2 cycles away from a hint.
2296 Insert an lnop after the hbrp when necessary. */
2297 if (before_4 == 0 && addr > 0)
2298 {
2299 before_4 = insn;
2300 insert_lnop_after |= 1;
2301 }
2302 else if (before_4 && addr <= 4 * 4)
2303 insert_lnop_after |= 1;
2304 if (before_16 == 0 && addr > 10 * 4)
2305 {
2306 before_16 = insn;
2307 insert_lnop_after |= 2;
2308 }
2309 else if (before_16 && addr <= 14 * 4)
2310 insert_lnop_after |= 2;
2311 }
2312
2313 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2314 {
2315 if (addr < hbrp_addr0)
2316 hbrp_addr0 = addr;
2317 else if (addr < hbrp_addr1)
2318 hbrp_addr1 = addr;
2319 }
2320
2321 if (CALL_P (insn) || JUMP_P (insn))
2322 {
2323 if (HINTED_P (insn))
2324 return;
2325
2326 /* Any branch after the first 15 insns should be on an even
2327 address to avoid a special case branch. There might be
2328 some nops and/or hbrps inserted, so we test after 10
2329 insns. */
2330 if (addr > 10 * 4)
2331 SCHED_ON_EVEN_P (insn) = 1;
2332 }
2333
2334 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2335 return;
2336
2337
2338 if (addr + length >= 32 * 4)
2339 {
2340 gcc_assert (before_4 && before_16);
2341 if (hbrp_addr0 > 4 * 4)
2342 {
2343 insn =
2344 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2345 recog_memoized (insn);
2346 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2347 INSN_ADDRESSES_NEW (insn,
2348 INSN_ADDRESSES (INSN_UID (before_4)));
2349 PUT_MODE (insn, GET_MODE (before_4));
2350 PUT_MODE (before_4, TImode);
2351 if (insert_lnop_after & 1)
2352 {
2353 insn = emit_insn_before (gen_lnop (), before_4);
2354 recog_memoized (insn);
2355 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2356 INSN_ADDRESSES_NEW (insn,
2357 INSN_ADDRESSES (INSN_UID (before_4)));
2358 PUT_MODE (insn, TImode);
2359 }
2360 }
2361 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2362 && hbrp_addr1 > 16 * 4)
2363 {
2364 insn =
2365 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2366 recog_memoized (insn);
2367 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2368 INSN_ADDRESSES_NEW (insn,
2369 INSN_ADDRESSES (INSN_UID (before_16)));
2370 PUT_MODE (insn, GET_MODE (before_16));
2371 PUT_MODE (before_16, TImode);
2372 if (insert_lnop_after & 2)
2373 {
2374 insn = emit_insn_before (gen_lnop (), before_16);
2375 recog_memoized (insn);
2376 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2377 INSN_ADDRESSES_NEW (insn,
2378 INSN_ADDRESSES (INSN_UID
2379 (before_16)));
2380 PUT_MODE (insn, TImode);
2381 }
2382 }
2383 return;
2384 }
2385 }
2386 else if (BARRIER_P (insn))
2387 return;
2388
2389 }
2390
2391 /* The SPU might hang when it executes 48 inline instructions after a
2392 hinted branch jumps to its hinted target. The beginning of a
2393 function and the return from a call might have been hinted, and
2394 must be handled as well. To prevent a hang we insert 2 hbrps. The
2395 first should be within 6 insns of the branch target. The second
2396 should be within 22 insns of the branch target. When determining
2397 if hbrps are necessary, we look for only 32 inline instructions,
2398 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2399 when inserting new hbrps, we insert them within 4 and 16 insns of
2400 the target. */
2401 static void
2402 insert_hbrp (void)
2403 {
2404 rtx insn;
2405 if (TARGET_SAFE_HINTS)
2406 {
2407 shorten_branches (get_insns ());
2408 /* Insert hbrp at beginning of function */
2409 insn = next_active_insn (get_insns ());
2410 if (insn)
2411 insert_hbrp_for_ilb_runout (insn);
2412 /* Insert hbrp after hinted targets. */
2413 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2414 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2415 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2416 }
2417 }
2418
2419 static int in_spu_reorg;
2420
2421 static void
2422 spu_var_tracking (void)
2423 {
2424 if (flag_var_tracking)
2425 {
2426 df_analyze ();
2427 timevar_push (TV_VAR_TRACKING);
2428 variable_tracking_main ();
2429 timevar_pop (TV_VAR_TRACKING);
2430 df_finish_pass (false);
2431 }
2432 }
2433
2434 /* Insert branch hints. There are no branch optimizations after this
2435 pass, so it's safe to set our branch hints now. */
2436 static void
2437 spu_machine_dependent_reorg (void)
2438 {
2439 sbitmap blocks;
2440 basic_block bb;
2441 rtx branch, insn;
2442 rtx branch_target = 0;
2443 int branch_addr = 0, insn_addr, required_dist = 0;
2444 int i;
2445 unsigned int j;
2446
2447 if (!TARGET_BRANCH_HINTS || optimize == 0)
2448 {
2449 /* We still do it for unoptimized code because an external
2450 function might have hinted a call or return. */
2451 compute_bb_for_insn ();
2452 insert_hbrp ();
2453 pad_bb ();
2454 spu_var_tracking ();
2455 free_bb_for_insn ();
2456 return;
2457 }
2458
2459 blocks = sbitmap_alloc (last_basic_block);
2460 bitmap_clear (blocks);
2461
2462 in_spu_reorg = 1;
2463 compute_bb_for_insn ();
2464
2465 /* (Re-)discover loops so that bb->loop_father can be used
2466 in the analysis below. */
2467 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2468
2469 compact_blocks ();
2470
2471 spu_bb_info =
2472 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2473 sizeof (struct spu_bb_info));
2474
2475 /* We need exact insn addresses and lengths. */
2476 shorten_branches (get_insns ());
2477
2478 for (i = n_basic_blocks - 1; i >= 0; i--)
2479 {
2480 bb = BASIC_BLOCK (i);
2481 branch = 0;
2482 if (spu_bb_info[i].prop_jump)
2483 {
2484 branch = spu_bb_info[i].prop_jump;
2485 branch_target = get_branch_target (branch);
2486 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2487 required_dist = spu_hint_dist;
2488 }
2489 /* Search from the end of a block to its beginning. In this loop, find
2490 jumps which need a branch hint and emit the hint only when:
2491 - it's an indirect branch and we're at the insn which sets
2492 the register
2493 - we're at an insn that will invalidate the hint. e.g., a
2494 call, another hint insn, inline asm that clobbers $hbr, and
2495 some inlined operations (divmodsi4). Don't consider jumps
2496 because they are only at the end of a block and are
2497 considered when we are deciding whether to propagate
2498 - we're getting too far away from the branch. The hbr insns
2499 only have a signed 10 bit offset
2500 We go back as far as possible so the branch will be considered
2501 for propagation when we get to the beginning of the block. */
2502 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2503 {
2504 if (INSN_P (insn))
2505 {
2506 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2507 if (branch
2508 && ((GET_CODE (branch_target) == REG
2509 && set_of (branch_target, insn) != NULL_RTX)
2510 || insn_clobbers_hbr (insn)
2511 || branch_addr - insn_addr > 600))
2512 {
2513 rtx next = NEXT_INSN (insn);
2514 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2515 if (insn != BB_END (bb)
2516 && branch_addr - next_addr >= required_dist)
2517 {
2518 if (dump_file)
2519 fprintf (dump_file,
2520 "hint for %i in block %i before %i\n",
2521 INSN_UID (branch), bb->index,
2522 INSN_UID (next));
2523 spu_emit_branch_hint (next, branch, branch_target,
2524 branch_addr - next_addr, blocks);
2525 }
2526 branch = 0;
2527 }
2528
2529 /* JUMP_P will only be true at the end of a block. When
2530 branch is already set it means we've previously decided
2531 to propagate a hint for that branch into this block. */
2532 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2533 {
2534 branch = 0;
2535 if ((branch_target = get_branch_target (insn)))
2536 {
2537 branch = insn;
2538 branch_addr = insn_addr;
2539 required_dist = spu_hint_dist;
2540 }
2541 }
2542 }
2543 if (insn == BB_HEAD (bb))
2544 break;
2545 }
2546
2547 if (branch)
2548 {
2549 /* If we haven't emitted a hint for this branch yet, it might
2550 be profitable to emit it in one of the predecessor blocks,
2551 especially for loops. */
2552 rtx bbend;
2553 basic_block prev = 0, prop = 0, prev2 = 0;
2554 int loop_exit = 0, simple_loop = 0;
2555 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2556
2557 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2558 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2559 prev = EDGE_PRED (bb, j)->src;
2560 else
2561 prev2 = EDGE_PRED (bb, j)->src;
2562
2563 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2564 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2565 loop_exit = 1;
2566 else if (EDGE_SUCC (bb, j)->dest == bb)
2567 simple_loop = 1;
2568
2569 /* If this branch is a loop exit then propagate to previous
2570 fallthru block. This catches the cases when it is a simple
2571 loop or when there is an initial branch into the loop. */
2572 if (prev && (loop_exit || simple_loop)
2573 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2574 prop = prev;
2575
2576 /* If there is only one adjacent predecessor, don't propagate
2577 outside this loop. */
2578 else if (prev && single_pred_p (bb)
2579 && prev->loop_father == bb->loop_father)
2580 prop = prev;
2581
2582 /* If this is the JOIN block of a simple IF-THEN then
2583 propagate the hint to the HEADER block. */
2584 else if (prev && prev2
2585 && EDGE_COUNT (bb->preds) == 2
2586 && EDGE_COUNT (prev->preds) == 1
2587 && EDGE_PRED (prev, 0)->src == prev2
2588 && prev2->loop_father == bb->loop_father
2589 && GET_CODE (branch_target) != REG)
2590 prop = prev;
2591
2592 /* Don't propagate when:
2593 - this is a simple loop and the hint would be too far
2594 - this is not a simple loop and there are 16 insns in
2595 this block already
2596 - the predecessor block ends in a branch that will be
2597 hinted
2598 - the predecessor block ends in an insn that invalidates
2599 the hint */
2600 if (prop
2601 && prop->index >= 0
2602 && (bbend = BB_END (prop))
2603 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2604 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2605 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2606 {
2607 if (dump_file)
2608 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2609 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2610 bb->index, prop->index, bb_loop_depth (bb),
2611 INSN_UID (branch), loop_exit, simple_loop,
2612 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2613
2614 spu_bb_info[prop->index].prop_jump = branch;
2615 spu_bb_info[prop->index].bb_index = i;
2616 }
2617 else if (branch_addr - next_addr >= required_dist)
2618 {
2619 if (dump_file)
2620 fprintf (dump_file, "hint for %i in block %i before %i\n",
2621 INSN_UID (branch), bb->index,
2622 INSN_UID (NEXT_INSN (insn)));
2623 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2624 branch_addr - next_addr, blocks);
2625 }
2626 branch = 0;
2627 }
2628 }
2629 free (spu_bb_info);
2630
2631 if (!bitmap_empty_p (blocks))
2632 find_many_sub_basic_blocks (blocks);
2633
2634 /* We have to schedule to make sure alignment is ok. */
2635 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2636
2637 /* The hints need to be scheduled, so call it again. */
2638 schedule_insns ();
2639 df_finish_pass (true);
2640
2641 insert_hbrp ();
2642
2643 pad_bb ();
2644
2645 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2646 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2647 {
2648 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2649 between its branch label and the branch. We don't move the
2650 label because GCC expects it at the beginning of the block. */
2651 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2652 rtx label_ref = XVECEXP (unspec, 0, 0);
2653 rtx label = XEXP (label_ref, 0);
2654 rtx branch;
2655 int offset = 0;
2656 for (branch = NEXT_INSN (label);
2657 !JUMP_P (branch) && !CALL_P (branch);
2658 branch = NEXT_INSN (branch))
2659 if (NONJUMP_INSN_P (branch))
2660 offset += get_attr_length (branch);
2661 if (offset > 0)
2662 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2663 }
2664
2665 spu_var_tracking ();
2666
2667 loop_optimizer_finalize ();
2668
2669 free_bb_for_insn ();
2670
2671 in_spu_reorg = 0;
2672 }
2673 \f
2674
2675 /* Insn scheduling routines, primarily for dual issue. */
2676 static int
2677 spu_sched_issue_rate (void)
2678 {
2679 return 2;
2680 }
2681
2682 static int
2683 uses_ls_unit(rtx insn)
2684 {
2685 rtx set = single_set (insn);
2686 if (set != 0
2687 && (GET_CODE (SET_DEST (set)) == MEM
2688 || GET_CODE (SET_SRC (set)) == MEM))
2689 return 1;
2690 return 0;
2691 }
2692
2693 static int
2694 get_pipe (rtx insn)
2695 {
2696 enum attr_type t;
2697 /* Handle inline asm */
2698 if (INSN_CODE (insn) == -1)
2699 return -1;
2700 t = get_attr_type (insn);
2701 switch (t)
2702 {
2703 case TYPE_CONVERT:
2704 return -2;
2705 case TYPE_MULTI0:
2706 return -1;
2707
2708 case TYPE_FX2:
2709 case TYPE_FX3:
2710 case TYPE_SPR:
2711 case TYPE_NOP:
2712 case TYPE_FXB:
2713 case TYPE_FPD:
2714 case TYPE_FP6:
2715 case TYPE_FP7:
2716 return 0;
2717
2718 case TYPE_LNOP:
2719 case TYPE_SHUF:
2720 case TYPE_LOAD:
2721 case TYPE_STORE:
2722 case TYPE_BR:
2723 case TYPE_MULTI1:
2724 case TYPE_HBR:
2725 case TYPE_IPREFETCH:
2726 return 1;
2727 default:
2728 abort ();
2729 }
2730 }
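/* Editorial aside, not part of the original sources: pipe 0 above is the
   SPU's even (arithmetic) pipeline and pipe 1 is the odd pipeline (loads,
   stores, branches, shuffles, hints).  With an issue rate of 2, a cycle can
   issue one insn from each pipe, but only when the even-pipe insn sits at
   an 8-byte-aligned address with the odd-pipe insn immediately after it.
   A rough model of that pairing test:

     static int
     can_dual_issue (int first_pipe, int second_pipe, int first_addr)
     {
       return first_pipe == 0 && second_pipe == 1 && (first_addr & 7) == 0;
     }

   This is the property pad_bb and emit_nop_for_insn work to establish.  */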
2731
2732
2733 /* haifa-sched.c has a static variable that keeps track of the current
2734 cycle. It is passed to spu_sched_reorder, and we record it here for
2735 use by spu_sched_variable_issue. It won't be accurate if the
2736 scheduler updates its clock_var between the two calls. */
2737 static int clock_var;
2738
2739 /* This is used to keep track of insn alignment. Set to 0 at the
2740 beginning of each block and increased by the "length" attr of each
2741 insn scheduled. */
2742 static int spu_sched_length;
2743
2744 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2745 ready list appropriately in spu_sched_reorder(). */
2746 static int pipe0_clock;
2747 static int pipe1_clock;
2748
2749 static int prev_clock_var;
2750
2751 static int prev_priority;
2752
2753 /* The SPU needs to load the next ilb sometime during the execution of
2754 the previous ilb. There is a potential conflict if every cycle has a
2755 load or store. To avoid the conflict we make sure the load/store
2756 unit is free for at least one cycle during the execution of insns in
2757 the previous ilb. */
2758 static int spu_ls_first;
2759 static int prev_ls_clock;
2760
2761 static void
2762 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2763 int max_ready ATTRIBUTE_UNUSED)
2764 {
2765 spu_sched_length = 0;
2766 }
2767
2768 static void
2769 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2770 int max_ready ATTRIBUTE_UNUSED)
2771 {
2772 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2773 {
2774 /* When any block might be at least 8-byte aligned, assume they
2775 will all be at least 8-byte aligned to make sure dual issue
2776 works out correctly. */
2777 spu_sched_length = 0;
2778 }
2779 spu_ls_first = INT_MAX;
2780 clock_var = -1;
2781 prev_ls_clock = -1;
2782 pipe0_clock = -1;
2783 pipe1_clock = -1;
2784 prev_clock_var = -1;
2785 prev_priority = -1;
2786 }
2787
2788 static int
2789 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2790 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2791 {
2792 int len;
2793 int p;
2794 if (GET_CODE (PATTERN (insn)) == USE
2795 || GET_CODE (PATTERN (insn)) == CLOBBER
2796 || (len = get_attr_length (insn)) == 0)
2797 return more;
2798
2799 spu_sched_length += len;
2800
2801 /* Reset on inline asm */
2802 if (INSN_CODE (insn) == -1)
2803 {
2804 spu_ls_first = INT_MAX;
2805 pipe0_clock = -1;
2806 pipe1_clock = -1;
2807 return 0;
2808 }
2809 p = get_pipe (insn);
2810 if (p == 0)
2811 pipe0_clock = clock_var;
2812 else
2813 pipe1_clock = clock_var;
2814
2815 if (in_spu_reorg)
2816 {
2817 if (clock_var - prev_ls_clock > 1
2818 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2819 spu_ls_first = INT_MAX;
2820 if (uses_ls_unit (insn))
2821 {
2822 if (spu_ls_first == INT_MAX)
2823 spu_ls_first = spu_sched_length;
2824 prev_ls_clock = clock_var;
2825 }
2826
2827 /* The scheduler hasn't inserted the nop, but we will later on.
2828 Include those nops in spu_sched_length. */
2829 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2830 spu_sched_length += 4;
2831 prev_clock_var = clock_var;
2832
2833 /* more is -1 when called from spu_sched_reorder for new insns
2834 that don't have INSN_PRIORITY */
2835 if (more >= 0)
2836 prev_priority = INSN_PRIORITY (insn);
2837 }
2838
2839 /* Always try issuing more insns. spu_sched_reorder will decide
2840 when the cycle should be advanced. */
2841 return 1;
2842 }
2843
2844 /* This function is called for both TARGET_SCHED_REORDER and
2845 TARGET_SCHED_REORDER2. */
2846 static int
2847 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2848 rtx *ready, int *nreadyp, int clock)
2849 {
2850 int i, nready = *nreadyp;
2851 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2852 rtx insn;
2853
2854 clock_var = clock;
2855
2856 if (nready <= 0 || pipe1_clock >= clock)
2857 return 0;
2858
2859 /* Find any rtl insns that don't generate assembly insns and schedule
2860 them first. */
2861 for (i = nready - 1; i >= 0; i--)
2862 {
2863 insn = ready[i];
2864 if (INSN_CODE (insn) == -1
2865 || INSN_CODE (insn) == CODE_FOR_blockage
2866 || (INSN_P (insn) && get_attr_length (insn) == 0))
2867 {
2868 ready[i] = ready[nready - 1];
2869 ready[nready - 1] = insn;
2870 return 1;
2871 }
2872 }
2873
2874 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2875 for (i = 0; i < nready; i++)
2876 if (INSN_CODE (ready[i]) != -1)
2877 {
2878 insn = ready[i];
2879 switch (get_attr_type (insn))
2880 {
2881 default:
2882 case TYPE_MULTI0:
2883 case TYPE_CONVERT:
2884 case TYPE_FX2:
2885 case TYPE_FX3:
2886 case TYPE_SPR:
2887 case TYPE_NOP:
2888 case TYPE_FXB:
2889 case TYPE_FPD:
2890 case TYPE_FP6:
2891 case TYPE_FP7:
2892 pipe_0 = i;
2893 break;
2894 case TYPE_LOAD:
2895 case TYPE_STORE:
2896 pipe_ls = i;
2897 case TYPE_LNOP:
2898 case TYPE_SHUF:
2899 case TYPE_BR:
2900 case TYPE_MULTI1:
2901 case TYPE_HBR:
2902 pipe_1 = i;
2903 break;
2904 case TYPE_IPREFETCH:
2905 pipe_hbrp = i;
2906 break;
2907 }
2908 }
2909
2910 /* In the first scheduling phase, schedule loads and stores together
2911 to increase the chance they will get merged during postreload CSE. */
2912 if (!reload_completed && pipe_ls >= 0)
2913 {
2914 insn = ready[pipe_ls];
2915 ready[pipe_ls] = ready[nready - 1];
2916 ready[nready - 1] = insn;
2917 return 1;
2918 }
2919
2920 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2921 if (pipe_hbrp >= 0)
2922 pipe_1 = pipe_hbrp;
2923
2924 /* When we have loads/stores in every cycle of the last 15 insns and
2925 we are about to schedule another load/store, emit an hbrp insn
2926 instead. */
2927 if (in_spu_reorg
2928 && spu_sched_length - spu_ls_first >= 4 * 15
2929 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2930 {
2931 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2932 recog_memoized (insn);
2933 if (pipe0_clock < clock)
2934 PUT_MODE (insn, TImode);
2935 spu_sched_variable_issue (file, verbose, insn, -1);
2936 return 0;
2937 }
2938
2939 /* In general, we want to emit nops to increase dual issue, but dual
2940 issue isn't faster when one of the insns could be scheduled later
2941 without affecting the critical path. We look at INSN_PRIORITY to
2942 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2943 used to tune the behavior. */
2944 if (in_spu_reorg && spu_dual_nops < 10)
2945 {
2946 /* When we are at an even address and we are not issuing nops to
2947 improve scheduling then we need to advance the cycle. */
2948 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2949 && (spu_dual_nops == 0
2950 || (pipe_1 != -1
2951 && prev_priority >
2952 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2953 return 0;
2954
2955 /* When at an odd address, schedule the highest priority insn
2956 without considering pipeline. */
2957 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2958 && (spu_dual_nops == 0
2959 || (prev_priority >
2960 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2961 return 1;
2962 }
2963
2964
2965 /* If we haven't issued a pipe0 insn yet this cycle and there is a
2966 pipe0 insn in the ready list, schedule it. */
2967 if (pipe0_clock < clock && pipe_0 >= 0)
2968 schedule_i = pipe_0;
2969
2970 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2971 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2972 else
2973 schedule_i = pipe_1;
2974
2975 if (schedule_i > -1)
2976 {
2977 insn = ready[schedule_i];
2978 ready[schedule_i] = ready[nready - 1];
2979 ready[nready - 1] = insn;
2980 return 1;
2981 }
2982 return 0;
2983 }
2984
2985 /* INSN is dependent on DEP_INSN. */
2986 static int
2987 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
2988 {
2989 rtx set;
2990
2991 /* The blockage pattern is used to prevent instructions from being
2992 moved across it and has no cost. */
2993 if (INSN_CODE (insn) == CODE_FOR_blockage
2994 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2995 return 0;
2996
2997 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2998 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
2999 return 0;
3000
3001 /* Make sure hbrps are spread out. */
3002 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3003 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3004 return 8;
3005
3006 /* Make sure hints and hbrps are 2 cycles apart. */
3007 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3008 || INSN_CODE (insn) == CODE_FOR_hbr)
3009 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3010 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3011 return 2;
3012
3013 /* An hbrp has no real dependency on other insns. */
3014 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3015 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3016 return 0;
3017
3018 /* Assuming that it is unlikely an argument register will be used in
3019 the first cycle of the called function, we reduce the cost for
3020 slightly better scheduling of dep_insn. When not hinted, the
3021 mispredicted branch would hide the cost as well. */
3022 if (CALL_P (insn))
3023 {
3024 rtx target = get_branch_target (insn);
3025 if (GET_CODE (target) != REG || !set_of (target, insn))
3026 return cost - 2;
3027 return cost;
3028 }
3029
3030 /* And when returning from a function, let's assume the return values
3031 are completed sooner too. */
3032 if (CALL_P (dep_insn))
3033 return cost - 2;
3034
3035 /* Make sure an instruction that loads from the back chain is scheduled
3036 away from the return instruction so a hint is more likely to get
3037 issued. */
3038 if (INSN_CODE (insn) == CODE_FOR__return
3039 && (set = single_set (dep_insn))
3040 && GET_CODE (SET_DEST (set)) == REG
3041 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3042 return 20;
3043
3044 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3045 scheduler makes every insn in a block anti-dependent on the final
3046 jump_insn. We adjust here so higher cost insns will get scheduled
3047 earlier. */
3048 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3049 return insn_cost (dep_insn) - 3;
3050
3051 return cost;
3052 }
3053 \f
3054 /* Create a CONST_DOUBLE from a string. */
3055 rtx
3056 spu_float_const (const char *string, enum machine_mode mode)
3057 {
3058 REAL_VALUE_TYPE value;
3059 value = REAL_VALUE_ATOF (string, mode);
3060 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3061 }
3062
3063 int
3064 spu_constant_address_p (rtx x)
3065 {
3066 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3067 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3068 || GET_CODE (x) == HIGH);
3069 }
3070
3071 static enum spu_immediate
3072 which_immediate_load (HOST_WIDE_INT val)
3073 {
3074 gcc_assert (val == trunc_int_for_mode (val, SImode));
3075
3076 if (val >= -0x8000 && val <= 0x7fff)
3077 return SPU_IL;
3078 if (val >= 0 && val <= 0x3ffff)
3079 return SPU_ILA;
3080 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3081 return SPU_ILH;
3082 if ((val & 0xffff) == 0)
3083 return SPU_ILHU;
3084
3085 return SPU_NONE;
3086 }
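/* Editorial aside, not part of the original sources: a few worked examples
   of the classification above, written as the checks they would satisfy:

     which_immediate_load (-1234)      == SPU_IL    (16-bit signed range)
     which_immediate_load (0x23456)    == SPU_ILA   (18-bit unsigned range)
     which_immediate_load (0x00050005) == SPU_ILH   (both halfwords equal)
     which_immediate_load (0x12340000) == SPU_ILHU  (low halfword is zero)
     which_immediate_load (0x12345678) == SPU_NONE  (needs ilhu/iohl or pool)

   Values that come back SPU_NONE are handled by the IC_IL2 and IC_POOL
   paths in classify_immediate below.  */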
3087
3088 /* Return true when OP can be loaded by one of the il instructions, or
3089 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3090 int
3091 immediate_load_p (rtx op, enum machine_mode mode)
3092 {
3093 if (CONSTANT_P (op))
3094 {
3095 enum immediate_class c = classify_immediate (op, mode);
3096 return c == IC_IL1 || c == IC_IL1s
3097 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3098 }
3099 return 0;
3100 }
3101
3102 /* Return true if the first SIZE bytes of ARR are a constant that can be
3103 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3104 represent the size and offset of the instruction to use. */
3105 static int
3106 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3107 {
3108 int cpat, run, i, start;
3109 cpat = 1;
3110 run = 0;
3111 start = -1;
3112 for (i = 0; i < size && cpat; i++)
3113 if (arr[i] != i+16)
3114 {
3115 if (!run)
3116 {
3117 start = i;
3118 if (arr[i] == 3)
3119 run = 1;
3120 else if (arr[i] == 2 && arr[i+1] == 3)
3121 run = 2;
3122 else if (arr[i] == 0)
3123 {
3124 while (arr[i+run] == run && i+run < 16)
3125 run++;
3126 if (run != 4 && run != 8)
3127 cpat = 0;
3128 }
3129 else
3130 cpat = 0;
3131 if ((i & (run-1)) != 0)
3132 cpat = 0;
3133 i += run;
3134 }
3135 else
3136 cpat = 0;
3137 }
3138 if (cpat && (run || size < 16))
3139 {
3140 if (run == 0)
3141 run = 1;
3142 if (prun)
3143 *prun = run;
3144 if (pstart)
3145 *pstart = start == -1 ? 16-run : start;
3146 return 1;
3147 }
3148 return 0;
3149 }
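/* Editorial aside, not part of the original sources: example byte arrays
   accepted by cpat_info above.  The "identity" byte for slot i is i + 16,
   and a cbd/chd/cwd/cdd pattern replaces an aligned run of 1, 2, 4 or 8
   bytes with 3, {2,3}, {0,1,2,3} or {0..7} respectively:

     { 3, 17, 18, ..., 31 }                       run 1, start 0  (cbd)
     { 16, 17, 2, 3, 20, ..., 31 }                run 2, start 2  (chd)
     { 16, ..., 23, 0, 1, 2, 3, 28, 29, 30, 31 }  run 4, start 8  (cwd)
     { 16, ..., 23, 0, 1, 2, 3, 4, 5, 6, 7 }      run 8, start 8  (cdd)

   Anything that deviates from the identity bytes in more than one place,
   or with a misaligned or odd-sized run, makes cpat_info return 0.  */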
3150
3151 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3152 it into a register. MODE is only valid when OP is a CONST_INT. */
3153 static enum immediate_class
3154 classify_immediate (rtx op, enum machine_mode mode)
3155 {
3156 HOST_WIDE_INT val;
3157 unsigned char arr[16];
3158 int i, j, repeated, fsmbi, repeat;
3159
3160 gcc_assert (CONSTANT_P (op));
3161
3162 if (GET_MODE (op) != VOIDmode)
3163 mode = GET_MODE (op);
3164
3165 /* A V4SI const_vector with all identical symbols is ok. */
3166 if (!flag_pic
3167 && mode == V4SImode
3168 && GET_CODE (op) == CONST_VECTOR
3169 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3170 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3171 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3172 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3173 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3174 op = CONST_VECTOR_ELT (op, 0);
3175
3176 switch (GET_CODE (op))
3177 {
3178 case SYMBOL_REF:
3179 case LABEL_REF:
3180 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3181
3182 case CONST:
3183 /* We can never know if the resulting address fits in 18 bits and can be
3184 loaded with ila. For now, assume the address will not overflow if
3185 the displacement is "small" (fits 'K' constraint). */
3186 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3187 {
3188 rtx sym = XEXP (XEXP (op, 0), 0);
3189 rtx cst = XEXP (XEXP (op, 0), 1);
3190
3191 if (GET_CODE (sym) == SYMBOL_REF
3192 && GET_CODE (cst) == CONST_INT
3193 && satisfies_constraint_K (cst))
3194 return IC_IL1s;
3195 }
3196 return IC_IL2s;
3197
3198 case HIGH:
3199 return IC_IL1s;
3200
3201 case CONST_VECTOR:
3202 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3203 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3204 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3205 return IC_POOL;
3206 /* Fall through. */
3207
3208 case CONST_INT:
3209 case CONST_DOUBLE:
3210 constant_to_array (mode, op, arr);
3211
3212 /* Check that each 4-byte slot is identical. */
3213 repeated = 1;
3214 for (i = 4; i < 16; i += 4)
3215 for (j = 0; j < 4; j++)
3216 if (arr[j] != arr[i + j])
3217 repeated = 0;
3218
3219 if (repeated)
3220 {
3221 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3222 val = trunc_int_for_mode (val, SImode);
3223
3224 if (which_immediate_load (val) != SPU_NONE)
3225 return IC_IL1;
3226 }
3227
3228 /* Any mode of 2 bytes or smaller can be loaded with an il
3229 instruction. */
3230 gcc_assert (GET_MODE_SIZE (mode) > 2);
3231
3232 fsmbi = 1;
3233 repeat = 0;
3234 for (i = 0; i < 16 && fsmbi; i++)
3235 if (arr[i] != 0 && repeat == 0)
3236 repeat = arr[i];
3237 else if (arr[i] != 0 && arr[i] != repeat)
3238 fsmbi = 0;
3239 if (fsmbi)
3240 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3241
3242 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3243 return IC_CPAT;
3244
3245 if (repeated)
3246 return IC_IL2;
3247
3248 return IC_POOL;
3249 default:
3250 break;
3251 }
3252 gcc_unreachable ();
3253 }
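/* Editorial aside, not part of the original sources: a minimal sketch of
   the fsmbi test used in classify_immediate above.  A constant qualifies
   when every byte is either 0x00 or one single repeated value; if that
   value is 0xff a single fsmbi builds it (IC_FSMBI), otherwise one more
   fix-up instruction is needed (IC_FSMBI2).

     static int
     fsmbi_class (const unsigned char arr[16])
     {
       int i, repeat = 0;
       for (i = 0; i < 16; i++)
         if (arr[i] != 0)
           {
             if (repeat == 0)
               repeat = arr[i];
             else if (arr[i] != repeat)
               return 0;
           }
       return repeat == 0xff ? 1 : 2;
     }

   Here 0 means the bytes do not fit the pattern, 1 corresponds to
   IC_FSMBI and 2 to IC_FSMBI2.  */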
3254
3255 static enum spu_immediate
3256 which_logical_immediate (HOST_WIDE_INT val)
3257 {
3258 gcc_assert (val == trunc_int_for_mode (val, SImode));
3259
3260 if (val >= -0x200 && val <= 0x1ff)
3261 return SPU_ORI;
3262 if (val >= 0 && val <= 0xffff)
3263 return SPU_IOHL;
3264 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3265 {
3266 val = trunc_int_for_mode (val, HImode);
3267 if (val >= -0x200 && val <= 0x1ff)
3268 return SPU_ORHI;
3269 if ((val & 0xff) == ((val >> 8) & 0xff))
3270 {
3271 val = trunc_int_for_mode (val, QImode);
3272 if (val >= -0x200 && val <= 0x1ff)
3273 return SPU_ORBI;
3274 }
3275 }
3276 return SPU_NONE;
3277 }
3278
3279 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3280 CONST_DOUBLEs. */
3281 static int
3282 const_vector_immediate_p (rtx x)
3283 {
3284 int i;
3285 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3286 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3287 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3288 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3289 return 0;
3290 return 1;
3291 }
3292
3293 int
3294 logical_immediate_p (rtx op, enum machine_mode mode)
3295 {
3296 HOST_WIDE_INT val;
3297 unsigned char arr[16];
3298 int i, j;
3299
3300 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3301 || GET_CODE (op) == CONST_VECTOR);
3302
3303 if (GET_CODE (op) == CONST_VECTOR
3304 && !const_vector_immediate_p (op))
3305 return 0;
3306
3307 if (GET_MODE (op) != VOIDmode)
3308 mode = GET_MODE (op);
3309
3310 constant_to_array (mode, op, arr);
3311
3312 /* Check that bytes are repeated. */
3313 for (i = 4; i < 16; i += 4)
3314 for (j = 0; j < 4; j++)
3315 if (arr[j] != arr[i + j])
3316 return 0;
3317
3318 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3319 val = trunc_int_for_mode (val, SImode);
3320
3321 i = which_logical_immediate (val);
3322 return i != SPU_NONE && i != SPU_IOHL;
3323 }
3324
3325 int
3326 iohl_immediate_p (rtx op, enum machine_mode mode)
3327 {
3328 HOST_WIDE_INT val;
3329 unsigned char arr[16];
3330 int i, j;
3331
3332 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3333 || GET_CODE (op) == CONST_VECTOR);
3334
3335 if (GET_CODE (op) == CONST_VECTOR
3336 && !const_vector_immediate_p (op))
3337 return 0;
3338
3339 if (GET_MODE (op) != VOIDmode)
3340 mode = GET_MODE (op);
3341
3342 constant_to_array (mode, op, arr);
3343
3344 /* Check that bytes are repeated. */
3345 for (i = 4; i < 16; i += 4)
3346 for (j = 0; j < 4; j++)
3347 if (arr[j] != arr[i + j])
3348 return 0;
3349
3350 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3351 val = trunc_int_for_mode (val, SImode);
3352
3353 return val >= 0 && val <= 0xffff;
3354 }
3355
3356 int
3357 arith_immediate_p (rtx op, enum machine_mode mode,
3358 HOST_WIDE_INT low, HOST_WIDE_INT high)
3359 {
3360 HOST_WIDE_INT val;
3361 unsigned char arr[16];
3362 int bytes, i, j;
3363
3364 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3365 || GET_CODE (op) == CONST_VECTOR);
3366
3367 if (GET_CODE (op) == CONST_VECTOR
3368 && !const_vector_immediate_p (op))
3369 return 0;
3370
3371 if (GET_MODE (op) != VOIDmode)
3372 mode = GET_MODE (op);
3373
3374 constant_to_array (mode, op, arr);
3375
3376 if (VECTOR_MODE_P (mode))
3377 mode = GET_MODE_INNER (mode);
3378
3379 bytes = GET_MODE_SIZE (mode);
3380 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3381
3382 /* Check that bytes are repeated. */
3383 for (i = bytes; i < 16; i += bytes)
3384 for (j = 0; j < bytes; j++)
3385 if (arr[j] != arr[i + j])
3386 return 0;
3387
3388 val = arr[0];
3389 for (j = 1; j < bytes; j++)
3390 val = (val << 8) | arr[j];
3391
3392 val = trunc_int_for_mode (val, mode);
3393
3394 return val >= low && val <= high;
3395 }
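/* Editorial aside, not part of the original sources: arith_immediate_p
   above requires the constant to repeat with the element size and the
   element value, truncated to its integer mode, to fall in [LOW, HIGH].
   As a worked example (splat_v8hi is a hypothetical helper that would
   build a CONST_VECTOR of eight identical HImode values), using the
   signed 10-bit range passed in by the immediate-arithmetic predicates:

     arith_immediate_p (splat_v8hi (0x00ff), V8HImode, -0x200, 0x1ff) != 0
     arith_immediate_p (splat_v8hi (0x0300), V8HImode, -0x200, 0x1ff) == 0

   0xff fits the 10-bit signed range, 0x300 does not.  */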
3396
3397 /* Return TRUE when OP is an immediate and an exact power of 2, i.e.
3398 OP is 2^scale with scale >= LOW && scale <= HIGH. When OP is a vector,
3399 all entries must be the same. */
3400 bool
3401 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3402 {
3403 enum machine_mode int_mode;
3404 HOST_WIDE_INT val;
3405 unsigned char arr[16];
3406 int bytes, i, j;
3407
3408 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3409 || GET_CODE (op) == CONST_VECTOR);
3410
3411 if (GET_CODE (op) == CONST_VECTOR
3412 && !const_vector_immediate_p (op))
3413 return 0;
3414
3415 if (GET_MODE (op) != VOIDmode)
3416 mode = GET_MODE (op);
3417
3418 constant_to_array (mode, op, arr);
3419
3420 if (VECTOR_MODE_P (mode))
3421 mode = GET_MODE_INNER (mode);
3422
3423 bytes = GET_MODE_SIZE (mode);
3424 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3425
3426 /* Check that bytes are repeated. */
3427 for (i = bytes; i < 16; i += bytes)
3428 for (j = 0; j < bytes; j++)
3429 if (arr[j] != arr[i + j])
3430 return 0;
3431
3432 val = arr[0];
3433 for (j = 1; j < bytes; j++)
3434 val = (val << 8) | arr[j];
3435
3436 val = trunc_int_for_mode (val, int_mode);
3437
3438 /* Currently, we only handle SFmode */
3439 gcc_assert (mode == SFmode);
3440 if (mode == SFmode)
3441 {
3442 int exp = (val >> 23) - 127;
3443 return val > 0 && (val & 0x007fffff) == 0
3444 && exp >= low && exp <= high;
3445 }
3446 return FALSE;
3447 }
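/* Editorial aside, not part of the original sources: for SFmode the test
   above accepts exactly the positive powers of two, by checking that the
   mantissa field is zero and pulling the scale out of the exponent field.
   A minimal standalone sketch over the raw IEEE-754 bits:

     static int
     sf_exp2_scale_in_range (unsigned int bits, int low, int high)
     {
       int scale = (int) (bits >> 23) - 127;
       if (bits == 0
           || (bits & 0x80000000u) != 0
           || (bits & 0x007fffffu) != 0)
         return 0;
       return scale >= low && scale <= high;
     }

   For example 8.0f encodes as 0x41000000 and yields scale 3, so it is
   accepted for LOW <= 3 <= HIGH, while 6.0f (0x40c00000) is rejected
   because its mantissa bits are nonzero.  */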
3448
3449 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3450
3451 static int
3452 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3453 {
3454 rtx x = *px;
3455 tree decl;
3456
3457 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3458 {
3459 rtx plus = XEXP (x, 0);
3460 rtx op0 = XEXP (plus, 0);
3461 rtx op1 = XEXP (plus, 1);
3462 if (GET_CODE (op1) == CONST_INT)
3463 x = op0;
3464 }
3465
3466 return (GET_CODE (x) == SYMBOL_REF
3467 && (decl = SYMBOL_REF_DECL (x)) != 0
3468 && TREE_CODE (decl) == VAR_DECL
3469 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3470 }
3471
3472 /* We accept:
3473 - any 32-bit constant (SImode, SFmode)
3474 - any constant that can be generated with fsmbi (any mode)
3475 - a 64-bit constant where the high and low bits are identical
3476 (DImode, DFmode)
3477 - a 128-bit constant where the four 32-bit words match. */
3478 bool
3479 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3480 {
3481 if (GET_CODE (x) == HIGH)
3482 x = XEXP (x, 0);
3483
3484 /* Reject any __ea qualified reference. These can't appear in
3485 instructions but must be forced to the constant pool. */
3486 if (for_each_rtx (&x, ea_symbol_ref, 0))
3487 return 0;
3488
3489 /* V4SI with all identical symbols is valid. */
3490 if (!flag_pic
3491 && mode == V4SImode
3492 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3493 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3494 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3495 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3496 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3497 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3498
3499 if (GET_CODE (x) == CONST_VECTOR
3500 && !const_vector_immediate_p (x))
3501 return 0;
3502 return 1;
3503 }
3504
3505 /* Valid addresses are:
3506 - symbol_ref, label_ref, const
3507 - reg
3508 - reg + const_int, where const_int is 16 byte aligned
3509 - reg + reg, alignment doesn't matter
3510 The alignment matters in the reg+const case because lqd and stqd
3511 ignore the 4 least significant bits of the const. We only care about
3512 16 byte modes because the expand phase will change all smaller MEM
3513 references to TImode. */
3514 static bool
3515 spu_legitimate_address_p (enum machine_mode mode,
3516 rtx x, bool reg_ok_strict)
3517 {
3518 int aligned = GET_MODE_SIZE (mode) >= 16;
3519 if (aligned
3520 && GET_CODE (x) == AND
3521 && GET_CODE (XEXP (x, 1)) == CONST_INT
3522 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3523 x = XEXP (x, 0);
3524 switch (GET_CODE (x))
3525 {
3526 case LABEL_REF:
3527 return !TARGET_LARGE_MEM;
3528
3529 case SYMBOL_REF:
3530 case CONST:
3531 /* Keep __ea references until reload so that spu_expand_mov can see them
3532 in MEMs. */
3533 if (ea_symbol_ref (&x, 0))
3534 return !reload_in_progress && !reload_completed;
3535 return !TARGET_LARGE_MEM;
3536
3537 case CONST_INT:
3538 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3539
3540 case SUBREG:
3541 x = XEXP (x, 0);
3542 if (REG_P (x))
3543 return 0;
3544
3545 case REG:
3546 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3547
3548 case PLUS:
3549 case LO_SUM:
3550 {
3551 rtx op0 = XEXP (x, 0);
3552 rtx op1 = XEXP (x, 1);
3553 if (GET_CODE (op0) == SUBREG)
3554 op0 = XEXP (op0, 0);
3555 if (GET_CODE (op1) == SUBREG)
3556 op1 = XEXP (op1, 0);
3557 if (GET_CODE (op0) == REG
3558 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3559 && GET_CODE (op1) == CONST_INT
3560 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3561 /* If virtual registers are involved, the displacement will
3562 change later on anyway, so checking would be premature.
3563 Reload will make sure the final displacement after
3564 register elimination is OK. */
3565 || op0 == arg_pointer_rtx
3566 || op0 == frame_pointer_rtx
3567 || op0 == virtual_stack_vars_rtx)
3568 && (!aligned || (INTVAL (op1) & 15) == 0))
3569 return TRUE;
3570 if (GET_CODE (op0) == REG
3571 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3572 && GET_CODE (op1) == REG
3573 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3574 return TRUE;
3575 }
3576 break;
3577
3578 default:
3579 break;
3580 }
3581 return FALSE;
3582 }
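/* Editorial aside, not part of the original sources: a few concrete
   address forms and how the predicate above treats them for a 16-byte
   access (which is what all smaller MEM references become after expand):

     (reg r3)                             accepted
     (plus (reg r3) (reg r4))             accepted, alignment irrelevant
     (plus (reg r3) (const_int 32))       accepted, aligned and in range
     (plus (reg r3) (const_int 8))        rejected, lqd and stqd would drop
                                          the low 4 bits of the displacement
     (plus (reg r3) (const_int 0x2000))   rejected, outside -0x2000 .. 0x1fff

   The register numbers here are only placeholders; the real test is
   INT_REG_OK_FOR_BASE_P on the base register.  */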
3583
3584 /* Like spu_legitimate_address_p, except with named addresses. */
3585 static bool
3586 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3587 bool reg_ok_strict, addr_space_t as)
3588 {
3589 if (as == ADDR_SPACE_EA)
3590 return (REG_P (x) && (GET_MODE (x) == EAmode));
3591
3592 else if (as != ADDR_SPACE_GENERIC)
3593 gcc_unreachable ();
3594
3595 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3596 }
3597
3598 /* When the address is reg + const_int, force the const_int into a
3599 register. */
3600 static rtx
3601 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3602 enum machine_mode mode ATTRIBUTE_UNUSED)
3603 {
3604 rtx op0, op1;
3605 /* Make sure both operands are registers. */
3606 if (GET_CODE (x) == PLUS)
3607 {
3608 op0 = XEXP (x, 0);
3609 op1 = XEXP (x, 1);
3610 if (ALIGNED_SYMBOL_REF_P (op0))
3611 {
3612 op0 = force_reg (Pmode, op0);
3613 mark_reg_pointer (op0, 128);
3614 }
3615 else if (GET_CODE (op0) != REG)
3616 op0 = force_reg (Pmode, op0);
3617 if (ALIGNED_SYMBOL_REF_P (op1))
3618 {
3619 op1 = force_reg (Pmode, op1);
3620 mark_reg_pointer (op1, 128);
3621 }
3622 else if (GET_CODE (op1) != REG)
3623 op1 = force_reg (Pmode, op1);
3624 x = gen_rtx_PLUS (Pmode, op0, op1);
3625 }
3626 return x;
3627 }
3628
3629 /* Like spu_legitimize_address, except with named address support. */
3630 static rtx
3631 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3632 addr_space_t as)
3633 {
3634 if (as != ADDR_SPACE_GENERIC)
3635 return x;
3636
3637 return spu_legitimize_address (x, oldx, mode);
3638 }
3639
3640 /* Reload reg + const_int for out-of-range displacements. */
3641 rtx
3642 spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3643 int opnum, int type)
3644 {
3645 bool removed_and = false;
3646
3647 if (GET_CODE (ad) == AND
3648 && CONST_INT_P (XEXP (ad, 1))
3649 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3650 {
3651 ad = XEXP (ad, 0);
3652 removed_and = true;
3653 }
3654
3655 if (GET_CODE (ad) == PLUS
3656 && REG_P (XEXP (ad, 0))
3657 && CONST_INT_P (XEXP (ad, 1))
3658 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3659 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3660 {
3661 /* Unshare the sum. */
3662 ad = copy_rtx (ad);
3663
3664 /* Reload the displacement. */
3665 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3666 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3667 opnum, (enum reload_type) type);
3668
3669 /* Add back AND for alignment if we stripped it. */
3670 if (removed_and)
3671 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3672
3673 return ad;
3674 }
3675
3676 return NULL_RTX;
3677 }
3678
3679 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3680 struct attribute_spec.handler. */
3681 static tree
3682 spu_handle_fndecl_attribute (tree * node,
3683 tree name,
3684 tree args ATTRIBUTE_UNUSED,
3685 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3686 {
3687 if (TREE_CODE (*node) != FUNCTION_DECL)
3688 {
3689 warning (0, "%qE attribute only applies to functions",
3690 name);
3691 *no_add_attrs = true;
3692 }
3693
3694 return NULL_TREE;
3695 }
3696
3697 /* Handle the "vector" attribute. */
3698 static tree
3699 spu_handle_vector_attribute (tree * node, tree name,
3700 tree args ATTRIBUTE_UNUSED,
3701 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3702 {
3703 tree type = *node, result = NULL_TREE;
3704 enum machine_mode mode;
3705 int unsigned_p;
3706
3707 while (POINTER_TYPE_P (type)
3708 || TREE_CODE (type) == FUNCTION_TYPE
3709 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3710 type = TREE_TYPE (type);
3711
3712 mode = TYPE_MODE (type);
3713
3714 unsigned_p = TYPE_UNSIGNED (type);
3715 switch (mode)
3716 {
3717 case DImode:
3718 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3719 break;
3720 case SImode:
3721 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3722 break;
3723 case HImode:
3724 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3725 break;
3726 case QImode:
3727 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3728 break;
3729 case SFmode:
3730 result = V4SF_type_node;
3731 break;
3732 case DFmode:
3733 result = V2DF_type_node;
3734 break;
3735 default:
3736 break;
3737 }
3738
3739 /* Propagate qualifiers attached to the element type
3740 onto the vector type. */
3741 if (result && result != type && TYPE_QUALS (type))
3742 result = build_qualified_type (result, TYPE_QUALS (type));
3743
3744 *no_add_attrs = true; /* No need to hang on to the attribute. */
3745
3746 if (!result)
3747 warning (0, "%qE attribute ignored", name);
3748 else
3749 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3750
3751 return NULL_TREE;
3752 }
3753
3754 /* Return nonzero if FUNC is a naked function. */
3755 static int
3756 spu_naked_function_p (tree func)
3757 {
3758 tree a;
3759
3760 if (TREE_CODE (func) != FUNCTION_DECL)
3761 abort ();
3762
3763 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3764 return a != NULL_TREE;
3765 }
3766
3767 int
3768 spu_initial_elimination_offset (int from, int to)
3769 {
3770 int saved_regs_size = spu_saved_regs_size ();
3771 int sp_offset = 0;
3772 if (!crtl->is_leaf || crtl->outgoing_args_size
3773 || get_frame_size () || saved_regs_size)
3774 sp_offset = STACK_POINTER_OFFSET;
3775 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3776 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3777 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3778 return get_frame_size ();
3779 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3780 return sp_offset + crtl->outgoing_args_size
3781 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3782 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3783 return get_frame_size () + saved_regs_size + sp_offset;
3784 else
3785 gcc_unreachable ();
3786 }
3787
3788 rtx
3789 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3790 {
3791 enum machine_mode mode = TYPE_MODE (type);
3792 int byte_size = ((mode == BLKmode)
3793 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3794
3795 /* Make sure small structs are left justified in a register. */
3796 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3797 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3798 {
3799 enum machine_mode smode;
3800 rtvec v;
3801 int i;
3802 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3803 int n = byte_size / UNITS_PER_WORD;
3804 v = rtvec_alloc (nregs);
3805 for (i = 0; i < n; i++)
3806 {
3807 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3808 gen_rtx_REG (TImode,
3809 FIRST_RETURN_REGNUM
3810 + i),
3811 GEN_INT (UNITS_PER_WORD * i));
3812 byte_size -= UNITS_PER_WORD;
3813 }
3814
3815 if (n < nregs)
3816 {
3817 if (byte_size < 4)
3818 byte_size = 4;
3819 smode =
3820 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3821 RTVEC_ELT (v, n) =
3822 gen_rtx_EXPR_LIST (VOIDmode,
3823 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3824 GEN_INT (UNITS_PER_WORD * n));
3825 }
3826 return gen_rtx_PARALLEL (mode, v);
3827 }
3828 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3829 }
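/* Editorial aside, not part of the original sources: a worked example of
   the small-aggregate return layout built above.  Returning a 20-byte
   struct (BLKmode) produces a PARALLEL with one full 16-byte register and
   one SImode piece for the 4-byte remainder, keeping the data
   left-justified:

     (parallel [
        (expr_list (reg:TI FIRST_RETURN_REGNUM)      (const_int 0))
        (expr_list (reg:SI FIRST_RETURN_REGNUM + 1)  (const_int 16))
      ])

   This mirrors the rtvec assembled in the loop above.  */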
3830
3831 static rtx
3832 spu_function_arg (cumulative_args_t cum_v,
3833 enum machine_mode mode,
3834 const_tree type, bool named ATTRIBUTE_UNUSED)
3835 {
3836 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3837 int byte_size;
3838
3839 if (*cum >= MAX_REGISTER_ARGS)
3840 return 0;
3841
3842 byte_size = ((mode == BLKmode)
3843 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3844
3845 /* The ABI does not allow parameters to be passed partially in a
3846 register and partially on the stack. */
3847 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3848 return 0;
3849
3850 /* Make sure small structs are left justified in a register. */
3851 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3852 && byte_size < UNITS_PER_WORD && byte_size > 0)
3853 {
3854 enum machine_mode smode;
3855 rtx gr_reg;
3856 if (byte_size < 4)
3857 byte_size = 4;
3858 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3859 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3860 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3861 const0_rtx);
3862 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3863 }
3864 else
3865 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3866 }
3867
3868 static void
3869 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
3870 const_tree type, bool named ATTRIBUTE_UNUSED)
3871 {
3872 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3873
3874 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3875 ? 1
3876 : mode == BLKmode
3877 ? ((int_size_in_bytes (type) + 15) / 16)
3878 : mode == VOIDmode
3879 ? 1
3880 : HARD_REGNO_NREGS (cum, mode));
3881 }
3882
3883 /* Variable sized types are passed by reference. */
3884 static bool
3885 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3886 enum machine_mode mode ATTRIBUTE_UNUSED,
3887 const_tree type, bool named ATTRIBUTE_UNUSED)
3888 {
3889 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3890 }
3891 \f
3892
3893 /* Var args. */
3894
3895 /* Create and return the va_list datatype.
3896
3897 On SPU, va_list is an array type equivalent to
3898
3899 typedef struct __va_list_tag
3900 {
3901 void *__args __attribute__((__aligned__(16)));
3902 void *__skip __attribute__((__aligned__(16)));
3903
3904 } va_list[1];
3905
3906 where __args points to the arg that will be returned by the next
3907 va_arg(), and __skip points to the previous stack frame such that
3908 when __args == __skip we should advance __args by 32 bytes. */
3909 static tree
3910 spu_build_builtin_va_list (void)
3911 {
3912 tree f_args, f_skip, record, type_decl;
3913 bool owp;
3914
3915 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3916
3917 type_decl =
3918 build_decl (BUILTINS_LOCATION,
3919 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3920
3921 f_args = build_decl (BUILTINS_LOCATION,
3922 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3923 f_skip = build_decl (BUILTINS_LOCATION,
3924 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3925
3926 DECL_FIELD_CONTEXT (f_args) = record;
3927 DECL_ALIGN (f_args) = 128;
3928 DECL_USER_ALIGN (f_args) = 1;
3929
3930 DECL_FIELD_CONTEXT (f_skip) = record;
3931 DECL_ALIGN (f_skip) = 128;
3932 DECL_USER_ALIGN (f_skip) = 1;
3933
3934 TYPE_STUB_DECL (record) = type_decl;
3935 TYPE_NAME (record) = type_decl;
3936 TYPE_FIELDS (record) = f_args;
3937 DECL_CHAIN (f_args) = f_skip;
3938
3939 /* We know this is being padded and we want it that way. It is an internal
3940 type so hide the warnings from the user. */
3941 owp = warn_padded;
3942 warn_padded = false;
3943
3944 layout_type (record);
3945
3946 warn_padded = owp;
3947
3948 /* The correct type is an array type of one element. */
3949 return build_array_type (record, build_index_type (size_zero_node));
3950 }
3951
3952 /* Implement va_start by filling the va_list structure VALIST.
3953 NEXTARG points to the first anonymous stack argument.
3954
3955 The following global variables are used to initialize
3956 the va_list structure:
3957
3958 crtl->args.info;
3959 the CUMULATIVE_ARGS for this function
3960
3961 crtl->args.arg_offset_rtx:
3962 holds the offset of the first anonymous stack argument
3963 (relative to the virtual arg pointer). */
3964
3965 static void
3966 spu_va_start (tree valist, rtx nextarg)
3967 {
3968 tree f_args, f_skip;
3969 tree args, skip, t;
3970
3971 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3972 f_skip = DECL_CHAIN (f_args);
3973
3974 valist = build_simple_mem_ref (valist);
3975 args =
3976 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3977 skip =
3978 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3979
3980 /* Find the __args area. */
3981 t = make_tree (TREE_TYPE (args), nextarg);
3982 if (crtl->args.pretend_args_size > 0)
3983 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3984 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3985 TREE_SIDE_EFFECTS (t) = 1;
3986 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3987
3988 /* Find the __skip area. */
3989 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3990 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3991 - STACK_POINTER_OFFSET));
3992 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3993 TREE_SIDE_EFFECTS (t) = 1;
3994 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3995 }
3996
3997 /* Gimplify va_arg by updating the va_list structure
3998 VALIST as required to retrieve an argument of type
3999 TYPE, and returning that argument.
4000
4001 ret = va_arg(VALIST, TYPE);
4002
4003 generates code equivalent to:
4004
4005 paddedsize = (sizeof(TYPE) + 15) & -16;
4006 if (VALIST.__args + paddedsize > VALIST.__skip
4007 && VALIST.__args <= VALIST.__skip)
4008 addr = VALIST.__skip + 32;
4009 else
4010 addr = VALIST.__args;
4011 VALIST.__args = addr + paddedsize;
4012 ret = *(TYPE *)addr;
4013 */
4014 static tree
4015 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4016 gimple_seq * post_p ATTRIBUTE_UNUSED)
4017 {
4018 tree f_args, f_skip;
4019 tree args, skip;
4020 HOST_WIDE_INT size, rsize;
4021 tree addr, tmp;
4022 bool pass_by_reference_p;
4023
4024 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4025 f_skip = DECL_CHAIN (f_args);
4026
4027 valist = build_simple_mem_ref (valist);
4028 args =
4029 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4030 skip =
4031 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4032
4033 addr = create_tmp_var (ptr_type_node, "va_arg");
4034
4035 /* if an object is dynamically sized, a pointer to it is passed
4036 instead of the object itself. */
4037 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4038 false);
4039 if (pass_by_reference_p)
4040 type = build_pointer_type (type);
4041 size = int_size_in_bytes (type);
4042 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4043
4044 /* build conditional expression to calculate addr. The expression
4045 will be gimplified later. */
4046 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4047 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4048 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4049 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4050 unshare_expr (skip)));
4051
4052 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4053 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4054 unshare_expr (args));
4055
4056 gimplify_assign (addr, tmp, pre_p);
4057
4058 /* update VALIST.__args */
4059 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4060 gimplify_assign (unshare_expr (args), tmp, pre_p);
4061
4062 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4063 addr);
4064
4065 if (pass_by_reference_p)
4066 addr = build_va_arg_indirect_ref (addr);
4067
4068 return build_va_arg_indirect_ref (addr);
4069 }
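
/* Worked example (illustrative only): for va_arg (ap, int),
   sizeof (int) == 4, so rsize is rounded up to 16 and the generated
   code is

     if (ap->__args + 16 > ap->__skip && ap->__args <= ap->__skip)
       addr = ap->__skip + 32;
     else
       addr = ap->__args;
     ap->__args = addr + 16;
     ret = *(int *) addr;

   i.e. every vararg consumes one 16-byte slot regardless of its
   declared size, and the 32-byte area at __skip is stepped over
   exactly once (see the __va_list_tag comment above).  */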
4070
4071 /* Save parameter registers starting with the register that corresponds
4072 to the first unnamed parameter. If the first unnamed parameter is
4073 on the stack then save no registers. Set pretend_args_size to the
4074 amount of space needed to save the registers. */
4075 static void
4076 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4077 tree type, int *pretend_size, int no_rtl)
4078 {
4079 if (!no_rtl)
4080 {
4081 rtx tmp;
4082 int regno;
4083 int offset;
4084 int ncum = *get_cumulative_args (cum);
4085
4086 /* cum currently points to the last named argument; we want to
4087 start at the next argument. */
4088 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4089
4090 offset = -STACK_POINTER_OFFSET;
4091 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4092 {
4093 tmp = gen_frame_mem (V4SImode,
4094 plus_constant (Pmode, virtual_incoming_args_rtx,
4095 offset));
4096 emit_move_insn (tmp,
4097 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4098 offset += 16;
4099 }
4100 *pretend_size = offset + STACK_POINTER_OFFSET;
4101 }
4102 }
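
/* Illustration: for a hypothetical

     int f (int a, ...);

   with one named argument, ncum advances to 1 and the loop saves
   argument registers 1 .. MAX_REGISTER_ARGS-1 as V4SImode quadwords,
   16 bytes apart, starting STACK_POINTER_OFFSET bytes below the
   incoming-args pointer.  *pretend_size then comes out as
   (MAX_REGISTER_ARGS - 1) * 16.  */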
4103 \f
4104 static void
4105 spu_conditional_register_usage (void)
4106 {
4107 if (flag_pic)
4108 {
4109 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4110 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4111 }
4112 }
4113
4114 /* This is called any time we inspect the alignment of a register for
4115 addresses. */
4116 static int
4117 reg_aligned_for_addr (rtx x)
4118 {
4119 int regno =
4120 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4121 return REGNO_POINTER_ALIGN (regno) >= 128;
4122 }
4123
4124 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4125 into its SYMBOL_REF_FLAGS. */
4126 static void
4127 spu_encode_section_info (tree decl, rtx rtl, int first)
4128 {
4129 default_encode_section_info (decl, rtl, first);
4130
4131 /* If a variable has a forced alignment to < 16 bytes, mark it with
4132 SYMBOL_FLAG_ALIGN1. */
4133 if (TREE_CODE (decl) == VAR_DECL
4134 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4135 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4136 }
4137
4138 /* Return TRUE if we are certain the mem refers to a complete object
4139 which is both 16-byte aligned and padded to a 16-byte boundary. This
4140 would make it safe to store with a single instruction.
4141 We guarantee the alignment and padding for static objects by aligning
4142 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4143 FIXME: We currently cannot guarantee this for objects on the stack
4144 because assign_parm_setup_stack calls assign_stack_local with the
4145 alignment of the parameter mode and in that case the alignment never
4146 gets adjusted by LOCAL_ALIGNMENT. */
4147 static int
4148 store_with_one_insn_p (rtx mem)
4149 {
4150 enum machine_mode mode = GET_MODE (mem);
4151 rtx addr = XEXP (mem, 0);
4152 if (mode == BLKmode)
4153 return 0;
4154 if (GET_MODE_SIZE (mode) >= 16)
4155 return 1;
4156 /* Only static objects. */
4157 if (GET_CODE (addr) == SYMBOL_REF)
4158 {
4159 /* We use the associated declaration to make sure the access is
4160 referring to the whole object.
4161 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4162 whether both checks are necessary. Will there be cases where one exists, and
4163 the other does not? Will there be cases where both exist, but
4164 have different types? */
4165 tree decl = MEM_EXPR (mem);
4166 if (decl
4167 && TREE_CODE (decl) == VAR_DECL
4168 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4169 return 1;
4170 decl = SYMBOL_REF_DECL (addr);
4171 if (decl
4172 && TREE_CODE (decl) == VAR_DECL
4173 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4174 return 1;
4175 }
4176 return 0;
4177 }
4178
4179 /* Return 1 when the address is not valid for a simple load and store as
4180 required by the '_mov*' patterns. We could make this less strict
4181 for loads, but we prefer MEMs to look the same so they are more
4182 likely to be merged. */
4183 static int
4184 address_needs_split (rtx mem)
4185 {
4186 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4187 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4188 || !(store_with_one_insn_p (mem)
4189 || mem_is_padded_component_ref (mem))))
4190 return 1;
4191
4192 return 0;
4193 }
4194
4195 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4196 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4197 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4198
4199 /* MEM is known to be an __ea qualified memory access. Emit a call to
4200 fetch the PPU memory into local store, and leave its local store
4201 address in DATA_ADDR. */
4202
4203 static void
4204 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4205 {
4206 if (is_store)
4207 {
4208 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4209 if (!cache_fetch_dirty)
4210 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4211 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4212 2, ea_addr, EAmode, ndirty, SImode);
4213 }
4214 else
4215 {
4216 if (!cache_fetch)
4217 cache_fetch = init_one_libfunc ("__cache_fetch");
4218 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4219 1, ea_addr, EAmode);
4220 }
4221 }
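
/* Roughly, at the source level (a sketch; assuming the runtime's
   cache helpers behave as their names suggest):

     // x = *(__ea int *) p;                  -- load
     void *ls = __cache_fetch (p);
     x = *(int *) ls;

     // *(__ea int *) p = x;                  -- store
     void *ls2 = __cache_fetch_dirty (p, sizeof (int));
     *(int *) ls2 = x;

   i.e. the data is pulled into the local-store software cache and the
   access is redirected to the cached copy.  */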
4222
4223 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4224 dirty bit marking, inline.
4225
4226 The cache control data structure is an array of
4227
4228 struct __cache_tag_array
4229 {
4230 unsigned int tag_lo[4];
4231 unsigned int tag_hi[4];
4232 void *data_pointer[4];
4233 int reserved[4];
4234 vector unsigned short dirty_bits[4];
4235 } */
4236
4237 static void
4238 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4239 {
4240 rtx ea_addr_si;
4241 HOST_WIDE_INT v;
4242 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4243 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4244 rtx index_mask = gen_reg_rtx (SImode);
4245 rtx tag_arr = gen_reg_rtx (Pmode);
4246 rtx splat_mask = gen_reg_rtx (TImode);
4247 rtx splat = gen_reg_rtx (V4SImode);
4248 rtx splat_hi = NULL_RTX;
4249 rtx tag_index = gen_reg_rtx (Pmode);
4250 rtx block_off = gen_reg_rtx (SImode);
4251 rtx tag_addr = gen_reg_rtx (Pmode);
4252 rtx tag = gen_reg_rtx (V4SImode);
4253 rtx cache_tag = gen_reg_rtx (V4SImode);
4254 rtx cache_tag_hi = NULL_RTX;
4255 rtx cache_ptrs = gen_reg_rtx (TImode);
4256 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4257 rtx tag_equal = gen_reg_rtx (V4SImode);
4258 rtx tag_equal_hi = NULL_RTX;
4259 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4260 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4261 rtx eq_index = gen_reg_rtx (SImode);
4262 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4263
4264 if (spu_ea_model != 32)
4265 {
4266 splat_hi = gen_reg_rtx (V4SImode);
4267 cache_tag_hi = gen_reg_rtx (V4SImode);
4268 tag_equal_hi = gen_reg_rtx (V4SImode);
4269 }
4270
4271 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4272 emit_move_insn (tag_arr, tag_arr_sym);
4273 v = 0x0001020300010203LL;
4274 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4275 ea_addr_si = ea_addr;
4276 if (spu_ea_model != 32)
4277 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4278
4279 /* tag_index = ea_addr & (tag_array_size - 128) */
4280 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4281
4282 /* splat ea_addr to all 4 slots. */
4283 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4284 /* Similarly for high 32 bits of ea_addr. */
4285 if (spu_ea_model != 32)
4286 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4287
4288 /* block_off = ea_addr & 127 */
4289 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4290
4291 /* tag_addr = tag_arr + tag_index */
4292 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4293
4294 /* Read cache tags. */
4295 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4296 if (spu_ea_model != 32)
4297 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4298 plus_constant (Pmode,
4299 tag_addr, 16)));
4300
4301 /* tag = ea_addr & -128 */
4302 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4303
4304 /* Read all four cache data pointers. */
4305 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4306 plus_constant (Pmode,
4307 tag_addr, 32)));
4308
4309 /* Compare tags. */
4310 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4311 if (spu_ea_model != 32)
4312 {
4313 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4314 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4315 }
4316
4317 /* At most one of the tags compares equal, so tag_equal has one
4318 32-bit slot set to all 1's, with the other slots all zero.
4319 gbb picks off the low bit of each byte in the 128-bit register,
4320 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4321 we have a hit. */
4322 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4323 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4324
4325 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4326 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4327
4328 /* This allows us to rotate the corresponding cache data pointer into
4329 slot 0 (rotating by eq_index mod 16 bytes). */
4330 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4331 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4332
4333 /* Add block offset to form final data address. */
4334 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4335
4336 /* Check that we did hit. */
4337 hit_label = gen_label_rtx ();
4338 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4339 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4340 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4341 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4342 hit_ref, pc_rtx)));
4343 /* Say that this branch is very likely to happen. */
4344 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4345 add_int_reg_note (insn, REG_BR_PROB, v);
4346
4347 ea_load_store (mem, is_store, ea_addr, data_addr);
4348 cont_label = gen_label_rtx ();
4349 emit_jump_insn (gen_jump (cont_label));
4350 emit_barrier ();
4351
4352 emit_label (hit_label);
4353
4354 if (is_store)
4355 {
4356 HOST_WIDE_INT v_hi;
4357 rtx dirty_bits = gen_reg_rtx (TImode);
4358 rtx dirty_off = gen_reg_rtx (SImode);
4359 rtx dirty_128 = gen_reg_rtx (TImode);
4360 rtx neg_block_off = gen_reg_rtx (SImode);
4361
4362 /* Set up mask with one dirty bit per byte of the mem we are
4363 writing, starting from the top bit. */
4364 v_hi = v = -1;
4365 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4366 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4367 {
4368 v_hi = v;
4369 v = 0;
4370 }
4371 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4372
4373 /* Form index into cache dirty_bits. eq_index is one of
4374 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4375 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4376 offset to each of the four dirty_bits elements. */
4377 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4378
4379 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4380
4381 /* Rotate bit mask to proper bit. */
4382 emit_insn (gen_negsi2 (neg_block_off, block_off));
4383 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4384 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4385
4386 /* Or in the new dirty bits. */
4387 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4388
4389 /* Store. */
4390 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4391 }
4392
4393 emit_label (cont_label);
4394 }
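
/* Worked example of the inline hit path (with made-up numbers):
   suppose ea_addr == 0x12345 and __cache_tag_array_size == 4096.

     tag_index = 0x12345 & (4096 - 128) = 0x300   // selects the set
     block_off = 0x12345 & 127          = 0x45    // offset in the line
     tag       = 0x12345 & -128         = 0x12300

   The four tags of the set are compared in parallel.  If slot 2
   matches, tag_equal is { 0, 0, -1, 0 }, gbb packs that to 0x00f0,
   clz gives eq_index == 24, and rotqby by 24 (mod 16, i.e. 8 bytes)
   brings data_pointer[2] into the preferred slot, so data_addr
   becomes data_pointer[2] + 0x45.  */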
4395
4396 static rtx
4397 expand_ea_mem (rtx mem, bool is_store)
4398 {
4399 rtx ea_addr;
4400 rtx data_addr = gen_reg_rtx (Pmode);
4401 rtx new_mem;
4402
4403 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4404 if (optimize_size || optimize == 0)
4405 ea_load_store (mem, is_store, ea_addr, data_addr);
4406 else
4407 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4408
4409 if (ea_alias_set == -1)
4410 ea_alias_set = new_alias_set ();
4411
4412 /* We generate a new MEM RTX to refer to the copy of the data
4413 in the cache. We do not copy memory attributes (except the
4414 alignment) from the original MEM, as they may no longer apply
4415 to the cache copy. */
4416 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4417 set_mem_alias_set (new_mem, ea_alias_set);
4418 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4419
4420 return new_mem;
4421 }
4422
4423 int
4424 spu_expand_mov (rtx * ops, enum machine_mode mode)
4425 {
4426 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4427 {
4428 /* Perform the move in the destination SUBREG's inner mode. */
4429 ops[0] = SUBREG_REG (ops[0]);
4430 mode = GET_MODE (ops[0]);
4431 ops[1] = gen_lowpart_common (mode, ops[1]);
4432 gcc_assert (ops[1]);
4433 }
4434
4435 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4436 {
4437 rtx from = SUBREG_REG (ops[1]);
4438 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4439
4440 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4441 && GET_MODE_CLASS (imode) == MODE_INT
4442 && subreg_lowpart_p (ops[1]));
4443
4444 if (GET_MODE_SIZE (imode) < 4)
4445 imode = SImode;
4446 if (imode != GET_MODE (from))
4447 from = gen_rtx_SUBREG (imode, from, 0);
4448
4449 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4450 {
4451 enum insn_code icode = convert_optab_handler (trunc_optab,
4452 mode, imode);
4453 emit_insn (GEN_FCN (icode) (ops[0], from));
4454 }
4455 else
4456 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4457 return 1;
4458 }
4459
4460 /* At least one of the operands needs to be a register. */
4461 if ((reload_in_progress | reload_completed) == 0
4462 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4463 {
4464 rtx temp = force_reg (mode, ops[1]);
4465 emit_move_insn (ops[0], temp);
4466 return 1;
4467 }
4468 if (reload_in_progress || reload_completed)
4469 {
4470 if (CONSTANT_P (ops[1]))
4471 return spu_split_immediate (ops);
4472 return 0;
4473 }
4474
4475 /* Catch SImode immediates greater than 0x7fffffff and sign-extend
4476 them. */
4477 if (GET_CODE (ops[1]) == CONST_INT)
4478 {
4479 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4480 if (val != INTVAL (ops[1]))
4481 {
4482 emit_move_insn (ops[0], GEN_INT (val));
4483 return 1;
4484 }
4485 }
4486 if (MEM_P (ops[0]))
4487 {
4488 if (MEM_ADDR_SPACE (ops[0]))
4489 ops[0] = expand_ea_mem (ops[0], true);
4490 return spu_split_store (ops);
4491 }
4492 if (MEM_P (ops[1]))
4493 {
4494 if (MEM_ADDR_SPACE (ops[1]))
4495 ops[1] = expand_ea_mem (ops[1], false);
4496 return spu_split_load (ops);
4497 }
4498
4499 return 0;
4500 }
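
/* For example (illustrative): if a (const_int 0x80000000) reaches this
   point as an SImode source, trunc_int_for_mode folds it to
   (const_int -0x80000000), the canonical SImode form, and the move is
   re-emitted with that sign-extended value.  */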
4501
4502 static void
4503 spu_convert_move (rtx dst, rtx src)
4504 {
4505 enum machine_mode mode = GET_MODE (dst);
4506 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4507 rtx reg;
4508 gcc_assert (GET_MODE (src) == TImode);
4509 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4510 emit_insn (gen_rtx_SET (VOIDmode, reg,
4511 gen_rtx_TRUNCATE (int_mode,
4512 gen_rtx_LSHIFTRT (TImode, src,
4513 GEN_INT (int_mode == DImode ? 64 : 96)))));
4514 if (int_mode != mode)
4515 {
4516 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4517 emit_move_insn (dst, reg);
4518 }
4519 }
4520
4521 /* Load TImode values into DST0 and DST1 (when DST1 is non-NULL) using
4522 the address from SRC and SRC+16. Return a REG or CONST_INT that
4523 specifies how many bytes to rotate the loaded registers, plus any
4524 extra from EXTRA_ROTQBY. The address and rotate amounts are
4525 normalized to improve merging of loads and rotate computations. */
4526 static rtx
4527 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4528 {
4529 rtx addr = XEXP (src, 0);
4530 rtx p0, p1, rot, addr0, addr1;
4531 int rot_amt;
4532
4533 rot = 0;
4534 rot_amt = 0;
4535
4536 if (MEM_ALIGN (src) >= 128)
4537 /* Address is already aligned; simply perform a TImode load. */ ;
4538 else if (GET_CODE (addr) == PLUS)
4539 {
4540 /* 8 cases:
4541 aligned reg + aligned reg => lqx
4542 aligned reg + unaligned reg => lqx, rotqby
4543 aligned reg + aligned const => lqd
4544 aligned reg + unaligned const => lqd, rotqbyi
4545 unaligned reg + aligned reg => lqx, rotqby
4546 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4547 unaligned reg + aligned const => lqd, rotqby
4548 unaligned reg + unaligned const -> not allowed by legitimate address
4549 */
4550 p0 = XEXP (addr, 0);
4551 p1 = XEXP (addr, 1);
4552 if (!reg_aligned_for_addr (p0))
4553 {
4554 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4555 {
4556 rot = gen_reg_rtx (SImode);
4557 emit_insn (gen_addsi3 (rot, p0, p1));
4558 }
4559 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4560 {
4561 if (INTVAL (p1) > 0
4562 && REG_POINTER (p0)
4563 && INTVAL (p1) * BITS_PER_UNIT
4564 < REGNO_POINTER_ALIGN (REGNO (p0)))
4565 {
4566 rot = gen_reg_rtx (SImode);
4567 emit_insn (gen_addsi3 (rot, p0, p1));
4568 addr = p0;
4569 }
4570 else
4571 {
4572 rtx x = gen_reg_rtx (SImode);
4573 emit_move_insn (x, p1);
4574 if (!spu_arith_operand (p1, SImode))
4575 p1 = x;
4576 rot = gen_reg_rtx (SImode);
4577 emit_insn (gen_addsi3 (rot, p0, p1));
4578 addr = gen_rtx_PLUS (Pmode, p0, x);
4579 }
4580 }
4581 else
4582 rot = p0;
4583 }
4584 else
4585 {
4586 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4587 {
4588 rot_amt = INTVAL (p1) & 15;
4589 if (INTVAL (p1) & -16)
4590 {
4591 p1 = GEN_INT (INTVAL (p1) & -16);
4592 addr = gen_rtx_PLUS (SImode, p0, p1);
4593 }
4594 else
4595 addr = p0;
4596 }
4597 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4598 rot = p1;
4599 }
4600 }
4601 else if (REG_P (addr))
4602 {
4603 if (!reg_aligned_for_addr (addr))
4604 rot = addr;
4605 }
4606 else if (GET_CODE (addr) == CONST)
4607 {
4608 if (GET_CODE (XEXP (addr, 0)) == PLUS
4609 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4610 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4611 {
4612 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4613 if (rot_amt & -16)
4614 addr = gen_rtx_CONST (Pmode,
4615 gen_rtx_PLUS (Pmode,
4616 XEXP (XEXP (addr, 0), 0),
4617 GEN_INT (rot_amt & -16)));
4618 else
4619 addr = XEXP (XEXP (addr, 0), 0);
4620 }
4621 else
4622 {
4623 rot = gen_reg_rtx (Pmode);
4624 emit_move_insn (rot, addr);
4625 }
4626 }
4627 else if (GET_CODE (addr) == CONST_INT)
4628 {
4629 rot_amt = INTVAL (addr);
4630 addr = GEN_INT (rot_amt & -16);
4631 }
4632 else if (!ALIGNED_SYMBOL_REF_P (addr))
4633 {
4634 rot = gen_reg_rtx (Pmode);
4635 emit_move_insn (rot, addr);
4636 }
4637
4638 rot_amt += extra_rotby;
4639
4640 rot_amt &= 15;
4641
4642 if (rot && rot_amt)
4643 {
4644 rtx x = gen_reg_rtx (SImode);
4645 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4646 rot = x;
4647 rot_amt = 0;
4648 }
4649 if (!rot && rot_amt)
4650 rot = GEN_INT (rot_amt);
4651
4652 addr0 = copy_rtx (addr);
4653 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4654 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4655
4656 if (dst1)
4657 {
4658 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4659 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4660 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4661 }
4662
4663 return rot;
4664 }
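
/* Worked example (a sketch): loading an SImode value from r3 + 6,
   where r3 is known to be 16-byte aligned.  The "aligned reg +
   unaligned const" case applies: 6 & -16 == 0, so the quadword is
   fetched from (r3) and rot_amt becomes 6:

     lqd     t, 0(r3)
     rotqbyi t, t, 6        ; plus EXTRA_ROTQBY, if any

   after which the caller reads the value out of the preferred slot
   of t.  Register-valued rotate amounts use rotqby instead.  */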
4665
4666 int
4667 spu_split_load (rtx * ops)
4668 {
4669 enum machine_mode mode = GET_MODE (ops[0]);
4670 rtx addr, load, rot;
4671 int rot_amt;
4672
4673 if (GET_MODE_SIZE (mode) >= 16)
4674 return 0;
4675
4676 addr = XEXP (ops[1], 0);
4677 gcc_assert (GET_CODE (addr) != AND);
4678
4679 if (!address_needs_split (ops[1]))
4680 {
4681 ops[1] = change_address (ops[1], TImode, addr);
4682 load = gen_reg_rtx (TImode);
4683 emit_insn (gen__movti (load, ops[1]));
4684 spu_convert_move (ops[0], load);
4685 return 1;
4686 }
4687
4688 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4689
4690 load = gen_reg_rtx (TImode);
4691 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4692
4693 if (rot)
4694 emit_insn (gen_rotqby_ti (load, load, rot));
4695
4696 spu_convert_move (ops[0], load);
4697 return 1;
4698 }
4699
4700 int
4701 spu_split_store (rtx * ops)
4702 {
4703 enum machine_mode mode = GET_MODE (ops[0]);
4704 rtx reg;
4705 rtx addr, p0, p1, p1_lo, smem;
4706 int aform;
4707 int scalar;
4708
4709 if (GET_MODE_SIZE (mode) >= 16)
4710 return 0;
4711
4712 addr = XEXP (ops[0], 0);
4713 gcc_assert (GET_CODE (addr) != AND);
4714
4715 if (!address_needs_split (ops[0]))
4716 {
4717 reg = gen_reg_rtx (TImode);
4718 emit_insn (gen_spu_convert (reg, ops[1]));
4719 ops[0] = change_address (ops[0], TImode, addr);
4720 emit_move_insn (ops[0], reg);
4721 return 1;
4722 }
4723
4724 if (GET_CODE (addr) == PLUS)
4725 {
4726 /* 8 cases:
4727 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4728 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4729 aligned reg + aligned const => lqd, c?d, shuf, stqx
4730 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4731 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4732 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4733 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4734 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4735 */
4736 aform = 0;
4737 p0 = XEXP (addr, 0);
4738 p1 = p1_lo = XEXP (addr, 1);
4739 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4740 {
4741 p1_lo = GEN_INT (INTVAL (p1) & 15);
4742 if (reg_aligned_for_addr (p0))
4743 {
4744 p1 = GEN_INT (INTVAL (p1) & -16);
4745 if (p1 == const0_rtx)
4746 addr = p0;
4747 else
4748 addr = gen_rtx_PLUS (SImode, p0, p1);
4749 }
4750 else
4751 {
4752 rtx x = gen_reg_rtx (SImode);
4753 emit_move_insn (x, p1);
4754 addr = gen_rtx_PLUS (SImode, p0, x);
4755 }
4756 }
4757 }
4758 else if (REG_P (addr))
4759 {
4760 aform = 0;
4761 p0 = addr;
4762 p1 = p1_lo = const0_rtx;
4763 }
4764 else
4765 {
4766 aform = 1;
4767 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4768 p1 = 0; /* aform doesn't use p1 */
4769 p1_lo = addr;
4770 if (ALIGNED_SYMBOL_REF_P (addr))
4771 p1_lo = const0_rtx;
4772 else if (GET_CODE (addr) == CONST
4773 && GET_CODE (XEXP (addr, 0)) == PLUS
4774 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4775 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4776 {
4777 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4778 if ((v & -16) != 0)
4779 addr = gen_rtx_CONST (Pmode,
4780 gen_rtx_PLUS (Pmode,
4781 XEXP (XEXP (addr, 0), 0),
4782 GEN_INT (v & -16)));
4783 else
4784 addr = XEXP (XEXP (addr, 0), 0);
4785 p1_lo = GEN_INT (v & 15);
4786 }
4787 else if (GET_CODE (addr) == CONST_INT)
4788 {
4789 p1_lo = GEN_INT (INTVAL (addr) & 15);
4790 addr = GEN_INT (INTVAL (addr) & -16);
4791 }
4792 else
4793 {
4794 p1_lo = gen_reg_rtx (SImode);
4795 emit_move_insn (p1_lo, addr);
4796 }
4797 }
4798
4799 gcc_assert (aform == 0 || aform == 1);
4800 reg = gen_reg_rtx (TImode);
4801
4802 scalar = store_with_one_insn_p (ops[0]);
4803 if (!scalar)
4804 {
4805 /* We could copy the flags from the ops[0] MEM to lmem here.
4806 We don't, because we want this load to be optimized away if
4807 possible, and copying the flags would prevent that in certain
4808 cases, e.g. consider the volatile flag. */
4809
4810 rtx pat = gen_reg_rtx (TImode);
4811 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4812 set_mem_alias_set (lmem, 0);
4813 emit_insn (gen_movti (reg, lmem));
4814
4815 if (!p0 || reg_aligned_for_addr (p0))
4816 p0 = stack_pointer_rtx;
4817 if (!p1_lo)
4818 p1_lo = const0_rtx;
4819
4820 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4821 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4822 }
4823 else
4824 {
4825 if (GET_CODE (ops[1]) == REG)
4826 emit_insn (gen_spu_convert (reg, ops[1]));
4827 else if (GET_CODE (ops[1]) == SUBREG)
4828 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4829 else
4830 abort ();
4831 }
4832
4833 if (GET_MODE_SIZE (mode) < 4 && scalar)
4834 emit_insn (gen_ashlti3
4835 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4836
4837 smem = change_address (ops[0], TImode, copy_rtx (addr));
4838 /* We can't use the previous alias set because the memory has changed
4839 size and can potentially overlap objects of other types. */
4840 set_mem_alias_set (smem, 0);
4841
4842 emit_insn (gen_movti (smem, reg));
4843 return 1;
4844 }
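
/* Illustrative expansion of an unaligned (non-scalar) SImode store to
   r3 + 4, with r3 16-byte aligned -- the lqd/c?d/shuf/stqx pattern
   from the comment above:

     lqd   q, 0(r3)           ; load the containing quadword
     cwd   pat, 4(r3)         ; insertion mask for a word at offset 4
     shufb q, value, q, pat   ; splice the new word into the quadword
     stqd  q, 0(r3)           ; store the whole quadword back  */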
4845
4846 /* Return TRUE if X is a MEM which is a struct member reference
4847 and the member can safely be loaded and stored with a single
4848 instruction because it is padded. */
4849 static int
4850 mem_is_padded_component_ref (rtx x)
4851 {
4852 tree t = MEM_EXPR (x);
4853 tree r;
4854 if (!t || TREE_CODE (t) != COMPONENT_REF)
4855 return 0;
4856 t = TREE_OPERAND (t, 1);
4857 if (!t || TREE_CODE (t) != FIELD_DECL
4858 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4859 return 0;
4860 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4861 r = DECL_FIELD_CONTEXT (t);
4862 if (!r || TREE_CODE (r) != RECORD_TYPE)
4863 return 0;
4864 /* Make sure they have the same mode. */
4865 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4866 return 0;
4867 /* If there are no following fields then the field alignment assures
4868 the structure is padded to the alignment which means this field is
4869 padded too. */
4870 if (TREE_CHAIN (t) == 0)
4871 return 1;
4872 /* If the following field is also aligned then this field will be
4873 padded. */
4874 t = TREE_CHAIN (t);
4875 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4876 return 1;
4877 return 0;
4878 }
4879
4880 /* Parse the -mfixed-range= option string. */
4881 static void
4882 fix_range (const char *const_str)
4883 {
4884 int i, first, last;
4885 char *str, *dash, *comma;
4886
4887 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4888 REG2 are either register names or register numbers. The effect
4889 of this option is to mark the registers in the range from REG1 to
4890 REG2 as ``fixed'' so they won't be used by the compiler. */
4891
4892 i = strlen (const_str);
4893 str = (char *) alloca (i + 1);
4894 memcpy (str, const_str, i + 1);
4895
4896 while (1)
4897 {
4898 dash = strchr (str, '-');
4899 if (!dash)
4900 {
4901 warning (0, "value of -mfixed-range must have form REG1-REG2");
4902 return;
4903 }
4904 *dash = '\0';
4905 comma = strchr (dash + 1, ',');
4906 if (comma)
4907 *comma = '\0';
4908
4909 first = decode_reg_name (str);
4910 if (first < 0)
4911 {
4912 warning (0, "unknown register name: %s", str);
4913 return;
4914 }
4915
4916 last = decode_reg_name (dash + 1);
4917 if (last < 0)
4918 {
4919 warning (0, "unknown register name: %s", dash + 1);
4920 return;
4921 }
4922
4923 *dash = '-';
4924
4925 if (first > last)
4926 {
4927 warning (0, "%s-%s is an empty range", str, dash + 1);
4928 return;
4929 }
4930
4931 for (i = first; i <= last; ++i)
4932 fixed_regs[i] = call_used_regs[i] = 1;
4933
4934 if (!comma)
4935 break;
4936
4937 *comma = ',';
4938 str = comma + 1;
4939 }
4940 }
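
/* Example (hypothetical option value):

     -mfixed-range=80-85,110-110

   marks registers 80 through 85 and register 110 as fixed and
   call-used, so the compiler will not allocate them.  */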
4941
4942 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4943 can be generated using the fsmbi instruction. */
4944 int
4945 fsmbi_const_p (rtx x)
4946 {
4947 if (CONSTANT_P (x))
4948 {
4949 /* We can always choose TImode for CONST_INT because the high bits
4950 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4951 enum immediate_class c = classify_immediate (x, TImode);
4952 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4953 }
4954 return 0;
4955 }
4956
4957 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4958 can be generated using the cbd, chd, cwd or cdd instruction. */
4959 int
4960 cpat_const_p (rtx x, enum machine_mode mode)
4961 {
4962 if (CONSTANT_P (x))
4963 {
4964 enum immediate_class c = classify_immediate (x, mode);
4965 return c == IC_CPAT;
4966 }
4967 return 0;
4968 }
4969
4970 rtx
4971 gen_cpat_const (rtx * ops)
4972 {
4973 unsigned char dst[16];
4974 int i, offset, shift, isize;
4975 if (GET_CODE (ops[3]) != CONST_INT
4976 || GET_CODE (ops[2]) != CONST_INT
4977 || (GET_CODE (ops[1]) != CONST_INT
4978 && GET_CODE (ops[1]) != REG))
4979 return 0;
4980 if (GET_CODE (ops[1]) == REG
4981 && (!REG_POINTER (ops[1])
4982 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4983 return 0;
4984
4985 for (i = 0; i < 16; i++)
4986 dst[i] = i + 16;
4987 isize = INTVAL (ops[3]);
4988 if (isize == 1)
4989 shift = 3;
4990 else if (isize == 2)
4991 shift = 2;
4992 else
4993 shift = 0;
4994 offset = (INTVAL (ops[2]) +
4995 (GET_CODE (ops[1]) ==
4996 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4997 for (i = 0; i < isize; i++)
4998 dst[offset + i] = i + shift;
4999 return array_to_constant (TImode, dst);
5000 }
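
/* Worked example: with ops[3] == 4 (a word insert) and
   ops[2] + ops[1] == 4, shift is 0, the array starts out as
   10 11 12 13 ... 1f, and bytes 4..7 are overwritten with 00 01 02 03:

     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f

   Used as a shufb control word this inserts the preferred-slot word of
   the first source at byte offset 4 of the second source -- the same
   pattern a cwd instruction with offset 4 produces.  */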
5001
5002 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5003 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5004 than 16 bytes, the value is repeated across the rest of the array. */
5005 void
5006 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5007 {
5008 HOST_WIDE_INT val;
5009 int i, j, first;
5010
5011 memset (arr, 0, 16);
5012 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5013 if (GET_CODE (x) == CONST_INT
5014 || (GET_CODE (x) == CONST_DOUBLE
5015 && (mode == SFmode || mode == DFmode)))
5016 {
5017 gcc_assert (mode != VOIDmode && mode != BLKmode);
5018
5019 if (GET_CODE (x) == CONST_DOUBLE)
5020 val = const_double_to_hwint (x);
5021 else
5022 val = INTVAL (x);
5023 first = GET_MODE_SIZE (mode) - 1;
5024 for (i = first; i >= 0; i--)
5025 {
5026 arr[i] = val & 0xff;
5027 val >>= 8;
5028 }
5029 /* Splat the constant across the whole array. */
5030 for (j = 0, i = first + 1; i < 16; i++)
5031 {
5032 arr[i] = arr[j];
5033 j = (j == first) ? 0 : j + 1;
5034 }
5035 }
5036 else if (GET_CODE (x) == CONST_DOUBLE)
5037 {
5038 val = CONST_DOUBLE_LOW (x);
5039 for (i = 15; i >= 8; i--)
5040 {
5041 arr[i] = val & 0xff;
5042 val >>= 8;
5043 }
5044 val = CONST_DOUBLE_HIGH (x);
5045 for (i = 7; i >= 0; i--)
5046 {
5047 arr[i] = val & 0xff;
5048 val >>= 8;
5049 }
5050 }
5051 else if (GET_CODE (x) == CONST_VECTOR)
5052 {
5053 int units;
5054 rtx elt;
5055 mode = GET_MODE_INNER (mode);
5056 units = CONST_VECTOR_NUNITS (x);
5057 for (i = 0; i < units; i++)
5058 {
5059 elt = CONST_VECTOR_ELT (x, i);
5060 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5061 {
5062 if (GET_CODE (elt) == CONST_DOUBLE)
5063 val = const_double_to_hwint (elt);
5064 else
5065 val = INTVAL (elt);
5066 first = GET_MODE_SIZE (mode) - 1;
5067 if (first + i * GET_MODE_SIZE (mode) > 16)
5068 abort ();
5069 for (j = first; j >= 0; j--)
5070 {
5071 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5072 val >>= 8;
5073 }
5074 }
5075 }
5076 }
5077 else
5078 gcc_unreachable();
5079 }
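
/* Worked example: constant_to_array (SImode, GEN_INT (0x12345678), arr)
   stores the big-endian bytes 12 34 56 78 and then splats them across
   the remainder of the array:

     12 34 56 78 12 34 56 78 12 34 56 78 12 34 56 78

   so the same array can serve directly as a V4SImode splat constant.  */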
5080
5081 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5082 smaller than 16 bytes, use the bytes that would represent that value
5083 in a register, e.g., for QImode return the value of arr[3]. */
5084 rtx
5085 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5086 {
5087 enum machine_mode inner_mode;
5088 rtvec v;
5089 int units, size, i, j, k;
5090 HOST_WIDE_INT val;
5091
5092 if (GET_MODE_CLASS (mode) == MODE_INT
5093 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5094 {
5095 j = GET_MODE_SIZE (mode);
5096 i = j < 4 ? 4 - j : 0;
5097 for (val = 0; i < j; i++)
5098 val = (val << 8) | arr[i];
5099 val = trunc_int_for_mode (val, mode);
5100 return GEN_INT (val);
5101 }
5102
5103 if (mode == TImode)
5104 {
5105 HOST_WIDE_INT high;
5106 for (i = high = 0; i < 8; i++)
5107 high = (high << 8) | arr[i];
5108 for (i = 8, val = 0; i < 16; i++)
5109 val = (val << 8) | arr[i];
5110 return immed_double_const (val, high, TImode);
5111 }
5112 if (mode == SFmode)
5113 {
5114 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5115 val = trunc_int_for_mode (val, SImode);
5116 return hwint_to_const_double (SFmode, val);
5117 }
5118 if (mode == DFmode)
5119 {
5120 for (i = 0, val = 0; i < 8; i++)
5121 val = (val << 8) | arr[i];
5122 return hwint_to_const_double (DFmode, val);
5123 }
5124
5125 if (!VECTOR_MODE_P (mode))
5126 abort ();
5127
5128 units = GET_MODE_NUNITS (mode);
5129 size = GET_MODE_UNIT_SIZE (mode);
5130 inner_mode = GET_MODE_INNER (mode);
5131 v = rtvec_alloc (units);
5132
5133 for (k = i = 0; i < units; ++i)
5134 {
5135 val = 0;
5136 for (j = 0; j < size; j++, k++)
5137 val = (val << 8) | arr[k];
5138
5139 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5140 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5141 else
5142 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5143 }
5144 if (k > 16)
5145 abort ();
5146
5147 return gen_rtx_CONST_VECTOR (mode, v);
5148 }
5149
5150 static void
5151 reloc_diagnostic (rtx x)
5152 {
5153 tree decl = 0;
5154 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5155 return;
5156
5157 if (GET_CODE (x) == SYMBOL_REF)
5158 decl = SYMBOL_REF_DECL (x);
5159 else if (GET_CODE (x) == CONST
5160 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5161 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5162
5163 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5164 if (decl && !DECL_P (decl))
5165 decl = 0;
5166
5167 /* The decl could be a string constant. */
5168 if (decl && DECL_P (decl))
5169 {
5170 location_t loc;
5171 /* We use last_assemble_variable_decl to get line information. It's
5172 not always going to be right and might not even be close, but will
5173 be right for the more common cases. */
5174 if (!last_assemble_variable_decl || in_section == ctors_section)
5175 loc = DECL_SOURCE_LOCATION (decl);
5176 else
5177 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5178
5179 if (TARGET_WARN_RELOC)
5180 warning_at (loc, 0,
5181 "creating run-time relocation for %qD", decl);
5182 else
5183 error_at (loc,
5184 "creating run-time relocation for %qD", decl);
5185 }
5186 else
5187 {
5188 if (TARGET_WARN_RELOC)
5189 warning_at (input_location, 0, "creating run-time relocation");
5190 else
5191 error_at (input_location, "creating run-time relocation");
5192 }
5193 }
5194
5195 /* Hook into assemble_integer so we can generate an error for run-time
5196 relocations. The SPU ABI disallows them. */
5197 static bool
5198 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5199 {
5200 /* By default run-time relocations aren't supported, but we allow them
5201 in case users support them in their own run-time loader. And we provide
5202 a warning for those users who don't. */
5203 if ((GET_CODE (x) == SYMBOL_REF)
5204 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5205 reloc_diagnostic (x);
5206
5207 return default_assemble_integer (x, size, aligned_p);
5208 }
5209
5210 static void
5211 spu_asm_globalize_label (FILE * file, const char *name)
5212 {
5213 fputs ("\t.global\t", file);
5214 assemble_name (file, name);
5215 fputs ("\n", file);
5216 }
5217
5218 static bool
5219 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5220 int opno ATTRIBUTE_UNUSED, int *total,
5221 bool speed ATTRIBUTE_UNUSED)
5222 {
5223 enum machine_mode mode = GET_MODE (x);
5224 int cost = COSTS_N_INSNS (2);
5225
5226 /* Folding to a CONST_VECTOR will use extra space but there might
5227 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5228 only if it allows us to fold away multiple insns. Changing the cost
5229 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5230 because this cost will only be compared against a single insn.
5231 if (code == CONST_VECTOR)
5232 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5233 */
5234
5235 /* Use defaults for float operations. Not accurate but good enough. */
5236 if (mode == DFmode)
5237 {
5238 *total = COSTS_N_INSNS (13);
5239 return true;
5240 }
5241 if (mode == SFmode)
5242 {
5243 *total = COSTS_N_INSNS (6);
5244 return true;
5245 }
5246 switch (code)
5247 {
5248 case CONST_INT:
5249 if (satisfies_constraint_K (x))
5250 *total = 0;
5251 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5252 *total = COSTS_N_INSNS (1);
5253 else
5254 *total = COSTS_N_INSNS (3);
5255 return true;
5256
5257 case CONST:
5258 *total = COSTS_N_INSNS (3);
5259 return true;
5260
5261 case LABEL_REF:
5262 case SYMBOL_REF:
5263 *total = COSTS_N_INSNS (0);
5264 return true;
5265
5266 case CONST_DOUBLE:
5267 *total = COSTS_N_INSNS (5);
5268 return true;
5269
5270 case FLOAT_EXTEND:
5271 case FLOAT_TRUNCATE:
5272 case FLOAT:
5273 case UNSIGNED_FLOAT:
5274 case FIX:
5275 case UNSIGNED_FIX:
5276 *total = COSTS_N_INSNS (7);
5277 return true;
5278
5279 case PLUS:
5280 if (mode == TImode)
5281 {
5282 *total = COSTS_N_INSNS (9);
5283 return true;
5284 }
5285 break;
5286
5287 case MULT:
5288 cost =
5289 GET_CODE (XEXP (x, 0)) ==
5290 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5291 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5292 {
5293 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5294 {
5295 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5296 cost = COSTS_N_INSNS (14);
5297 if ((val & 0xffff) == 0)
5298 cost = COSTS_N_INSNS (9);
5299 else if (val > 0 && val < 0x10000)
5300 cost = COSTS_N_INSNS (11);
5301 }
5302 }
5303 *total = cost;
5304 return true;
5305 case DIV:
5306 case UDIV:
5307 case MOD:
5308 case UMOD:
5309 *total = COSTS_N_INSNS (20);
5310 return true;
5311 case ROTATE:
5312 case ROTATERT:
5313 case ASHIFT:
5314 case ASHIFTRT:
5315 case LSHIFTRT:
5316 *total = COSTS_N_INSNS (4);
5317 return true;
5318 case UNSPEC:
5319 if (XINT (x, 1) == UNSPEC_CONVERT)
5320 *total = COSTS_N_INSNS (0);
5321 else
5322 *total = COSTS_N_INSNS (4);
5323 return true;
5324 }
5325 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5326 if (GET_MODE_CLASS (mode) == MODE_INT
5327 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5328 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5329 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5330 *total = cost;
5331 return true;
5332 }
5333
5334 static enum machine_mode
5335 spu_unwind_word_mode (void)
5336 {
5337 return SImode;
5338 }
5339
5340 /* Decide whether we can make a sibling call to a function. DECL is the
5341 declaration of the function being targeted by the call and EXP is the
5342 CALL_EXPR representing the call. */
5343 static bool
5344 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5345 {
5346 return decl && !TARGET_LARGE_MEM;
5347 }
5348
5349 /* We need to correctly update the back chain pointer and the Available
5350 Stack Size (which is in the second slot of the sp register). */
5351 void
5352 spu_allocate_stack (rtx op0, rtx op1)
5353 {
5354 HOST_WIDE_INT v;
5355 rtx chain = gen_reg_rtx (V4SImode);
5356 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5357 rtx sp = gen_reg_rtx (V4SImode);
5358 rtx splatted = gen_reg_rtx (V4SImode);
5359 rtx pat = gen_reg_rtx (TImode);
5360
5361 /* copy the back chain so we can save it back again. */
5362 emit_move_insn (chain, stack_bot);
5363
5364 op1 = force_reg (SImode, op1);
5365
5366 v = 0x1020300010203ll;
5367 emit_move_insn (pat, immed_double_const (v, v, TImode));
5368 emit_insn (gen_shufb (splatted, op1, op1, pat));
5369
5370 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5371 emit_insn (gen_subv4si3 (sp, sp, splatted));
5372
5373 if (flag_stack_check)
5374 {
5375 rtx avail = gen_reg_rtx(SImode);
5376 rtx result = gen_reg_rtx(SImode);
5377 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5378 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5379 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5380 }
5381
5382 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5383
5384 emit_move_insn (stack_bot, chain);
5385
5386 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5387 }
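
/* Note on the shuffle constant used above: 0x0001020300010203 in both
   halves splats the preferred-slot word of op1 (the allocation size)
   into all four slots, so the single subv4si3 decrements both the
   stack pointer in slot 0 and the Available Stack Size in slot 1 by
   the same amount.  */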
5388
5389 void
5390 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5391 {
5392 static unsigned char arr[16] =
5393 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5394 rtx temp = gen_reg_rtx (SImode);
5395 rtx temp2 = gen_reg_rtx (SImode);
5396 rtx temp3 = gen_reg_rtx (V4SImode);
5397 rtx temp4 = gen_reg_rtx (V4SImode);
5398 rtx pat = gen_reg_rtx (TImode);
5399 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5400
5401 /* Restore the backchain from the first word, sp from the second. */
5402 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5403 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5404
5405 emit_move_insn (pat, array_to_constant (TImode, arr));
5406
5407 /* Compute Available Stack Size for sp */
5408 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5409 emit_insn (gen_shufb (temp3, temp, temp, pat));
5410
5411 /* Compute Available Stack Size for back chain */
5412 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5413 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5414 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5415
5416 emit_insn (gen_addv4si3 (sp, sp, temp3));
5417 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5418 }
5419
5420 static void
5421 spu_init_libfuncs (void)
5422 {
5423 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5424 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5425 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5426 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5427 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5428 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5429 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5430 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5431 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5432 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5433 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5434 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5435
5436 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5437 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5438
5439 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5440 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5441 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5442 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5443 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5444 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5445 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5446 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5447 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5448 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5449 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5450 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5451
5452 set_optab_libfunc (smul_optab, TImode, "__multi3");
5453 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5454 set_optab_libfunc (smod_optab, TImode, "__modti3");
5455 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5456 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5457 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5458 }
5459
5460 /* Make a subreg, stripping any existing subreg. We could possibly just
5461 call simplify_subreg, but in this case we know what we want. */
5462 rtx
5463 spu_gen_subreg (enum machine_mode mode, rtx x)
5464 {
5465 if (GET_CODE (x) == SUBREG)
5466 x = SUBREG_REG (x);
5467 if (GET_MODE (x) == mode)
5468 return x;
5469 return gen_rtx_SUBREG (mode, x, 0);
5470 }
5471
5472 static bool
5473 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5474 {
5475 return (TYPE_MODE (type) == BLKmode
5476 && ((type) == 0
5477 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5478 || int_size_in_bytes (type) >
5479 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5480 }
5481 \f
5482 /* Create the built-in types and functions */
5483
5484 enum spu_function_code
5485 {
5486 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5487 #include "spu-builtins.def"
5488 #undef DEF_BUILTIN
5489 NUM_SPU_BUILTINS
5490 };
5491
5492 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5493
5494 struct spu_builtin_description spu_builtins[] = {
5495 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5496 {fcode, icode, name, type, params},
5497 #include "spu-builtins.def"
5498 #undef DEF_BUILTIN
5499 };
5500
5501 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5502
5503 /* Returns the spu builtin decl for CODE. */
5504
5505 static tree
5506 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5507 {
5508 if (code >= NUM_SPU_BUILTINS)
5509 return error_mark_node;
5510
5511 return spu_builtin_decls[code];
5512 }
5513
5514
5515 static void
5516 spu_init_builtins (void)
5517 {
5518 struct spu_builtin_description *d;
5519 unsigned int i;
5520
5521 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5522 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5523 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5524 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5525 V4SF_type_node = build_vector_type (float_type_node, 4);
5526 V2DF_type_node = build_vector_type (double_type_node, 2);
5527
5528 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5529 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5530 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5531 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5532
5533 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5534
5535 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5536 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5538 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5539 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5540 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5541 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5542 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5543 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5544 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5545 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5547
5548 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5549 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5550 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5552 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5553 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5554 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5556
5557 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5558 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5559
5560 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5561
5562 spu_builtin_types[SPU_BTI_PTR] =
5563 build_pointer_type (build_qualified_type
5564 (void_type_node,
5565 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5566
5567 /* For each builtin we build a new prototype. The tree code will make
5568 sure nodes are shared. */
5569 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5570 {
5571 tree p;
5572 char name[64]; /* build_function will make a copy. */
5573 int parm;
5574
5575 if (d->name == 0)
5576 continue;
5577
5578 /* Find last parm. */
5579 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5580 ;
5581
5582 p = void_list_node;
5583 while (parm > 1)
5584 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5585
5586 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5587
5588 sprintf (name, "__builtin_%s", d->name);
5589 spu_builtin_decls[i] =
5590 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5591 if (d->fcode == SPU_MASK_FOR_LOAD)
5592 TREE_READONLY (spu_builtin_decls[i]) = 1;
5593
5594 /* These builtins don't throw. */
5595 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5596 }
5597 }
5598
5599 void
5600 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5601 {
5602 static unsigned char arr[16] =
5603 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5604
5605 rtx temp = gen_reg_rtx (Pmode);
5606 rtx temp2 = gen_reg_rtx (V4SImode);
5607 rtx temp3 = gen_reg_rtx (V4SImode);
5608 rtx pat = gen_reg_rtx (TImode);
5609 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5610
5611 emit_move_insn (pat, array_to_constant (TImode, arr));
5612
5613 /* Restore the sp. */
5614 emit_move_insn (temp, op1);
5615 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5616
5617 /* Compute available stack size for sp. */
5618 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5619 emit_insn (gen_shufb (temp3, temp, temp, pat));
5620
5621 emit_insn (gen_addv4si3 (sp, sp, temp3));
5622 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5623 }
5624
5625 int
5626 spu_safe_dma (HOST_WIDE_INT channel)
5627 {
5628 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5629 }
5630
5631 void
5632 spu_builtin_splats (rtx ops[])
5633 {
5634 enum machine_mode mode = GET_MODE (ops[0]);
5635 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5636 {
5637 unsigned char arr[16];
5638 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5639 emit_move_insn (ops[0], array_to_constant (mode, arr));
5640 }
5641 else
5642 {
5643 rtx reg = gen_reg_rtx (TImode);
5644 rtx shuf;
5645 if (GET_CODE (ops[1]) != REG
5646 && GET_CODE (ops[1]) != SUBREG)
5647 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5648 switch (mode)
5649 {
5650 case V2DImode:
5651 case V2DFmode:
5652 shuf =
5653 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5654 TImode);
5655 break;
5656 case V4SImode:
5657 case V4SFmode:
5658 shuf =
5659 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5660 TImode);
5661 break;
5662 case V8HImode:
5663 shuf =
5664 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5665 TImode);
5666 break;
5667 case V16QImode:
5668 shuf =
5669 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5670 TImode);
5671 break;
5672 default:
5673 abort ();
5674 }
5675 emit_move_insn (reg, shuf);
5676 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5677 }
5678 }
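
/* Illustrative reading of the shuffle constants above: for V4SImode
   the pattern 00 01 02 03 repeated four times tells shufb to copy the
   preferred-slot word of ops[1] into every word of the result (cf. the
   spu_splats intrinsic).  For V8HImode the pattern is 02 03 repeated,
   selecting the preferred halfword, and for V16QImode it is 03
   repeated, selecting the preferred byte.  */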
5679
5680 void
5681 spu_builtin_extract (rtx ops[])
5682 {
5683 enum machine_mode mode;
5684 rtx rot, from, tmp;
5685
5686 mode = GET_MODE (ops[1]);
5687
5688 if (GET_CODE (ops[2]) == CONST_INT)
5689 {
5690 switch (mode)
5691 {
5692 case V16QImode:
5693 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5694 break;
5695 case V8HImode:
5696 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5697 break;
5698 case V4SFmode:
5699 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5700 break;
5701 case V4SImode:
5702 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5703 break;
5704 case V2DImode:
5705 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5706 break;
5707 case V2DFmode:
5708 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5709 break;
5710 default:
5711 abort ();
5712 }
5713 return;
5714 }
5715
5716 from = spu_gen_subreg (TImode, ops[1]);
5717 rot = gen_reg_rtx (TImode);
5718 tmp = gen_reg_rtx (SImode);
5719
5720 switch (mode)
5721 {
5722 case V16QImode:
5723 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5724 break;
5725 case V8HImode:
5726 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5727 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5728 break;
5729 case V4SFmode:
5730 case V4SImode:
5731 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5732 break;
5733 case V2DImode:
5734 case V2DFmode:
5735 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5736 break;
5737 default:
5738 abort ();
5739 }
5740 emit_insn (gen_rotqby_ti (rot, from, tmp));
5741
5742 emit_insn (gen_spu_convert (ops[0], rot));
5743 }
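
/* Worked example for a variable index (illustrative): extracting
   element N of a V4SImode vector computes tmp = N << 2, the byte
   offset of the element, and rotqby rotates that element into the
   preferred slot, where spu_convert reads it out.  For V16QImode the
   adjustment is N - 3, because the preferred byte slot is byte 3.  */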
5744
5745 void
5746 spu_builtin_insert (rtx ops[])
5747 {
5748 enum machine_mode mode = GET_MODE (ops[0]);
5749 enum machine_mode imode = GET_MODE_INNER (mode);
5750 rtx mask = gen_reg_rtx (TImode);
5751 rtx offset;
5752
5753 if (GET_CODE (ops[3]) == CONST_INT)
5754 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5755 else
5756 {
5757 offset = gen_reg_rtx (SImode);
5758 emit_insn (gen_mulsi3
5759 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5760 }
5761 emit_insn (gen_cpat
5762 (mask, stack_pointer_rtx, offset,
5763 GEN_INT (GET_MODE_SIZE (imode))));
5764 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5765 }
5766
5767 void
5768 spu_builtin_promote (rtx ops[])
5769 {
5770 enum machine_mode mode, imode;
5771 rtx rot, from, offset;
5772 HOST_WIDE_INT pos;
5773
5774 mode = GET_MODE (ops[0]);
5775 imode = GET_MODE_INNER (mode);
5776
5777 from = gen_reg_rtx (TImode);
5778 rot = spu_gen_subreg (TImode, ops[0]);
5779
5780 emit_insn (gen_spu_convert (from, ops[1]));
5781
5782 if (GET_CODE (ops[2]) == CONST_INT)
5783 {
5784 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5785 if (GET_MODE_SIZE (imode) < 4)
5786 pos += 4 - GET_MODE_SIZE (imode);
5787 offset = GEN_INT (pos & 15);
5788 }
5789 else
5790 {
5791 offset = gen_reg_rtx (SImode);
5792 switch (mode)
5793 {
5794 case V16QImode:
5795 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5796 break;
5797 case V8HImode:
5798 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5799 emit_insn (gen_addsi3 (offset, offset, offset));
5800 break;
5801 case V4SFmode:
5802 case V4SImode:
5803 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5804 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5805 break;
5806 case V2DImode:
5807 case V2DFmode:
5808 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5809 break;
5810 default:
5811 abort ();
5812 }
5813 }
5814 emit_insn (gen_rotqby_ti (rot, from, offset));
5815 }
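/* Added worked example (illustrative): promote places a scalar into element
   ops[2] of the result.  For V4SImode and a constant index of 2,
   pos = -4*2 = -8 and offset = (-8) & 15 = 8, so the rotqby above rotates
   the quadword left by 8 bytes, moving the scalar from its preferred slot
   in bytes 0-3 into bytes 8-11, i.e. element 2.  Sub-word modes add
   4 - size so the rotation starts from the byte or halfword preferred-slot
   position instead.  */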
5816
5817 static void
5818 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5819 {
5820 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5821 rtx shuf = gen_reg_rtx (V4SImode);
5822 rtx insn = gen_reg_rtx (V4SImode);
5823 rtx shufc;
5824 rtx insnc;
5825 rtx mem;
5826
5827 fnaddr = force_reg (SImode, fnaddr);
5828 cxt = force_reg (SImode, cxt);
5829
5830 if (TARGET_LARGE_MEM)
5831 {
5832 rtx rotl = gen_reg_rtx (V4SImode);
5833 rtx mask = gen_reg_rtx (V4SImode);
5834 rtx bi = gen_reg_rtx (SImode);
5835 static unsigned char const shufa[16] = {
5836 2, 3, 0, 1, 18, 19, 16, 17,
5837 0, 1, 2, 3, 16, 17, 18, 19
5838 };
5839 static unsigned char const insna[16] = {
5840 0x41, 0, 0, 79,
5841 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5842 0x60, 0x80, 0, 79,
5843 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5844 };
5845
5846 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5847 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5848
5849 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5850 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5851 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5852 emit_insn (gen_selb (insn, insnc, rotl, mask));
5853
5854 mem = adjust_address (m_tramp, V4SImode, 0);
5855 emit_move_insn (mem, insn);
5856
5857 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5858 mem = adjust_address (m_tramp, Pmode, 16);
5859 emit_move_insn (mem, bi);
5860 }
5861 else
5862 {
5863 rtx scxt = gen_reg_rtx (SImode);
5864 rtx sfnaddr = gen_reg_rtx (SImode);
5865 static unsigned char const insna[16] = {
5866 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5867 0x30, 0, 0, 0,
5868 0, 0, 0, 0,
5869 0, 0, 0, 0
5870 };
5871
5872 shufc = gen_reg_rtx (TImode);
5873 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5874
5875       /* By or'ing all of cxt with the ila opcode, we are assuming that
5876 	 cxt fits in 18 bits and that the last 4 bits are zero.  This will
5877 	 be true if the stack pointer is initialized to 0x3fff0 at program
5878 	 start; otherwise the ila instruction will be garbage. */
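      /* Added rough decoding sketch (hedged, not authoritative): under those
	 assumptions the two instruction words assembled below should decode
	 approximately as
	     ila  $STATIC_CHAIN_REGNUM, cxt    (cxt shifted left 7 into the
						18-bit immediate field)
	     bra  fnaddr                       (fnaddr shifted left 5 so the
						word address lands in the
						16-bit branch field)
	 i.e. load the static chain and branch to the nested function.  */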
5879
5880 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5881 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5882 emit_insn (gen_cpat
5883 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5884 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5885 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5886
5887 mem = adjust_address (m_tramp, V4SImode, 0);
5888 emit_move_insn (mem, insn);
5889 }
5890 emit_insn (gen_sync ());
5891 }
5892
5893 static bool
5894 spu_warn_func_return (tree decl)
5895 {
5896 /* Naked functions are implemented entirely in assembly, including the
5897 return sequence, so suppress warnings about this. */
5898 return !spu_naked_function_p (decl);
5899 }
5900
5901 void
5902 spu_expand_sign_extend (rtx ops[])
5903 {
5904 unsigned char arr[16];
5905 rtx pat = gen_reg_rtx (TImode);
5906 rtx sign, c;
5907 int i, last;
5908 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5909 if (GET_MODE (ops[1]) == QImode)
5910 {
5911 sign = gen_reg_rtx (HImode);
5912 emit_insn (gen_extendqihi2 (sign, ops[1]));
5913 for (i = 0; i < 16; i++)
5914 arr[i] = 0x12;
5915 arr[last] = 0x13;
5916 }
5917 else
5918 {
5919 for (i = 0; i < 16; i++)
5920 arr[i] = 0x10;
5921 switch (GET_MODE (ops[1]))
5922 {
5923 case HImode:
5924 sign = gen_reg_rtx (SImode);
5925 emit_insn (gen_extendhisi2 (sign, ops[1]));
5926 arr[last] = 0x03;
5927 arr[last - 1] = 0x02;
5928 break;
5929 case SImode:
5930 sign = gen_reg_rtx (SImode);
5931 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5932 for (i = 0; i < 4; i++)
5933 arr[last - i] = 3 - i;
5934 break;
5935 case DImode:
5936 sign = gen_reg_rtx (SImode);
5937 c = gen_reg_rtx (SImode);
5938 emit_insn (gen_spu_convert (c, ops[1]));
5939 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5940 for (i = 0; i < 8; i++)
5941 arr[last - i] = 7 - i;
5942 break;
5943 default:
5944 abort ();
5945 }
5946 }
5947 emit_move_insn (pat, array_to_constant (TImode, arr));
5948 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5949 }
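/* Added worked example (illustrative): sign-extending SImode to DImode uses
   last = 7, sign = ops[1] >> 31 (arithmetic), and the shufb pattern
   { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, 0x10,... }.  Control bytes
   0x10-0x1f select from the second shufb source, so bytes 0-3 of the result
   are copies of the sign register's top byte (0x00 or 0xff) and bytes 4-7
   are the original SImode value, leaving the sign-extended DImode value in
   the preferred slot.  */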
5950
5951 /* Expand vector initialization.  If there are any constant parts,
5952    load the constant parts first, then load any non-constant parts.  */
5953 void
5954 spu_expand_vector_init (rtx target, rtx vals)
5955 {
5956 enum machine_mode mode = GET_MODE (target);
5957 int n_elts = GET_MODE_NUNITS (mode);
5958 int n_var = 0;
5959 bool all_same = true;
5960 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5961 int i;
5962
5963 first = XVECEXP (vals, 0, 0);
5964 for (i = 0; i < n_elts; ++i)
5965 {
5966 x = XVECEXP (vals, 0, i);
5967 if (!(CONST_INT_P (x)
5968 || GET_CODE (x) == CONST_DOUBLE
5969 || GET_CODE (x) == CONST_FIXED))
5970 ++n_var;
5971 else
5972 {
5973 if (first_constant == NULL_RTX)
5974 first_constant = x;
5975 }
5976 if (i > 0 && !rtx_equal_p (x, first))
5977 all_same = false;
5978 }
5979
5980   /* If all elements are the same, use splats to repeat the elements.  */
5981 if (all_same)
5982 {
5983 if (!CONSTANT_P (first)
5984 && !register_operand (first, GET_MODE (x)))
5985 first = force_reg (GET_MODE (first), first);
5986 emit_insn (gen_spu_splats (target, first));
5987 return;
5988 }
5989
5990   /* Load the constant parts.  */
5991 if (n_var != n_elts)
5992 {
5993 if (n_var == 0)
5994 {
5995 emit_move_insn (target,
5996 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5997 }
5998 else
5999 {
6000 rtx constant_parts_rtx = copy_rtx (vals);
6001
6002 gcc_assert (first_constant != NULL_RTX);
6003 	  /* Fill empty slots with the first constant; this increases
6004 	     our chance of using splats in the recursive call below.  */
6005 for (i = 0; i < n_elts; ++i)
6006 {
6007 x = XVECEXP (constant_parts_rtx, 0, i);
6008 if (!(CONST_INT_P (x)
6009 || GET_CODE (x) == CONST_DOUBLE
6010 || GET_CODE (x) == CONST_FIXED))
6011 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6012 }
6013
6014 spu_expand_vector_init (target, constant_parts_rtx);
6015 }
6016 }
6017
6018   /* Load the variable parts.  */
6019 if (n_var != 0)
6020 {
6021 rtx insert_operands[4];
6022
6023 insert_operands[0] = target;
6024 insert_operands[2] = target;
6025 for (i = 0; i < n_elts; ++i)
6026 {
6027 x = XVECEXP (vals, 0, i);
6028 if (!(CONST_INT_P (x)
6029 || GET_CODE (x) == CONST_DOUBLE
6030 || GET_CODE (x) == CONST_FIXED))
6031 {
6032 if (!register_operand (x, GET_MODE (x)))
6033 x = force_reg (GET_MODE (x), x);
6034 insert_operands[1] = x;
6035 insert_operands[3] = GEN_INT (i);
6036 spu_builtin_insert (insert_operands);
6037 }
6038 }
6039 }
6040 }
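/* Added illustrative example: initializing a V4SImode vector with
   { 1, 1, x, 1 }, where x is not constant, first rewrites the constant part
   as { 1, 1, 1, 1 } (the variable slot is filled with first_constant), which
   the recursive call emits as a single splat, and then one
   spu_builtin_insert call places x into element 2.  */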
6041
6042 /* Return the insn index of the vector compare instruction for the given
6043    CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6044
6045 static int
6046 get_vec_cmp_insn (enum rtx_code code,
6047 enum machine_mode dest_mode,
6048 enum machine_mode op_mode)
6049
6050 {
6051 switch (code)
6052 {
6053 case EQ:
6054 if (dest_mode == V16QImode && op_mode == V16QImode)
6055 return CODE_FOR_ceq_v16qi;
6056 if (dest_mode == V8HImode && op_mode == V8HImode)
6057 return CODE_FOR_ceq_v8hi;
6058 if (dest_mode == V4SImode && op_mode == V4SImode)
6059 return CODE_FOR_ceq_v4si;
6060 if (dest_mode == V4SImode && op_mode == V4SFmode)
6061 return CODE_FOR_ceq_v4sf;
6062 if (dest_mode == V2DImode && op_mode == V2DFmode)
6063 return CODE_FOR_ceq_v2df;
6064 break;
6065 case GT:
6066 if (dest_mode == V16QImode && op_mode == V16QImode)
6067 return CODE_FOR_cgt_v16qi;
6068 if (dest_mode == V8HImode && op_mode == V8HImode)
6069 return CODE_FOR_cgt_v8hi;
6070 if (dest_mode == V4SImode && op_mode == V4SImode)
6071 return CODE_FOR_cgt_v4si;
6072 if (dest_mode == V4SImode && op_mode == V4SFmode)
6073 return CODE_FOR_cgt_v4sf;
6074 if (dest_mode == V2DImode && op_mode == V2DFmode)
6075 return CODE_FOR_cgt_v2df;
6076 break;
6077 case GTU:
6078 if (dest_mode == V16QImode && op_mode == V16QImode)
6079 return CODE_FOR_clgt_v16qi;
6080 if (dest_mode == V8HImode && op_mode == V8HImode)
6081 return CODE_FOR_clgt_v8hi;
6082 if (dest_mode == V4SImode && op_mode == V4SImode)
6083 return CODE_FOR_clgt_v4si;
6084 break;
6085 default:
6086 break;
6087 }
6088 return -1;
6089 }
6090
6091 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6092 DMODE is expected destination mode. This is a recursive function. */
6093
6094 static rtx
6095 spu_emit_vector_compare (enum rtx_code rcode,
6096 rtx op0, rtx op1,
6097 enum machine_mode dmode)
6098 {
6099 int vec_cmp_insn;
6100 rtx mask;
6101 enum machine_mode dest_mode;
6102 enum machine_mode op_mode = GET_MODE (op1);
6103
6104 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6105
6106   /* Floating point vector compare instructions use V4SImode as the destination.
6107      Double floating point vector compare instructions use V2DImode.
6108      Move the destination to the appropriate mode later.  */
6109 if (dmode == V4SFmode)
6110 dest_mode = V4SImode;
6111 else if (dmode == V2DFmode)
6112 dest_mode = V2DImode;
6113 else
6114 dest_mode = dmode;
6115
6116 mask = gen_reg_rtx (dest_mode);
6117 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6118
6119 if (vec_cmp_insn == -1)
6120 {
6121 bool swap_operands = false;
6122 bool try_again = false;
6123 switch (rcode)
6124 {
6125 case LT:
6126 rcode = GT;
6127 swap_operands = true;
6128 try_again = true;
6129 break;
6130 case LTU:
6131 rcode = GTU;
6132 swap_operands = true;
6133 try_again = true;
6134 break;
6135 case NE:
6136 case UNEQ:
6137 case UNLE:
6138 case UNLT:
6139 case UNGE:
6140 case UNGT:
6141 case UNORDERED:
6142 /* Treat A != B as ~(A==B). */
6143 {
6144 enum rtx_code rev_code;
6145 enum insn_code nor_code;
6146 rtx rev_mask;
6147
6148 rev_code = reverse_condition_maybe_unordered (rcode);
6149 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6150
6151 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6152 gcc_assert (nor_code != CODE_FOR_nothing);
6153 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6154 if (dmode != dest_mode)
6155 {
6156 rtx temp = gen_reg_rtx (dest_mode);
6157 convert_move (temp, mask, 0);
6158 return temp;
6159 }
6160 return mask;
6161 }
6162 break;
6163 case GE:
6164 case GEU:
6165 case LE:
6166 case LEU:
6167 /* Try GT/GTU/LT/LTU OR EQ */
6168 {
6169 rtx c_rtx, eq_rtx;
6170 enum insn_code ior_code;
6171 enum rtx_code new_code;
6172
6173 switch (rcode)
6174 {
6175 case GE: new_code = GT; break;
6176 case GEU: new_code = GTU; break;
6177 case LE: new_code = LT; break;
6178 case LEU: new_code = LTU; break;
6179 default:
6180 gcc_unreachable ();
6181 }
6182
6183 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6184 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6185
6186 ior_code = optab_handler (ior_optab, dest_mode);
6187 gcc_assert (ior_code != CODE_FOR_nothing);
6188 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6189 if (dmode != dest_mode)
6190 {
6191 rtx temp = gen_reg_rtx (dest_mode);
6192 convert_move (temp, mask, 0);
6193 return temp;
6194 }
6195 return mask;
6196 }
6197 break;
6198 case LTGT:
6199 /* Try LT OR GT */
6200 {
6201 rtx lt_rtx, gt_rtx;
6202 enum insn_code ior_code;
6203
6204 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6205 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6206
6207 ior_code = optab_handler (ior_optab, dest_mode);
6208 gcc_assert (ior_code != CODE_FOR_nothing);
6209 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6210 if (dmode != dest_mode)
6211 {
6212 rtx temp = gen_reg_rtx (dest_mode);
6213 convert_move (temp, mask, 0);
6214 return temp;
6215 }
6216 return mask;
6217 }
6218 break;
6219 case ORDERED:
6220 /* Implement as (A==A) & (B==B) */
6221 {
6222 rtx a_rtx, b_rtx;
6223 enum insn_code and_code;
6224
6225 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6226 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6227
6228 and_code = optab_handler (and_optab, dest_mode);
6229 gcc_assert (and_code != CODE_FOR_nothing);
6230 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6231 if (dmode != dest_mode)
6232 {
6233 rtx temp = gen_reg_rtx (dest_mode);
6234 convert_move (temp, mask, 0);
6235 return temp;
6236 }
6237 return mask;
6238 }
6239 break;
6240 default:
6241 gcc_unreachable ();
6242 }
6243
6244 /* You only get two chances. */
6245 if (try_again)
6246 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6247
6248 gcc_assert (vec_cmp_insn != -1);
6249
6250 if (swap_operands)
6251 {
6252 rtx tmp;
6253 tmp = op0;
6254 op0 = op1;
6255 op1 = tmp;
6256 }
6257 }
6258
6259 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6260 if (dmode != dest_mode)
6261 {
6262 rtx temp = gen_reg_rtx (dest_mode);
6263 convert_move (temp, mask, 0);
6264 return temp;
6265 }
6266 return mask;
6267 }
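/* Added note (illustrative): for example, a V4SImode GE compare has no
   direct insn, so the code above emits cgt and ceq and ORs the two masks,
   while an LT compare is rewritten as GT with the operands swapped and
   retried.  NE and the unordered forms go through the reversed condition
   and a one's complement of its mask.  */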
6268
6269
6270 /* Emit vector conditional expression.
6271 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6272 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6273
6274 int
6275 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6276 rtx cond, rtx cc_op0, rtx cc_op1)
6277 {
6278 enum machine_mode dest_mode = GET_MODE (dest);
6279 enum rtx_code rcode = GET_CODE (cond);
6280 rtx mask;
6281
6282 /* Get the vector mask for the given relational operations. */
6283 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6284
6285   emit_insn (gen_selb (dest, op2, op1, mask));
6286
6287 return 1;
6288 }
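/* Added note (illustrative): with the operand order used above (op2 first,
   op1 second), selb computes dest = (mask & op1) | (~mask & op2), so
   elements where the comparison produced all-ones take op1 and the rest
   take op2, matching the VEC_COND_EXPR semantics.  */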
6289
6290 static rtx
6291 spu_force_reg (enum machine_mode mode, rtx op)
6292 {
6293 rtx x, r;
6294 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6295 {
6296 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6297 || GET_MODE (op) == BLKmode)
6298 return force_reg (mode, convert_to_mode (mode, op, 0));
6299 abort ();
6300 }
6301
6302 r = force_reg (GET_MODE (op), op);
6303 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6304 {
6305 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6306 if (x)
6307 return x;
6308 }
6309
6310 x = gen_reg_rtx (mode);
6311 emit_insn (gen_spu_convert (x, r));
6312 return x;
6313 }
6314
6315 static void
6316 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6317 {
6318 HOST_WIDE_INT v = 0;
6319 int lsbits;
6320 /* Check the range of immediate operands. */
6321 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6322 {
6323 int range = p - SPU_BTI_7;
6324
6325 if (!CONSTANT_P (op))
6326 error ("%s expects an integer literal in the range [%d, %d]",
6327 d->name,
6328 spu_builtin_range[range].low, spu_builtin_range[range].high);
6329
6330 if (GET_CODE (op) == CONST
6331 && (GET_CODE (XEXP (op, 0)) == PLUS
6332 || GET_CODE (XEXP (op, 0)) == MINUS))
6333 {
6334 v = INTVAL (XEXP (XEXP (op, 0), 1));
6335 op = XEXP (XEXP (op, 0), 0);
6336 }
6337 else if (GET_CODE (op) == CONST_INT)
6338 v = INTVAL (op);
6339 else if (GET_CODE (op) == CONST_VECTOR
6340 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6341 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6342
6343       /* The default for v is 0, which is valid in every range.  */
6344 if (v < spu_builtin_range[range].low
6345 || v > spu_builtin_range[range].high)
6346 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6347 d->name,
6348 spu_builtin_range[range].low, spu_builtin_range[range].high,
6349 v);
6350
6351 switch (p)
6352 {
6353 case SPU_BTI_S10_4:
6354 lsbits = 4;
6355 break;
6356 case SPU_BTI_U16_2:
6357 	  /* This is only used in lqa and stqa.  Even though the insns
6358 	     encode 16 bits of the address (all but the 2 least
6359 	     significant), only 14 bits are used because the address is
6360 	     masked to be 16-byte aligned.  */
6361 lsbits = 4;
6362 break;
6363 case SPU_BTI_S16_2:
6364 /* This is used for lqr and stqr. */
6365 lsbits = 2;
6366 break;
6367 default:
6368 lsbits = 0;
6369 }
6370
6371 if (GET_CODE (op) == LABEL_REF
6372 || (GET_CODE (op) == SYMBOL_REF
6373 && SYMBOL_REF_FUNCTION_P (op))
6374 || (v & ((1 << lsbits) - 1)) != 0)
6375 warning (0, "%d least significant bits of %s are ignored", lsbits,
6376 d->name);
6377 }
6378 }
6379
6380
6381 static int
6382 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6383 rtx target, rtx ops[])
6384 {
6385 enum insn_code icode = (enum insn_code) d->icode;
6386 int i = 0, a;
6387
6388 /* Expand the arguments into rtl. */
6389
6390 if (d->parm[0] != SPU_BTI_VOID)
6391 ops[i++] = target;
6392
6393 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6394 {
6395 tree arg = CALL_EXPR_ARG (exp, a);
6396 if (arg == 0)
6397 abort ();
6398 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6399 }
6400
6401 gcc_assert (i == insn_data[icode].n_generator_args);
6402 return i;
6403 }
6404
6405 static rtx
6406 spu_expand_builtin_1 (struct spu_builtin_description *d,
6407 tree exp, rtx target)
6408 {
6409 rtx pat;
6410 rtx ops[8];
6411 enum insn_code icode = (enum insn_code) d->icode;
6412 enum machine_mode mode, tmode;
6413 int i, p;
6414 int n_operands;
6415 tree return_type;
6416
6417 /* Set up ops[] with values from arglist. */
6418 n_operands = expand_builtin_args (d, exp, target, ops);
6419
6420 /* Handle the target operand which must be operand 0. */
6421 i = 0;
6422 if (d->parm[0] != SPU_BTI_VOID)
6423 {
6424
6425 /* We prefer the mode specified for the match_operand otherwise
6426 use the mode from the builtin function prototype. */
6427 tmode = insn_data[d->icode].operand[0].mode;
6428 if (tmode == VOIDmode)
6429 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6430
6431       /* Try to use target, because not using it can lead to extra copies,
6432 	 and when all of the registers are in use, extra copies lead to
6433 	 extra spills.  */
6434 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6435 ops[0] = target;
6436 else
6437 target = ops[0] = gen_reg_rtx (tmode);
6438
6439 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6440 abort ();
6441
6442 i++;
6443 }
6444
6445 if (d->fcode == SPU_MASK_FOR_LOAD)
6446 {
6447 enum machine_mode mode = insn_data[icode].operand[1].mode;
6448 tree arg;
6449 rtx addr, op, pat;
6450
6451 /* get addr */
6452 arg = CALL_EXPR_ARG (exp, 0);
6453 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6454 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6455 addr = memory_address (mode, op);
6456
6457 /* negate addr */
6458 op = gen_reg_rtx (GET_MODE (addr));
6459 emit_insn (gen_rtx_SET (VOIDmode, op,
6460 gen_rtx_NEG (GET_MODE (addr), addr)));
6461 op = gen_rtx_MEM (mode, op);
6462
6463 pat = GEN_FCN (icode) (target, op);
6464 if (!pat)
6465 return 0;
6466 emit_insn (pat);
6467 return target;
6468 }
6469
6470   /* Ignore align_hint, but still expand its args in case they have
6471      side effects.  */
6472 if (icode == CODE_FOR_spu_align_hint)
6473 return 0;
6474
6475 /* Handle the rest of the operands. */
6476 for (p = 1; i < n_operands; i++, p++)
6477 {
6478 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6479 mode = insn_data[d->icode].operand[i].mode;
6480 else
6481 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6482
6483 /* mode can be VOIDmode here for labels */
6484
6485 /* For specific intrinsics with an immediate operand, e.g.,
6486 si_ai(), we sometimes need to convert the scalar argument to a
6487 vector argument by splatting the scalar. */
6488 if (VECTOR_MODE_P (mode)
6489 && (GET_CODE (ops[i]) == CONST_INT
6490 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6491 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6492 {
6493 if (GET_CODE (ops[i]) == CONST_INT)
6494 ops[i] = spu_const (mode, INTVAL (ops[i]));
6495 else
6496 {
6497 rtx reg = gen_reg_rtx (mode);
6498 enum machine_mode imode = GET_MODE_INNER (mode);
6499 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6500 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6501 if (imode != GET_MODE (ops[i]))
6502 ops[i] = convert_to_mode (imode, ops[i],
6503 TYPE_UNSIGNED (spu_builtin_types
6504 [d->parm[i]]));
6505 emit_insn (gen_spu_splats (reg, ops[i]));
6506 ops[i] = reg;
6507 }
6508 }
6509
6510 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6511
6512 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6513 ops[i] = spu_force_reg (mode, ops[i]);
6514 }
6515
6516 switch (n_operands)
6517 {
6518 case 0:
6519 pat = GEN_FCN (icode) (0);
6520 break;
6521 case 1:
6522 pat = GEN_FCN (icode) (ops[0]);
6523 break;
6524 case 2:
6525 pat = GEN_FCN (icode) (ops[0], ops[1]);
6526 break;
6527 case 3:
6528 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6529 break;
6530 case 4:
6531 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6532 break;
6533 case 5:
6534 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6535 break;
6536 case 6:
6537 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6538 break;
6539 default:
6540 abort ();
6541 }
6542
6543 if (!pat)
6544 abort ();
6545
6546 if (d->type == B_CALL || d->type == B_BISLED)
6547 emit_call_insn (pat);
6548 else if (d->type == B_JUMP)
6549 {
6550 emit_jump_insn (pat);
6551 emit_barrier ();
6552 }
6553 else
6554 emit_insn (pat);
6555
6556 return_type = spu_builtin_types[d->parm[0]];
6557 if (d->parm[0] != SPU_BTI_VOID
6558 && GET_MODE (target) != TYPE_MODE (return_type))
6559 {
6560       /* target is the return value.  It should always have the mode of
6561 	 the builtin function prototype.  */
6562 target = spu_force_reg (TYPE_MODE (return_type), target);
6563 }
6564
6565 return target;
6566 }
6567
6568 rtx
6569 spu_expand_builtin (tree exp,
6570 rtx target,
6571 rtx subtarget ATTRIBUTE_UNUSED,
6572 enum machine_mode mode ATTRIBUTE_UNUSED,
6573 int ignore ATTRIBUTE_UNUSED)
6574 {
6575 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6576 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6577 struct spu_builtin_description *d;
6578
6579 if (fcode < NUM_SPU_BUILTINS)
6580 {
6581 d = &spu_builtins[fcode];
6582
6583 return spu_expand_builtin_1 (d, exp, target);
6584 }
6585 abort ();
6586 }
6587
6588 /* Implement targetm.vectorize.builtin_mask_for_load. */
6589 static tree
6590 spu_builtin_mask_for_load (void)
6591 {
6592 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6593 }
6594
6595 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6596 static int
6597 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6598 tree vectype,
6599 int misalign ATTRIBUTE_UNUSED)
6600 {
6601 unsigned elements;
6602
6603 switch (type_of_cost)
6604 {
6605 case scalar_stmt:
6606 case vector_stmt:
6607 case vector_load:
6608 case vector_store:
6609 case vec_to_scalar:
6610 case scalar_to_vec:
6611 case cond_branch_not_taken:
6612 case vec_perm:
6613 case vec_promote_demote:
6614 return 1;
6615
6616 case scalar_store:
6617 return 10;
6618
6619 case scalar_load:
6620 /* Load + rotate. */
6621 return 2;
6622
6623 case unaligned_load:
6624 return 2;
6625
6626 case cond_branch_taken:
6627 return 6;
6628
6629 case vec_construct:
6630 elements = TYPE_VECTOR_SUBPARTS (vectype);
6631 return elements / 2 + 1;
6632
6633 default:
6634 gcc_unreachable ();
6635 }
6636 }
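/* Added worked example (illustrative): a vec_construct of a V4SImode vector
   has TYPE_VECTOR_SUBPARTS = 4 and therefore costs 4/2 + 1 = 3 units,
   presumably approximating one insert per pair of elements plus the initial
   load or splat.  */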
6637
6638 /* Implement targetm.vectorize.init_cost. */
6639
6640 static void *
6641 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6642 {
6643 unsigned *cost = XNEWVEC (unsigned, 3);
6644 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6645 return cost;
6646 }
6647
6648 /* Implement targetm.vectorize.add_stmt_cost. */
6649
6650 static unsigned
6651 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6652 struct _stmt_vec_info *stmt_info, int misalign,
6653 enum vect_cost_model_location where)
6654 {
6655 unsigned *cost = (unsigned *) data;
6656 unsigned retval = 0;
6657
6658 if (flag_vect_cost_model)
6659 {
6660 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6661 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6662
6663 /* Statements in an inner loop relative to the loop being
6664 vectorized are weighted more heavily. The value here is
6665 arbitrary and could potentially be improved with analysis. */
6666 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6667 count *= 50; /* FIXME. */
6668
6669 retval = (unsigned) (count * stmt_cost);
6670 cost[where] += retval;
6671 }
6672
6673 return retval;
6674 }
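/* Added worked example (illustrative): with the cost model enabled, a
   statement with count == 2 and kind == scalar_store (cost 10) that sits in
   the body of an inner loop is accounted as 2 * 50 * 10 = 1000 units added
   to cost[vect_body].  */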
6675
6676 /* Implement targetm.vectorize.finish_cost. */
6677
6678 static void
6679 spu_finish_cost (void *data, unsigned *prologue_cost,
6680 unsigned *body_cost, unsigned *epilogue_cost)
6681 {
6682 unsigned *cost = (unsigned *) data;
6683 *prologue_cost = cost[vect_prologue];
6684 *body_cost = cost[vect_body];
6685 *epilogue_cost = cost[vect_epilogue];
6686 }
6687
6688 /* Implement targetm.vectorize.destroy_cost_data. */
6689
6690 static void
6691 spu_destroy_cost_data (void *data)
6692 {
6693 free (data);
6694 }
6695
6696 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6697    after applying N iterations.  This routine does not determine
6698    how many iterations are required to reach the desired alignment.  */
6699
6700 static bool
6701 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6702 {
6703 if (is_packed)
6704 return false;
6705
6706 /* All other types are naturally aligned. */
6707 return true;
6708 }
6709
6710 /* Return the appropriate pointer mode for a named address space.  */
6711 static enum machine_mode
6712 spu_addr_space_pointer_mode (addr_space_t addrspace)
6713 {
6714 switch (addrspace)
6715 {
6716 case ADDR_SPACE_GENERIC:
6717 return ptr_mode;
6718 case ADDR_SPACE_EA:
6719 return EAmode;
6720 default:
6721 gcc_unreachable ();
6722 }
6723 }
6724
6725 /* Return the appropriate address mode for a named address space.  */
6726 static enum machine_mode
6727 spu_addr_space_address_mode (addr_space_t addrspace)
6728 {
6729 switch (addrspace)
6730 {
6731 case ADDR_SPACE_GENERIC:
6732 return Pmode;
6733 case ADDR_SPACE_EA:
6734 return EAmode;
6735 default:
6736 gcc_unreachable ();
6737 }
6738 }
6739
6740 /* Determine if one named address space is a subset of another. */
6741
6742 static bool
6743 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6744 {
6745 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6746 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6747
6748 if (subset == superset)
6749 return true;
6750
6751 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6752 being subsets but instead as disjoint address spaces. */
6753 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6754 return false;
6755
6756 else
6757 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6758 }
6759
6760 /* Convert from one address space to another. */
6761 static rtx
6762 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6763 {
6764 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6765 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6766
6767 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6768 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6769
6770 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6771 {
6772 rtx result, ls;
6773
6774 ls = gen_const_mem (DImode,
6775 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6776 set_mem_align (ls, 128);
6777
6778 result = gen_reg_rtx (Pmode);
6779 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6780 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6781 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6782 ls, const0_rtx, Pmode, 1);
6783
6784 emit_insn (gen_subsi3 (result, op, ls));
6785
6786 return result;
6787 }
6788
6789 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6790 {
6791 rtx result, ls;
6792
6793 ls = gen_const_mem (DImode,
6794 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6795 set_mem_align (ls, 128);
6796
6797 result = gen_reg_rtx (EAmode);
6798 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6799 op = force_reg (Pmode, op);
6800 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6801 ls, const0_rtx, EAmode, 1);
6802 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6803
6804 if (EAmode == SImode)
6805 emit_insn (gen_addsi3 (result, op, ls));
6806 else
6807 emit_insn (gen_adddi3 (result, op, ls));
6808
6809 return result;
6810 }
6811
6812 else
6813 gcc_unreachable ();
6814 }
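/* Added note (illustrative): in both directions the conditional move keeps
   a null pointer null: when OP compares equal to zero the __ea_local_store
   base is replaced by zero before the add or subtract, so converting NULL
   between the generic and __ea address spaces yields NULL.  */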
6815
6816
6817 /* Count the total number of instructions in each pipe and return the
6818 maximum, which is used as the Minimum Iteration Interval (MII)
6819    in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
6820    -2 indicates an instruction that can go in either pipe0 or pipe1.  */
6821 static int
6822 spu_sms_res_mii (struct ddg *g)
6823 {
6824 int i;
6825 unsigned t[4] = {0, 0, 0, 0};
6826
6827 for (i = 0; i < g->num_nodes; i++)
6828 {
6829 rtx insn = g->nodes[i].insn;
6830 int p = get_pipe (insn) + 2;
6831
6832 gcc_assert (p >= 0);
6833 gcc_assert (p < 4);
6834
6835 t[p]++;
6836 if (dump_file && INSN_P (insn))
6837 fprintf (dump_file, "i%d %s %d %d\n",
6838 INSN_UID (insn),
6839 insn_data[INSN_CODE(insn)].name,
6840 p, t[p]);
6841 }
6842 if (dump_file)
6843 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6844
6845 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6846 }
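/* Added worked example (illustrative): t[0] counts insns with
   get_pipe () == -2 (either pipe), t[2] counts pipe0 insns and t[3] counts
   pipe1 insns.  With t = { 2, 0, 3, 1 } the result is
   MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3: the flexible
   insns are split across both pipes and the busier pipe sets the floor.  */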
6847
6848
6849 void
6850 spu_init_expanders (void)
6851 {
6852 if (cfun)
6853 {
6854 rtx r0, r1;
6855       /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only
6856 	 128-bit aligned when frame_pointer_needed is true.  We don't know
6857 	 that until we're expanding the prologue.  */
6858 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6859
6860 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6861 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6862 to be treated as aligned, so generate them here. */
6863 r0 = gen_reg_rtx (SImode);
6864 r1 = gen_reg_rtx (SImode);
6865 mark_reg_pointer (r0, 128);
6866 mark_reg_pointer (r1, 128);
6867 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6868 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6869 }
6870 }
6871
6872 static enum machine_mode
6873 spu_libgcc_cmp_return_mode (void)
6874 {
6875
6876   /* For SPU, the word mode is TImode, so it is better to use SImode
6877      for compare returns.  */
6878 return SImode;
6879 }
6880
6881 static enum machine_mode
6882 spu_libgcc_shift_count_mode (void)
6883 {
6884   /* For SPU, the word mode is TImode, so it is better to use SImode
6885      for shift counts.  */
6886 return SImode;
6887 }
6888
6889 /* Implement targetm.section_type_flags. */
6890 static unsigned int
6891 spu_section_type_flags (tree decl, const char *name, int reloc)
6892 {
6893 /* .toe needs to have type @nobits. */
6894 if (strcmp (name, ".toe") == 0)
6895 return SECTION_BSS;
6896 /* Don't load _ea into the current address space. */
6897 if (strcmp (name, "._ea") == 0)
6898 return SECTION_WRITE | SECTION_DEBUG;
6899 return default_section_type_flags (decl, name, reloc);
6900 }
6901
6902 /* Implement targetm.select_section. */
6903 static section *
6904 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6905 {
6906 /* Variables and constants defined in the __ea address space
6907 go into a special section named "._ea". */
6908 if (TREE_TYPE (decl) != error_mark_node
6909 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6910 {
6911 /* We might get called with string constants, but get_named_section
6912 doesn't like them as they are not DECLs. Also, we need to set
6913 flags in that case. */
6914 if (!DECL_P (decl))
6915 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6916
6917 return get_named_section (decl, "._ea", reloc);
6918 }
6919
6920 return default_elf_select_section (decl, reloc, align);
6921 }
6922
6923 /* Implement targetm.unique_section. */
6924 static void
6925 spu_unique_section (tree decl, int reloc)
6926 {
6927 /* We don't support unique section names in the __ea address
6928 space for now. */
6929 if (TREE_TYPE (decl) != error_mark_node
6930 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6931 return;
6932
6933 default_unique_section (decl, reloc);
6934 }
6935
6936 /* Generate a constant or register which contains 2^SCALE. We assume
6937 the result is valid for MODE. Currently, MODE must be V4SFmode and
6938 SCALE must be SImode. */
6939 rtx
6940 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6941 {
6942 gcc_assert (mode == V4SFmode);
6943 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6944 if (GET_CODE (scale) != CONST_INT)
6945 {
6946 /* unsigned int exp = (127 + scale) << 23;
6947 __vector float m = (__vector float) spu_splats (exp); */
6948 rtx reg = force_reg (SImode, scale);
6949 rtx exp = gen_reg_rtx (SImode);
6950 rtx mul = gen_reg_rtx (mode);
6951 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6952 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6953 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6954 return mul;
6955 }
6956 else
6957 {
6958 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6959 unsigned char arr[16];
6960 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6961 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6962 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6963 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6964 return array_to_constant (mode, arr);
6965 }
6966 }
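/* Added worked example (illustrative): for a constant SCALE of 3 the code
   above computes exp = 127 + 3 = 130 (0x82) and builds the per-word byte
   pattern { 0x41, 0x00, 0x00, 0x00 }, so every V4SFmode element is the
   IEEE-754 single 0x41000000 = 8.0f = 2^3.  */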
6967
6968 /* After reload, just change the convert into a move instruction
6969 or a dead instruction. */
6970 void
6971 spu_split_convert (rtx ops[])
6972 {
6973 if (REGNO (ops[0]) == REGNO (ops[1]))
6974 emit_note (NOTE_INSN_DELETED);
6975 else
6976 {
6977 /* Use TImode always as this might help hard reg copyprop. */
6978 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6979 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6980 emit_insn (gen_move_insn (op0, op1));
6981 }
6982 }
6983
6984 void
6985 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6986 {
6987 fprintf (file, "# profile\n");
6988 fprintf (file, "brsl $75, _mcount\n");
6989 }
6990
6991 /* Implement targetm.ref_may_alias_errno. */
6992 static bool
6993 spu_ref_may_alias_errno (ao_ref *ref)
6994 {
6995 tree base = ao_ref_base (ref);
6996
6997 /* With SPU newlib, errno is defined as something like
6998 _impure_data._errno
6999 The default implementation of this target macro does not
7000      recognize such expressions, so we special-case it here.  */
7001
7002 if (TREE_CODE (base) == VAR_DECL
7003 && !TREE_STATIC (base)
7004 && DECL_EXTERNAL (base)
7005 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7006 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7007 "_impure_data") == 0
7008 /* _errno is the first member of _impure_data. */
7009 && ref->offset == 0)
7010 return true;
7011
7012 return default_ref_may_alias_errno (ref);
7013 }
7014
7015 /* Output thunk to FILE that implements a C++ virtual function call (with
7016 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7017 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7018 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7019 relative to the resulting this pointer. */
7020
7021 static void
7022 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7023 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7024 tree function)
7025 {
7026 rtx op[8];
7027
7028 /* Make sure unwind info is emitted for the thunk if needed. */
7029 final_start_function (emit_barrier (), file, 1);
7030
7031 /* Operand 0 is the target function. */
7032 op[0] = XEXP (DECL_RTL (function), 0);
7033
7034 /* Operand 1 is the 'this' pointer. */
7035 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7036 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7037 else
7038 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7039
7040 /* Operands 2/3 are the low/high halfwords of delta. */
7041 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7042 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7043
7044 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7045 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7046 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7047
7048 /* Operands 6/7 are temporary registers. */
7049 op[6] = gen_rtx_REG (Pmode, 79);
7050 op[7] = gen_rtx_REG (Pmode, 78);
7051
7052 /* Add DELTA to this pointer. */
7053 if (delta)
7054 {
7055 if (delta >= -0x200 && delta < 0x200)
7056 output_asm_insn ("ai\t%1,%1,%2", op);
7057 else if (delta >= -0x8000 && delta < 0x8000)
7058 {
7059 output_asm_insn ("il\t%6,%2", op);
7060 output_asm_insn ("a\t%1,%1,%6", op);
7061 }
7062 else
7063 {
7064 output_asm_insn ("ilhu\t%6,%3", op);
7065 output_asm_insn ("iohl\t%6,%2", op);
7066 output_asm_insn ("a\t%1,%1,%6", op);
7067 }
7068 }
7069
7070 /* Perform vcall adjustment. */
7071 if (vcall_offset)
7072 {
7073 output_asm_insn ("lqd\t%7,0(%1)", op);
7074 output_asm_insn ("rotqby\t%7,%7,%1", op);
7075
7076 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7077 output_asm_insn ("ai\t%7,%7,%4", op);
7078 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7079 {
7080 output_asm_insn ("il\t%6,%4", op);
7081 output_asm_insn ("a\t%7,%7,%6", op);
7082 }
7083 else
7084 {
7085 output_asm_insn ("ilhu\t%6,%5", op);
7086 output_asm_insn ("iohl\t%6,%4", op);
7087 output_asm_insn ("a\t%7,%7,%6", op);
7088 }
7089
7090 output_asm_insn ("lqd\t%6,0(%7)", op);
7091 output_asm_insn ("rotqby\t%6,%6,%7", op);
7092 output_asm_insn ("a\t%1,%1,%6", op);
7093 }
7094
7095 /* Jump to target. */
7096 output_asm_insn ("br\t%0", op);
7097
7098 final_end_function ();
7099 }
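/* Added illustrative sketch (hedged; assumes the first argument register is
   $3 and the function does not return an aggregate): for a thunk with
   delta == 4 and vcall_offset == 0 the code above emits roughly

       ai      $3,$3,4
       br      function

   while larger deltas go through il or ilhu/iohl into the temporary $79
   first.  */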
7100
7101 /* Canonicalize a comparison from one we don't have to one we do have. */
7102 static void
7103 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7104 bool op0_preserve_value)
7105 {
7106 if (!op0_preserve_value
7107 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7108 {
7109 rtx tem = *op0;
7110 *op0 = *op1;
7111 *op1 = tem;
7112 *code = (int)swap_condition ((enum rtx_code)*code);
7113 }
7114 }
7115 \f
7116 /* Table of machine attributes. */
7117 static const struct attribute_spec spu_attribute_table[] =
7118 {
7119 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7120 affects_type_identity } */
7121 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7122 false },
7123 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7124 false },
7125 { NULL, 0, 0, false, false, false, NULL, false }
7126 };
7127
7128 /* TARGET overrides. */
7129
7130 #undef TARGET_ADDR_SPACE_POINTER_MODE
7131 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7132
7133 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7134 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7135
7136 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7137 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7138 spu_addr_space_legitimate_address_p
7139
7140 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7141 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7142
7143 #undef TARGET_ADDR_SPACE_SUBSET_P
7144 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7145
7146 #undef TARGET_ADDR_SPACE_CONVERT
7147 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7148
7149 #undef TARGET_INIT_BUILTINS
7150 #define TARGET_INIT_BUILTINS spu_init_builtins
7151 #undef TARGET_BUILTIN_DECL
7152 #define TARGET_BUILTIN_DECL spu_builtin_decl
7153
7154 #undef TARGET_EXPAND_BUILTIN
7155 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7156
7157 #undef TARGET_UNWIND_WORD_MODE
7158 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7159
7160 #undef TARGET_LEGITIMIZE_ADDRESS
7161 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7162
7163 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7164 and .quad for the debugger. When it is known that the assembler is fixed,
7165 these can be removed. */
7166 #undef TARGET_ASM_UNALIGNED_SI_OP
7167 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7168
7169 #undef TARGET_ASM_ALIGNED_DI_OP
7170 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7171
7172 /* The .8byte directive doesn't seem to work well for a 32 bit
7173 architecture. */
7174 #undef TARGET_ASM_UNALIGNED_DI_OP
7175 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7176
7177 #undef TARGET_RTX_COSTS
7178 #define TARGET_RTX_COSTS spu_rtx_costs
7179
7180 #undef TARGET_ADDRESS_COST
7181 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7182
7183 #undef TARGET_SCHED_ISSUE_RATE
7184 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7185
7186 #undef TARGET_SCHED_INIT_GLOBAL
7187 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7188
7189 #undef TARGET_SCHED_INIT
7190 #define TARGET_SCHED_INIT spu_sched_init
7191
7192 #undef TARGET_SCHED_VARIABLE_ISSUE
7193 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7194
7195 #undef TARGET_SCHED_REORDER
7196 #define TARGET_SCHED_REORDER spu_sched_reorder
7197
7198 #undef TARGET_SCHED_REORDER2
7199 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7200
7201 #undef TARGET_SCHED_ADJUST_COST
7202 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7203
7204 #undef TARGET_ATTRIBUTE_TABLE
7205 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7206
7207 #undef TARGET_ASM_INTEGER
7208 #define TARGET_ASM_INTEGER spu_assemble_integer
7209
7210 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7211 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7212
7213 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7214 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7215
7216 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7217 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7218
7219 #undef TARGET_ASM_GLOBALIZE_LABEL
7220 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7221
7222 #undef TARGET_PASS_BY_REFERENCE
7223 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7224
7225 #undef TARGET_FUNCTION_ARG
7226 #define TARGET_FUNCTION_ARG spu_function_arg
7227
7228 #undef TARGET_FUNCTION_ARG_ADVANCE
7229 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7230
7231 #undef TARGET_MUST_PASS_IN_STACK
7232 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7233
7234 #undef TARGET_BUILD_BUILTIN_VA_LIST
7235 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7236
7237 #undef TARGET_EXPAND_BUILTIN_VA_START
7238 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7239
7240 #undef TARGET_SETUP_INCOMING_VARARGS
7241 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7242
7243 #undef TARGET_MACHINE_DEPENDENT_REORG
7244 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7245
7246 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7247 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7248
7249 #undef TARGET_INIT_LIBFUNCS
7250 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7251
7252 #undef TARGET_RETURN_IN_MEMORY
7253 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7254
7255 #undef TARGET_ENCODE_SECTION_INFO
7256 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7257
7258 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7259 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7260
7261 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7262 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7263
7264 #undef TARGET_VECTORIZE_INIT_COST
7265 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7266
7267 #undef TARGET_VECTORIZE_ADD_STMT_COST
7268 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7269
7270 #undef TARGET_VECTORIZE_FINISH_COST
7271 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7272
7273 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7274 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7275
7276 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7277 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7278
7279 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7280 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7281
7282 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7283 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7284
7285 #undef TARGET_SCHED_SMS_RES_MII
7286 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7287
7288 #undef TARGET_SECTION_TYPE_FLAGS
7289 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7290
7291 #undef TARGET_ASM_SELECT_SECTION
7292 #define TARGET_ASM_SELECT_SECTION spu_select_section
7293
7294 #undef TARGET_ASM_UNIQUE_SECTION
7295 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7296
7297 #undef TARGET_LEGITIMATE_ADDRESS_P
7298 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7299
7300 #undef TARGET_LEGITIMATE_CONSTANT_P
7301 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7302
7303 #undef TARGET_TRAMPOLINE_INIT
7304 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7305
7306 #undef TARGET_WARN_FUNC_RETURN
7307 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7308
7309 #undef TARGET_OPTION_OVERRIDE
7310 #define TARGET_OPTION_OVERRIDE spu_option_override
7311
7312 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7313 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7314
7315 #undef TARGET_REF_MAY_ALIAS_ERRNO
7316 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7317
7318 #undef TARGET_ASM_OUTPUT_MI_THUNK
7319 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7320 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7321 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7322
7323 /* Variable tracking should be run after all optimizations which
7324 change order of insns. It also needs a valid CFG. */
7325 #undef TARGET_DELAY_VARTRACK
7326 #define TARGET_DELAY_VARTRACK true
7327
7328 #undef TARGET_CANONICALIZE_COMPARISON
7329 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7330
7331 #undef TARGET_CAN_USE_DOLOOP_P
7332 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7333
7334 struct gcc_target targetm = TARGET_INITIALIZER;
7335
7336 #include "gt-spu.h"