gcc/config/spu/spu.c
1 /* Copyright (C) 2006-2019 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #define IN_TARGET_CODE 1
18
19 #include "config.h"
20 #include "system.h"
21 #include "coretypes.h"
22 #include "backend.h"
23 #include "target.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "attribs.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "output.h"
49 #include "cfgrtl.h"
50 #include "cfgbuild.h"
51 #include "langhooks.h"
52 #include "reload.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "gimplify.h"
56 #include "tm-constrs.h"
57 #include "ddg.h"
58 #include "dumpfile.h"
59 #include "builtins.h"
60 #include "rtl-iter.h"
61 #include "flags.h"
62 #include "toplev.h"
63
64 /* This file should be included last. */
65 #include "target-def.h"
66
67 /* Builtin types, data and prototypes. */
68
69 enum spu_builtin_type_index
70 {
71 SPU_BTI_END_OF_PARAMS,
72
73 /* We create new type nodes for these. */
74 SPU_BTI_V16QI,
75 SPU_BTI_V8HI,
76 SPU_BTI_V4SI,
77 SPU_BTI_V2DI,
78 SPU_BTI_V4SF,
79 SPU_BTI_V2DF,
80 SPU_BTI_UV16QI,
81 SPU_BTI_UV8HI,
82 SPU_BTI_UV4SI,
83 SPU_BTI_UV2DI,
84
85 /* A 16-byte type. (Implemented with V16QI_type_node) */
86 SPU_BTI_QUADWORD,
87
88 /* These all correspond to intSI_type_node */
89 SPU_BTI_7,
90 SPU_BTI_S7,
91 SPU_BTI_U7,
92 SPU_BTI_S10,
93 SPU_BTI_S10_4,
94 SPU_BTI_U14,
95 SPU_BTI_16,
96 SPU_BTI_S16,
97 SPU_BTI_S16_2,
98 SPU_BTI_U16,
99 SPU_BTI_U16_2,
100 SPU_BTI_U18,
101
102 /* These correspond to the standard types */
103 SPU_BTI_INTQI,
104 SPU_BTI_INTHI,
105 SPU_BTI_INTSI,
106 SPU_BTI_INTDI,
107
108 SPU_BTI_UINTQI,
109 SPU_BTI_UINTHI,
110 SPU_BTI_UINTSI,
111 SPU_BTI_UINTDI,
112
113 SPU_BTI_FLOAT,
114 SPU_BTI_DOUBLE,
115
116 SPU_BTI_VOID,
117 SPU_BTI_PTR,
118
119 SPU_BTI_MAX
120 };
121
122 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
123 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
124 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
125 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
126 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
127 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
128 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
129 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
130 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
131 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
132
133 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
134
135 struct spu_builtin_range
136 {
137 int low, high;
138 };
139
140 static struct spu_builtin_range spu_builtin_range[] = {
141 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
142 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
143 {0ll, 0x7fll}, /* SPU_BTI_U7 */
144 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
145 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
146 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
147 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
148 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
149 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
150 {0ll, 0xffffll}, /* SPU_BTI_U16 */
151 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
152 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
153 };
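/* For illustration: each entry above is the inclusive range accepted for the
   corresponding SPU_BTI_* immediate operand.  SPU_BTI_S10 is a signed 10-bit
   field and so accepts -0x200 .. 0x1ff (-512 .. 511), while SPU_BTI_U7
   accepts 0 .. 0x7f.  These bounds are used to validate constant arguments
   passed to the SPU builtins.  */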
154
155 \f
156 /* Target specific attribute specifications. */
157 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
158
159 /* Prototypes and external defs. */
160 static int get_pipe (rtx_insn *insn);
161 static int spu_naked_function_p (tree func);
162 static int mem_is_padded_component_ref (rtx x);
163 static void fix_range (const char *);
164 static rtx spu_expand_load (rtx, rtx, rtx, int);
165
166 /* Which instruction set architecture to use. */
167 int spu_arch;
168 /* Which cpu are we tuning for. */
169 int spu_tune;
170
171 /* The hardware requires 8 insns between a hint and the branch it
172 affects. This variable describes how many rtl instructions the
173 compiler needs to see before inserting a hint, and then the compiler
174 will insert enough nops to make it at least 8 insns. The default is
175 for the compiler to allow up to 2 nops to be emitted. The nops are
176 inserted in pairs, so we round down. */
177 int spu_hint_dist = (8*4) - (2*4);
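/* For illustration: with the default of at most 2 nops allowed,
   spu_hint_dist is 8*4 - 2*4 = 24 bytes, i.e. at least 6 real insns must
   already separate the hint from the branch, and up to 2 nops are then
   inserted to reach the required 8.  */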
178
179 enum spu_immediate {
180 SPU_NONE,
181 SPU_IL,
182 SPU_ILA,
183 SPU_ILH,
184 SPU_ILHU,
185 SPU_ORI,
186 SPU_ORHI,
187 SPU_ORBI,
188 SPU_IOHL
189 };
190 enum immediate_class
191 {
192 IC_POOL, /* constant pool */
193 IC_IL1, /* one il* instruction */
194 IC_IL2, /* both ilhu and iohl instructions */
195 IC_IL1s, /* one il* instruction */
196 IC_IL2s, /* both ilhu and iohl instructions */
197 IC_FSMBI, /* the fsmbi instruction */
198 IC_CPAT, /* one of the c*d instructions */
199 IC_FSMBI2 /* fsmbi plus 1 other instruction */
200 };
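/* A rough example of how constants fall into these classes, assuming the
   usual SPU immediate instructions: a word constant such as 0x12340000 can
   be built with a single ilhu, so it is IC_IL1, while 0x12345678 needs
   ilhu 0x1234 followed by iohl 0x5678 and is therefore IC_IL2.  A vector
   whose bytes are all either 0x00 or 0xff matches fsmbi (IC_FSMBI), and
   anything the other classes cannot handle ends up in the constant pool
   (IC_POOL).  */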
201
202 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
203 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
204 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
205 static enum immediate_class classify_immediate (rtx op,
206 machine_mode mode);
207
208 /* Pointer mode for __ea references. */
209 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
210
211 \f
212 /* Define the structure for the machine field in struct function. */
213 struct GTY(()) machine_function
214 {
215 /* Register to use for PIC accesses. */
216 rtx pic_reg;
217 };
218
219 /* How to allocate a 'struct machine_function'. */
220 static struct machine_function *
221 spu_init_machine_status (void)
222 {
223 return ggc_cleared_alloc<machine_function> ();
224 }
225
226 /* Implement TARGET_OPTION_OVERRIDE. */
227 static void
228 spu_option_override (void)
229 {
230 /* Set up function hooks. */
231 init_machine_status = spu_init_machine_status;
232
233 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it is
234 more important to keep code small by default. */
235 if (!flag_unroll_loops && !flag_peel_loops)
236 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
237 global_options.x_param_values,
238 global_options_set.x_param_values);
239
240 flag_omit_frame_pointer = 1;
241
242 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
243 parse_alignment_opts ();
244 if (align_functions.levels[0].get_value () < 8)
245 str_align_functions = "8";
246
247 spu_hint_dist = 8*4 - spu_max_nops*4;
248 if (spu_hint_dist < 0)
249 spu_hint_dist = 0;
250
251 if (spu_fixed_range_string)
252 fix_range (spu_fixed_range_string);
253
254 /* Determine processor architectural level. */
255 if (spu_arch_string)
256 {
257 if (strcmp (&spu_arch_string[0], "cell") == 0)
258 spu_arch = PROCESSOR_CELL;
259 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
260 spu_arch = PROCESSOR_CELLEDP;
261 else
262 error ("bad value (%s) for %<-march=%> switch", spu_arch_string);
263 }
264
265 /* Determine processor to tune for. */
266 if (spu_tune_string)
267 {
268 if (strcmp (&spu_tune_string[0], "cell") == 0)
269 spu_tune = PROCESSOR_CELL;
270 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
271 spu_tune = PROCESSOR_CELLEDP;
272 else
273 error ("bad value (%s) for %<-mtune=%> switch", spu_tune_string);
274 }
275
276 /* Change defaults according to the processor architecture. */
277 if (spu_arch == PROCESSOR_CELLEDP)
278 {
279 /* If no command line option has been otherwise specified, change
280 the default to -mno-safe-hints on celledp -- only the original
281 Cell/B.E. processors require this workaround. */
282 if (!(target_flags_explicit & MASK_SAFE_HINTS))
283 target_flags &= ~MASK_SAFE_HINTS;
284 }
285
286 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
287 }
288 \f
289 /* Implement TARGET_HARD_REGNO_NREGS. */
290
291 static unsigned int
292 spu_hard_regno_nregs (unsigned int, machine_mode mode)
293 {
294 return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
295 }
296
297 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
298 struct attribute_spec.handler. */
299
300 /* True if MODE is valid for the target. By "valid", we mean able to
301 be manipulated in non-trivial ways. In particular, this means all
302 the arithmetic is supported. */
303 static bool
304 spu_scalar_mode_supported_p (scalar_mode mode)
305 {
306 switch (mode)
307 {
308 case E_QImode:
309 case E_HImode:
310 case E_SImode:
311 case E_SFmode:
312 case E_DImode:
313 case E_TImode:
314 case E_DFmode:
315 return true;
316
317 default:
318 return false;
319 }
320 }
321
322 /* Similarly for vector modes. "Supported" here is less strict. At
323 least some operations are supported; need to check optabs or builtins
324 for further details. */
325 static bool
326 spu_vector_mode_supported_p (machine_mode mode)
327 {
328 switch (mode)
329 {
330 case E_V16QImode:
331 case E_V8HImode:
332 case E_V4SImode:
333 case E_V2DImode:
334 case E_V4SFmode:
335 case E_V2DFmode:
336 return true;
337
338 default:
339 return false;
340 }
341 }
342
343 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
344 least significant bytes of the outer mode. This function returns
345 TRUE for the SUBREG's where this is correct. */
346 int
347 valid_subreg (rtx op)
348 {
349 machine_mode om = GET_MODE (op);
350 machine_mode im = GET_MODE (SUBREG_REG (op));
351 return om != VOIDmode && im != VOIDmode
352 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
353 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
354 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
355 }
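/* For illustration: under the rule above, (subreg:SI (reg:QI ...) 0) is
   accepted because both modes are at most 4 bytes, and (subreg:V4SI
   (reg:TI ...) 0) is accepted because both are at least 16 bytes, but a
   paradoxical (subreg:TI (reg:SI ...) 0) is rejected since the 4-byte inner
   value would not occupy the bytes GCC expects of the 16-byte outer mode
   on SPU.  */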
356
357 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
358 and adjust the start offset. */
359 static rtx
360 adjust_operand (rtx op, HOST_WIDE_INT * start)
361 {
362 machine_mode mode;
363 int op_size;
364 /* Strip any paradoxical SUBREG. */
365 if (GET_CODE (op) == SUBREG
366 && (GET_MODE_BITSIZE (GET_MODE (op))
367 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
368 {
369 if (start)
370 *start -=
371 GET_MODE_BITSIZE (GET_MODE (op)) -
372 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
373 op = SUBREG_REG (op);
374 }
375 /* If it is smaller than SI, ensure an SImode SUBREG is used. */
376 op_size = GET_MODE_BITSIZE (GET_MODE (op));
377 if (op_size < 32)
378 {
379 if (start)
380 *start += 32 - op_size;
381 op_size = 32;
382 }
383 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
384 mode = int_mode_for_size (op_size, 0).require ();
385 if (mode != GET_MODE (op))
386 op = gen_rtx_SUBREG (mode, op, 0);
387 return op;
388 }
389
390 void
391 spu_expand_extv (rtx ops[], int unsignedp)
392 {
393 rtx dst = ops[0], src = ops[1];
394 HOST_WIDE_INT width = INTVAL (ops[2]);
395 HOST_WIDE_INT start = INTVAL (ops[3]);
396 HOST_WIDE_INT align_mask;
397 rtx s0, s1, mask, r0;
398
399 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
400
401 if (MEM_P (src))
402 {
403 /* First, determine if we need 1 TImode load or 2. We need only 1
404 if the bits being extracted do not cross the alignment boundary
405 as determined by the MEM and its address. */
406
407 align_mask = -MEM_ALIGN (src);
408 if ((start & align_mask) == ((start + width - 1) & align_mask))
409 {
410 /* Alignment is sufficient for 1 load. */
411 s0 = gen_reg_rtx (TImode);
412 r0 = spu_expand_load (s0, 0, src, start / 8);
413 start &= 7;
414 if (r0)
415 emit_insn (gen_rotqby_ti (s0, s0, r0));
416 }
417 else
418 {
419 /* Need 2 loads. */
420 s0 = gen_reg_rtx (TImode);
421 s1 = gen_reg_rtx (TImode);
422 r0 = spu_expand_load (s0, s1, src, start / 8);
423 start &= 7;
424
425 gcc_assert (start + width <= 128);
426 if (r0)
427 {
428 rtx r1 = gen_reg_rtx (SImode);
429 mask = gen_reg_rtx (TImode);
430 emit_move_insn (mask, GEN_INT (-1));
431 emit_insn (gen_rotqby_ti (s0, s0, r0));
432 emit_insn (gen_rotqby_ti (s1, s1, r0));
433 if (GET_CODE (r0) == CONST_INT)
434 r1 = GEN_INT (INTVAL (r0) & 15);
435 else
436 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
437 emit_insn (gen_shlqby_ti (mask, mask, r1));
438 emit_insn (gen_selb (s0, s1, s0, mask));
439 }
440 }
441
442 }
443 else if (GET_CODE (src) == SUBREG)
444 {
445 rtx r = SUBREG_REG (src);
446 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
447 s0 = gen_reg_rtx (TImode);
448 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
449 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
450 else
451 emit_move_insn (s0, src);
452 }
453 else
454 {
455 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
456 s0 = gen_reg_rtx (TImode);
457 emit_move_insn (s0, src);
458 }
459
460 /* Now s0 is TImode and contains the bits to extract at start. */
461
462 if (start)
463 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
464
465 if (128 - width)
466 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
467
468 emit_move_insn (dst, s0);
469 }
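/* For illustration of the one-load/two-load decision above: if SRC is known
   to be 128-bit aligned, align_mask is -128, so any extraction with
   start + width <= 128 stays within one quadword and takes a single TImode
   load.  With only 32-bit alignment, an extraction of 64 bits starting at
   bit 16 crosses a 32-bit boundary; since nothing stronger than 32-bit
   alignment is known, it may also cross a quadword boundary, so both
   neighbouring quadwords are loaded and merged with selb.  */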
470
471 void
472 spu_expand_insv (rtx ops[])
473 {
474 HOST_WIDE_INT width = INTVAL (ops[1]);
475 HOST_WIDE_INT start = INTVAL (ops[2]);
476 unsigned HOST_WIDE_INT maskbits;
477 machine_mode dst_mode;
478 rtx dst = ops[0], src = ops[3];
479 int dst_size;
480 rtx mask;
481 rtx shift_reg;
482 int shift;
483
484
485 if (GET_CODE (ops[0]) == MEM)
486 dst = gen_reg_rtx (TImode);
487 else
488 dst = adjust_operand (dst, &start);
489 dst_mode = GET_MODE (dst);
490 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
491
492 if (CONSTANT_P (src))
493 {
494 machine_mode m =
495 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
496 src = force_reg (m, convert_to_mode (m, src, 0));
497 }
498 src = adjust_operand (src, 0);
499
500 mask = gen_reg_rtx (dst_mode);
501 shift_reg = gen_reg_rtx (dst_mode);
502 shift = dst_size - start - width;
503
504 /* It's not safe to use subreg here because the compiler assumes
505 that the SUBREG_REG is right justified in the SUBREG. */
506 convert_move (shift_reg, src, 1);
507
508 if (shift > 0)
509 {
510 switch (dst_mode)
511 {
512 case E_SImode:
513 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
514 break;
515 case E_DImode:
516 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
517 break;
518 case E_TImode:
519 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
520 break;
521 default:
522 abort ();
523 }
524 }
525 else if (shift < 0)
526 abort ();
527
528 switch (dst_size)
529 {
530 case 32:
531 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
532 if (start)
533 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
534 emit_move_insn (mask, GEN_INT (maskbits));
535 break;
536 case 64:
537 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
538 if (start)
539 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
540 emit_move_insn (mask, GEN_INT (maskbits));
541 break;
542 case 128:
543 {
544 unsigned char arr[16];
545 int i = start / 8;
546 memset (arr, 0, sizeof (arr));
547 arr[i] = 0xff >> (start & 7);
548 for (i++; i <= (start + width - 1) / 8; i++)
549 arr[i] = 0xff;
550 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
551 emit_move_insn (mask, array_to_constant (TImode, arr));
552 }
553 break;
554 default:
555 abort ();
556 }
557 if (GET_CODE (ops[0]) == MEM)
558 {
559 rtx low = gen_reg_rtx (SImode);
560 rtx rotl = gen_reg_rtx (SImode);
561 rtx mask0 = gen_reg_rtx (TImode);
562 rtx addr;
563 rtx addr0;
564 rtx addr1;
565 rtx mem;
566
567 addr = force_reg (Pmode, XEXP (ops[0], 0));
568 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
569 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
570 emit_insn (gen_negsi2 (rotl, low));
571 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
572 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
573 mem = change_address (ops[0], TImode, addr0);
574 set_mem_alias_set (mem, 0);
575 emit_move_insn (dst, mem);
576 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
577 if (start + width > MEM_ALIGN (ops[0]))
578 {
579 rtx shl = gen_reg_rtx (SImode);
580 rtx mask1 = gen_reg_rtx (TImode);
581 rtx dst1 = gen_reg_rtx (TImode);
582 rtx mem1;
583 addr1 = plus_constant (Pmode, addr, 16);
584 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
585 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
586 emit_insn (gen_shlqby_ti (mask1, mask, shl));
587 mem1 = change_address (ops[0], TImode, addr1);
588 set_mem_alias_set (mem1, 0);
589 emit_move_insn (dst1, mem1);
590 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
591 emit_move_insn (mem1, dst1);
592 }
593 emit_move_insn (mem, dst);
594 }
595 else
596 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
597 }
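/* A worked example of the mask construction above, for a 32-bit destination:
   inserting width == 8 at start == 8 gives
   maskbits = ~0 << (32 - 8 - 8), i.e. all bits set from bit 16 upward, and
   adding 1 << (32 - 8) = 0x01000000 carries away everything at and above
   bit 24, leaving 0x00ff0000 -- exactly the eight bits at positions 8..15
   counted from the most significant end.  */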
598
599
600 int
601 spu_expand_block_move (rtx ops[])
602 {
603 HOST_WIDE_INT bytes, align, offset;
604 rtx src, dst, sreg, dreg, target;
605 int i;
606 if (GET_CODE (ops[2]) != CONST_INT
607 || GET_CODE (ops[3]) != CONST_INT
608 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
609 return 0;
610
611 bytes = INTVAL (ops[2]);
612 align = INTVAL (ops[3]);
613
614 if (bytes <= 0)
615 return 1;
616
617 dst = ops[0];
618 src = ops[1];
619
620 if (align == 16)
621 {
622 for (offset = 0; offset + 16 <= bytes; offset += 16)
623 {
624 dst = adjust_address (ops[0], V16QImode, offset);
625 src = adjust_address (ops[1], V16QImode, offset);
626 emit_move_insn (dst, src);
627 }
628 if (offset < bytes)
629 {
630 rtx mask;
631 unsigned char arr[16] = { 0 };
632 for (i = 0; i < bytes - offset; i++)
633 arr[i] = 0xff;
634 dst = adjust_address (ops[0], V16QImode, offset);
635 src = adjust_address (ops[1], V16QImode, offset);
636 mask = gen_reg_rtx (V16QImode);
637 sreg = gen_reg_rtx (V16QImode);
638 dreg = gen_reg_rtx (V16QImode);
639 target = gen_reg_rtx (V16QImode);
640 emit_move_insn (mask, array_to_constant (V16QImode, arr));
641 emit_move_insn (dreg, dst);
642 emit_move_insn (sreg, src);
643 emit_insn (gen_selb (target, dreg, sreg, mask));
644 emit_move_insn (dst, target);
645 }
646 return 1;
647 }
648 return 0;
649 }
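/* For illustration: with 16-byte alignment, a copy of 18 bytes is expanded
   above as one full V16QImode move for bytes 0..15; then, for the 2-byte
   tail, the remaining source and destination quadwords are both loaded and
   merged with selb using a mask whose first two bytes are 0xff, so only the
   tail bytes of the destination are overwritten.  */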
650
651 enum spu_comp_code
652 { SPU_EQ, SPU_GT, SPU_GTU };
653
654 int spu_comp_icode[12][3] = {
655 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
656 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
657 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
658 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
659 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
660 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
661 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
662 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
663 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
664 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
665 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
666 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
667 };
668
669 /* Generate a compare for CODE. Depending on IS_SET, emit a branch on the
670 result, set a register to the result, or emit a conditional select. GCC
671 can figure this out too if we don't provide all variations of compares,
672 but since GCC always wants to use WORD_MODE, we can generate better code
673 in most cases if we do it ourselves. */
674 void
675 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
676 {
677 int reverse_compare = 0;
678 int reverse_test = 0;
679 rtx compare_result, eq_result;
680 rtx comp_rtx, eq_rtx;
681 machine_mode comp_mode;
682 machine_mode op_mode;
683 enum spu_comp_code scode, eq_code;
684 enum insn_code ior_code;
685 enum rtx_code code = GET_CODE (cmp);
686 rtx op0 = XEXP (cmp, 0);
687 rtx op1 = XEXP (cmp, 1);
688 int index;
689 int eq_test = 0;
690
691 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
692 and so on, to keep the constant in operand 1. */
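/* For example, (x >= 10) becomes (x > 9) and (x < 10) becomes (x <= 9),
   keeping the constant in operand 1 where an immediate form of the compare
   can still be used.  */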
693 if (GET_CODE (op1) == CONST_INT)
694 {
695 HOST_WIDE_INT val = INTVAL (op1) - 1;
696 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
697 switch (code)
698 {
699 case GE:
700 op1 = GEN_INT (val);
701 code = GT;
702 break;
703 case LT:
704 op1 = GEN_INT (val);
705 code = LE;
706 break;
707 case GEU:
708 op1 = GEN_INT (val);
709 code = GTU;
710 break;
711 case LTU:
712 op1 = GEN_INT (val);
713 code = LEU;
714 break;
715 default:
716 break;
717 }
718 }
719
720 /* However, if we generate an integer result, performing a reverse test
721 would require an extra negation, so avoid that where possible. */
722 if (GET_CODE (op1) == CONST_INT && is_set == 1)
723 {
724 HOST_WIDE_INT val = INTVAL (op1) + 1;
725 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
726 switch (code)
727 {
728 case LE:
729 op1 = GEN_INT (val);
730 code = LT;
731 break;
732 case LEU:
733 op1 = GEN_INT (val);
734 code = LTU;
735 break;
736 default:
737 break;
738 }
739 }
740
741 comp_mode = SImode;
742 op_mode = GET_MODE (op0);
743
744 switch (code)
745 {
746 case GE:
747 scode = SPU_GT;
748 if (HONOR_NANS (op_mode))
749 {
750 reverse_compare = 0;
751 reverse_test = 0;
752 eq_test = 1;
753 eq_code = SPU_EQ;
754 }
755 else
756 {
757 reverse_compare = 1;
758 reverse_test = 1;
759 }
760 break;
761 case LE:
762 scode = SPU_GT;
763 if (HONOR_NANS (op_mode))
764 {
765 reverse_compare = 1;
766 reverse_test = 0;
767 eq_test = 1;
768 eq_code = SPU_EQ;
769 }
770 else
771 {
772 reverse_compare = 0;
773 reverse_test = 1;
774 }
775 break;
776 case LT:
777 reverse_compare = 1;
778 reverse_test = 0;
779 scode = SPU_GT;
780 break;
781 case GEU:
782 reverse_compare = 1;
783 reverse_test = 1;
784 scode = SPU_GTU;
785 break;
786 case LEU:
787 reverse_compare = 0;
788 reverse_test = 1;
789 scode = SPU_GTU;
790 break;
791 case LTU:
792 reverse_compare = 1;
793 reverse_test = 0;
794 scode = SPU_GTU;
795 break;
796 case NE:
797 reverse_compare = 0;
798 reverse_test = 1;
799 scode = SPU_EQ;
800 break;
801
802 case EQ:
803 scode = SPU_EQ;
804 break;
805 case GT:
806 scode = SPU_GT;
807 break;
808 case GTU:
809 scode = SPU_GTU;
810 break;
811 default:
812 scode = SPU_EQ;
813 break;
814 }
815
816 switch (op_mode)
817 {
818 case E_QImode:
819 index = 0;
820 comp_mode = QImode;
821 break;
822 case E_HImode:
823 index = 1;
824 comp_mode = HImode;
825 break;
826 case E_SImode:
827 index = 2;
828 break;
829 case E_DImode:
830 index = 3;
831 break;
832 case E_TImode:
833 index = 4;
834 break;
835 case E_SFmode:
836 index = 5;
837 break;
838 case E_DFmode:
839 index = 6;
840 break;
841 case E_V16QImode:
842 index = 7;
843 comp_mode = op_mode;
844 break;
845 case E_V8HImode:
846 index = 8;
847 comp_mode = op_mode;
848 break;
849 case E_V4SImode:
850 index = 9;
851 comp_mode = op_mode;
852 break;
853 case E_V4SFmode:
854 index = 10;
855 comp_mode = V4SImode;
856 break;
857 case E_V2DFmode:
858 index = 11;
859 comp_mode = V2DImode;
860 break;
861 case E_V2DImode:
862 default:
863 abort ();
864 }
865
866 if (GET_MODE (op1) == DFmode
867 && (scode != SPU_GT && scode != SPU_EQ))
868 abort ();
869
870 if (is_set == 0 && op1 == const0_rtx
871 && (GET_MODE (op0) == SImode
872 || GET_MODE (op0) == HImode
873 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
874 {
875 /* Don't need to set a register with the result when we are
876 comparing against zero and branching. */
877 reverse_test = !reverse_test;
878 compare_result = op0;
879 }
880 else
881 {
882 compare_result = gen_reg_rtx (comp_mode);
883
884 if (reverse_compare)
885 {
886 rtx t = op1;
887 op1 = op0;
888 op0 = t;
889 }
890
891 if (spu_comp_icode[index][scode] == 0)
892 abort ();
893
894 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
895 (op0, op_mode))
896 op0 = force_reg (op_mode, op0);
897 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
898 (op1, op_mode))
899 op1 = force_reg (op_mode, op1);
900 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
901 op0, op1);
902 if (comp_rtx == 0)
903 abort ();
904 emit_insn (comp_rtx);
905
906 if (eq_test)
907 {
908 eq_result = gen_reg_rtx (comp_mode);
909 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
910 op0, op1);
911 if (eq_rtx == 0)
912 abort ();
913 emit_insn (eq_rtx);
914 ior_code = optab_handler (ior_optab, comp_mode);
915 gcc_assert (ior_code != CODE_FOR_nothing);
916 emit_insn (GEN_FCN (ior_code)
917 (compare_result, compare_result, eq_result));
918 }
919 }
920
921 if (is_set == 0)
922 {
923 rtx bcomp;
924 rtx loc_ref;
925
926 /* We don't have branch on QI compare insns, so we convert the
927 QI compare result to a HI result. */
928 if (comp_mode == QImode)
929 {
930 rtx old_res = compare_result;
931 compare_result = gen_reg_rtx (HImode);
932 comp_mode = HImode;
933 emit_insn (gen_extendqihi2 (compare_result, old_res));
934 }
935
936 if (reverse_test)
937 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
938 else
939 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
940
941 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
942 emit_jump_insn (gen_rtx_SET (pc_rtx,
943 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
944 loc_ref, pc_rtx)));
945 }
946 else if (is_set == 2)
947 {
948 rtx target = operands[0];
949 int compare_size = GET_MODE_BITSIZE (comp_mode);
950 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
951 machine_mode mode = int_mode_for_size (target_size, 0).require ();
952 rtx select_mask;
953 rtx op_t = operands[2];
954 rtx op_f = operands[3];
955
956 /* The result of the comparison can be SI, HI or QI mode. Create a
957 mask based on that result. */
958 if (target_size > compare_size)
959 {
960 select_mask = gen_reg_rtx (mode);
961 emit_insn (gen_extend_compare (select_mask, compare_result));
962 }
963 else if (target_size < compare_size)
964 select_mask =
965 gen_rtx_SUBREG (mode, compare_result,
966 (compare_size - target_size) / BITS_PER_UNIT);
967 else if (comp_mode != mode)
968 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
969 else
970 select_mask = compare_result;
971
972 if (GET_MODE (target) != GET_MODE (op_t)
973 || GET_MODE (target) != GET_MODE (op_f))
974 abort ();
975
976 if (reverse_test)
977 emit_insn (gen_selb (target, op_t, op_f, select_mask));
978 else
979 emit_insn (gen_selb (target, op_f, op_t, select_mask));
980 }
981 else
982 {
983 rtx target = operands[0];
984 if (reverse_test)
985 emit_insn (gen_rtx_SET (compare_result,
986 gen_rtx_NOT (comp_mode, compare_result)));
987 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
988 emit_insn (gen_extendhisi2 (target, compare_result));
989 else if (GET_MODE (target) == SImode
990 && GET_MODE (compare_result) == QImode)
991 emit_insn (gen_extend_compare (target, compare_result));
992 else
993 emit_move_insn (target, compare_result);
994 }
995 }
996
997 HOST_WIDE_INT
998 const_double_to_hwint (rtx x)
999 {
1000 HOST_WIDE_INT val;
1001 if (GET_MODE (x) == SFmode)
1002 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
1003 else if (GET_MODE (x) == DFmode)
1004 {
1005 long l[2];
1006 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1007 val = l[0];
1008 val = (val << 32) | (l[1] & 0xffffffff);
1009 }
1010 else
1011 abort ();
1012 return val;
1013 }
1014
1015 rtx
1016 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1017 {
1018 long tv[2];
1019 REAL_VALUE_TYPE rv;
1020 gcc_assert (mode == SFmode || mode == DFmode);
1021
1022 if (mode == SFmode)
1023 tv[0] = (v << 32) >> 32;
1024 else if (mode == DFmode)
1025 {
1026 tv[1] = (v << 32) >> 32;
1027 tv[0] = v >> 32;
1028 }
1029 real_from_target (&rv, tv, mode);
1030 return const_double_from_real_value (rv, mode);
1031 }
1032
1033 void
1034 print_operand_address (FILE * file, register rtx addr)
1035 {
1036 rtx reg;
1037 rtx offset;
1038
1039 if (GET_CODE (addr) == AND
1040 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1041 && INTVAL (XEXP (addr, 1)) == -16)
1042 addr = XEXP (addr, 0);
1043
1044 switch (GET_CODE (addr))
1045 {
1046 case REG:
1047 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1048 break;
1049
1050 case PLUS:
1051 reg = XEXP (addr, 0);
1052 offset = XEXP (addr, 1);
1053 if (GET_CODE (offset) == REG)
1054 {
1055 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1056 reg_names[REGNO (offset)]);
1057 }
1058 else if (GET_CODE (offset) == CONST_INT)
1059 {
1060 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1061 INTVAL (offset), reg_names[REGNO (reg)]);
1062 }
1063 else
1064 abort ();
1065 break;
1066
1067 case CONST:
1068 case LABEL_REF:
1069 case SYMBOL_REF:
1070 case CONST_INT:
1071 output_addr_const (file, addr);
1072 break;
1073
1074 default:
1075 debug_rtx (addr);
1076 abort ();
1077 }
1078 }
1079
1080 void
1081 print_operand (FILE * file, rtx x, int code)
1082 {
1083 machine_mode mode = GET_MODE (x);
1084 HOST_WIDE_INT val;
1085 unsigned char arr[16];
1086 int xcode = GET_CODE (x);
1087 int i, info;
1088 if (GET_MODE (x) == VOIDmode)
1089 switch (code)
1090 {
1091 case 'L': /* 128 bits, signed */
1092 case 'm': /* 128 bits, signed */
1093 case 'T': /* 128 bits, signed */
1094 case 't': /* 128 bits, signed */
1095 mode = TImode;
1096 break;
1097 case 'K': /* 64 bits, signed */
1098 case 'k': /* 64 bits, signed */
1099 case 'D': /* 64 bits, signed */
1100 case 'd': /* 64 bits, signed */
1101 mode = DImode;
1102 break;
1103 case 'J': /* 32 bits, signed */
1104 case 'j': /* 32 bits, signed */
1105 case 's': /* 32 bits, signed */
1106 case 'S': /* 32 bits, signed */
1107 mode = SImode;
1108 break;
1109 }
1110 switch (code)
1111 {
1112
1113 case 'j': /* 32 bits, signed */
1114 case 'k': /* 64 bits, signed */
1115 case 'm': /* 128 bits, signed */
1116 if (xcode == CONST_INT
1117 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1118 {
1119 gcc_assert (logical_immediate_p (x, mode));
1120 constant_to_array (mode, x, arr);
1121 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1122 val = trunc_int_for_mode (val, SImode);
1123 switch (which_logical_immediate (val))
1124 {
1125 case SPU_ORI:
1126 break;
1127 case SPU_ORHI:
1128 fprintf (file, "h");
1129 break;
1130 case SPU_ORBI:
1131 fprintf (file, "b");
1132 break;
1133 default:
1134 gcc_unreachable();
1135 }
1136 }
1137 else
1138 gcc_unreachable();
1139 return;
1140
1141 case 'J': /* 32 bits, signed */
1142 case 'K': /* 64 bits, signed */
1143 case 'L': /* 128 bits, signed */
1144 if (xcode == CONST_INT
1145 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1146 {
1147 gcc_assert (logical_immediate_p (x, mode)
1148 || iohl_immediate_p (x, mode));
1149 constant_to_array (mode, x, arr);
1150 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1151 val = trunc_int_for_mode (val, SImode);
1152 switch (which_logical_immediate (val))
1153 {
1154 case SPU_ORI:
1155 case SPU_IOHL:
1156 break;
1157 case SPU_ORHI:
1158 val = trunc_int_for_mode (val, HImode);
1159 break;
1160 case SPU_ORBI:
1161 val = trunc_int_for_mode (val, QImode);
1162 break;
1163 default:
1164 gcc_unreachable();
1165 }
1166 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1167 }
1168 else
1169 gcc_unreachable();
1170 return;
1171
1172 case 't': /* 128 bits, signed */
1173 case 'd': /* 64 bits, signed */
1174 case 's': /* 32 bits, signed */
1175 if (CONSTANT_P (x))
1176 {
1177 enum immediate_class c = classify_immediate (x, mode);
1178 switch (c)
1179 {
1180 case IC_IL1:
1181 constant_to_array (mode, x, arr);
1182 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1183 val = trunc_int_for_mode (val, SImode);
1184 switch (which_immediate_load (val))
1185 {
1186 case SPU_IL:
1187 break;
1188 case SPU_ILA:
1189 fprintf (file, "a");
1190 break;
1191 case SPU_ILH:
1192 fprintf (file, "h");
1193 break;
1194 case SPU_ILHU:
1195 fprintf (file, "hu");
1196 break;
1197 default:
1198 gcc_unreachable ();
1199 }
1200 break;
1201 case IC_CPAT:
1202 constant_to_array (mode, x, arr);
1203 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1204 if (info == 1)
1205 fprintf (file, "b");
1206 else if (info == 2)
1207 fprintf (file, "h");
1208 else if (info == 4)
1209 fprintf (file, "w");
1210 else if (info == 8)
1211 fprintf (file, "d");
1212 break;
1213 case IC_IL1s:
1214 if (xcode == CONST_VECTOR)
1215 {
1216 x = CONST_VECTOR_ELT (x, 0);
1217 xcode = GET_CODE (x);
1218 }
1219 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1220 fprintf (file, "a");
1221 else if (xcode == HIGH)
1222 fprintf (file, "hu");
1223 break;
1224 case IC_FSMBI:
1225 case IC_FSMBI2:
1226 case IC_IL2:
1227 case IC_IL2s:
1228 case IC_POOL:
1229 abort ();
1230 }
1231 }
1232 else
1233 gcc_unreachable ();
1234 return;
1235
1236 case 'T': /* 128 bits, signed */
1237 case 'D': /* 64 bits, signed */
1238 case 'S': /* 32 bits, signed */
1239 if (CONSTANT_P (x))
1240 {
1241 enum immediate_class c = classify_immediate (x, mode);
1242 switch (c)
1243 {
1244 case IC_IL1:
1245 constant_to_array (mode, x, arr);
1246 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1247 val = trunc_int_for_mode (val, SImode);
1248 switch (which_immediate_load (val))
1249 {
1250 case SPU_IL:
1251 case SPU_ILA:
1252 break;
1253 case SPU_ILH:
1254 case SPU_ILHU:
1255 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1256 break;
1257 default:
1258 gcc_unreachable ();
1259 }
1260 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1261 break;
1262 case IC_FSMBI:
1263 constant_to_array (mode, x, arr);
1264 val = 0;
1265 for (i = 0; i < 16; i++)
1266 {
1267 val <<= 1;
1268 val |= arr[i] & 1;
1269 }
1270 print_operand (file, GEN_INT (val), 0);
1271 break;
1272 case IC_CPAT:
1273 constant_to_array (mode, x, arr);
1274 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1276 break;
1277 case IC_IL1s:
1278 if (xcode == HIGH)
1279 x = XEXP (x, 0);
1280 if (GET_CODE (x) == CONST_VECTOR)
1281 x = CONST_VECTOR_ELT (x, 0);
1282 output_addr_const (file, x);
1283 if (xcode == HIGH)
1284 fprintf (file, "@h");
1285 break;
1286 case IC_IL2:
1287 case IC_IL2s:
1288 case IC_FSMBI2:
1289 case IC_POOL:
1290 abort ();
1291 }
1292 }
1293 else
1294 gcc_unreachable ();
1295 return;
1296
1297 case 'C':
1298 if (xcode == CONST_INT)
1299 {
1300 /* Only the 4 least significant bits are relevant for generating
1301 control word instructions. */
1302 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1303 return;
1304 }
1305 break;
1306
1307 case 'M': /* print code for c*d */
1308 if (GET_CODE (x) == CONST_INT)
1309 switch (INTVAL (x))
1310 {
1311 case 1:
1312 fprintf (file, "b");
1313 break;
1314 case 2:
1315 fprintf (file, "h");
1316 break;
1317 case 4:
1318 fprintf (file, "w");
1319 break;
1320 case 8:
1321 fprintf (file, "d");
1322 break;
1323 default:
1324 gcc_unreachable();
1325 }
1326 else
1327 gcc_unreachable();
1328 return;
1329
1330 case 'N': /* Negate the operand */
1331 if (xcode == CONST_INT)
1332 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1333 else if (xcode == CONST_VECTOR)
1334 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1335 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1336 return;
1337
1338 case 'I': /* enable/disable interrupts */
1339 if (xcode == CONST_INT)
1340 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1341 return;
1342
1343 case 'b': /* branch modifiers */
1344 if (xcode == REG)
1345 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1346 else if (COMPARISON_P (x))
1347 fprintf (file, "%s", xcode == NE ? "n" : "");
1348 return;
1349
1350 case 'i': /* indirect call */
1351 if (xcode == MEM)
1352 {
1353 if (GET_CODE (XEXP (x, 0)) == REG)
1354 /* Used in indirect function calls. */
1355 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1356 else
1357 output_address (GET_MODE (x), XEXP (x, 0));
1358 }
1359 return;
1360
1361 case 'p': /* load/store */
1362 if (xcode == MEM)
1363 {
1364 x = XEXP (x, 0);
1365 xcode = GET_CODE (x);
1366 }
1367 if (xcode == AND)
1368 {
1369 x = XEXP (x, 0);
1370 xcode = GET_CODE (x);
1371 }
1372 if (xcode == REG)
1373 fprintf (file, "d");
1374 else if (xcode == CONST_INT)
1375 fprintf (file, "a");
1376 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1377 fprintf (file, "r");
1378 else if (xcode == PLUS || xcode == LO_SUM)
1379 {
1380 if (GET_CODE (XEXP (x, 1)) == REG)
1381 fprintf (file, "x");
1382 else
1383 fprintf (file, "d");
1384 }
1385 return;
1386
1387 case 'e':
1388 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1389 val &= 0x7;
1390 output_addr_const (file, GEN_INT (val));
1391 return;
1392
1393 case 'f':
1394 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1395 val &= 0x1f;
1396 output_addr_const (file, GEN_INT (val));
1397 return;
1398
1399 case 'g':
1400 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1401 val &= 0x3f;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1404
1405 case 'h':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val = (val >> 3) & 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1410
1411 case 'E':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val = -val;
1414 val &= 0x7;
1415 output_addr_const (file, GEN_INT (val));
1416 return;
1417
1418 case 'F':
1419 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420 val = -val;
1421 val &= 0x1f;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1424
1425 case 'G':
1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427 val = -val;
1428 val &= 0x3f;
1429 output_addr_const (file, GEN_INT (val));
1430 return;
1431
1432 case 'H':
1433 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1434 val = -(val & -8ll);
1435 val = (val >> 3) & 0x1f;
1436 output_addr_const (file, GEN_INT (val));
1437 return;
1438
1439 case 'v':
1440 case 'w':
1441 constant_to_array (mode, x, arr);
1442 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1443 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1444 return;
1445
1446 case 0:
1447 if (xcode == REG)
1448 fprintf (file, "%s", reg_names[REGNO (x)]);
1449 else if (xcode == MEM)
1450 output_address (GET_MODE (x), XEXP (x, 0));
1451 else if (xcode == CONST_VECTOR)
1452 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1453 else
1454 output_addr_const (file, x);
1455 return;
1456
1457 /* unused letters
1458 o qr u yz
1459 AB OPQR UVWXYZ */
1460 default:
1461 output_operand_lossage ("invalid %%xn code");
1462 }
1463 gcc_unreachable ();
1464 }
1465
1466 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1467 caller saved register. For leaf functions it is more efficient to
1468 use a volatile register because we won't need to save and restore the
1469 pic register. This routine is only valid after register allocation
1470 is completed, so we can pick an unused register. */
1471 static rtx
1472 get_pic_reg (void)
1473 {
1474 if (!reload_completed && !reload_in_progress)
1475 abort ();
1476
1477 /* If we've already made the decision, we need to stick with it. Once we've
1478 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1479 return true since the register is now live; this should not cause us to
1480 "switch back" to using pic_offset_table_rtx. */
1481 if (!cfun->machine->pic_reg)
1482 {
1483 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1484 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1485 else
1486 cfun->machine->pic_reg = pic_offset_table_rtx;
1487 }
1488
1489 return cfun->machine->pic_reg;
1490 }
1491
1492 /* Split constant addresses to handle cases that are too large.
1493 Add in the pic register when in PIC mode.
1494 Split immediates that require more than 1 instruction. */
1495 int
1496 spu_split_immediate (rtx * ops)
1497 {
1498 machine_mode mode = GET_MODE (ops[0]);
1499 enum immediate_class c = classify_immediate (ops[1], mode);
1500
1501 switch (c)
1502 {
1503 case IC_IL2:
1504 {
1505 unsigned char arrhi[16];
1506 unsigned char arrlo[16];
1507 rtx to, temp, hi, lo;
1508 int i;
1509 /* We need to do reals as ints because the constant used in the
1510 IOR might not be a legitimate real constant. */
1511 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1512 constant_to_array (mode, ops[1], arrhi);
1513 if (imode != mode)
1514 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1515 else
1516 to = ops[0];
1517 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1518 for (i = 0; i < 16; i += 4)
1519 {
1520 arrlo[i + 2] = arrhi[i + 2];
1521 arrlo[i + 3] = arrhi[i + 3];
1522 arrlo[i + 0] = arrlo[i + 1] = 0;
1523 arrhi[i + 2] = arrhi[i + 3] = 0;
1524 }
1525 hi = array_to_constant (imode, arrhi);
1526 lo = array_to_constant (imode, arrlo);
1527 emit_move_insn (temp, hi);
1528 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1529 return 1;
1530 }
1531 case IC_FSMBI2:
1532 {
1533 unsigned char arr_fsmbi[16];
1534 unsigned char arr_andbi[16];
1535 rtx to, reg_fsmbi, reg_and;
1536 int i;
1537 /* We need to do reals as ints because the constant used in the
1538 * AND might not be a legitimate real constant. */
1539 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1540 constant_to_array (mode, ops[1], arr_fsmbi);
1541 if (imode != mode)
1542 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1543 else
1544 to = ops[0];
1545 for (i = 0; i < 16; i++)
1546 if (arr_fsmbi[i] != 0)
1547 {
1548 arr_andbi[0] = arr_fsmbi[i];
1549 arr_fsmbi[i] = 0xff;
1550 }
1551 for (i = 1; i < 16; i++)
1552 arr_andbi[i] = arr_andbi[0];
1553 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1554 reg_and = array_to_constant (imode, arr_andbi);
1555 emit_move_insn (to, reg_fsmbi);
1556 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1557 return 1;
1558 }
1559 case IC_POOL:
1560 if (reload_in_progress || reload_completed)
1561 {
1562 rtx mem = force_const_mem (mode, ops[1]);
1563 if (TARGET_LARGE_MEM)
1564 {
1565 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1566 emit_move_insn (addr, XEXP (mem, 0));
1567 mem = replace_equiv_address (mem, addr);
1568 }
1569 emit_move_insn (ops[0], mem);
1570 return 1;
1571 }
1572 break;
1573 case IC_IL1s:
1574 case IC_IL2s:
1575 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1576 {
1577 if (c == IC_IL2s)
1578 {
1579 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1580 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1581 }
1582 else if (flag_pic)
1583 emit_insn (gen_pic (ops[0], ops[1]));
1584 if (flag_pic)
1585 {
1586 rtx pic_reg = get_pic_reg ();
1587 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1588 }
1589 return flag_pic || c == IC_IL2s;
1590 }
1591 break;
1592 case IC_IL1:
1593 case IC_FSMBI:
1594 case IC_CPAT:
1595 break;
1596 }
1597 return 0;
1598 }
1599
1600 /* SAVING is TRUE when we are generating the actual load and store
1601 instructions for REGNO. When determining the size of the stack
1602 needed for saving registers we must allocate enough space for the
1603 worst case, because we don't always have the information early enough
1604 to avoid allocating it. But we can at least eliminate the actual loads
1605 and stores during the prologue/epilogue. */
1606 static int
1607 need_to_save_reg (int regno, int saving)
1608 {
1609 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1610 return 1;
1611 if (flag_pic
1612 && regno == PIC_OFFSET_TABLE_REGNUM
1613 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1614 return 1;
1615 return 0;
1616 }
1617
1618 /* This function is only correct starting with local register
1619 allocation */
1620 int
1621 spu_saved_regs_size (void)
1622 {
1623 int reg_save_size = 0;
1624 int regno;
1625
1626 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1627 if (need_to_save_reg (regno, 0))
1628 reg_save_size += 0x10;
1629 return reg_save_size;
1630 }
1631
1632 static rtx_insn *
1633 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1634 {
1635 rtx reg = gen_rtx_REG (V4SImode, regno);
1636 rtx mem =
1637 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1638 return emit_insn (gen_movv4si (mem, reg));
1639 }
1640
1641 static rtx_insn *
1642 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1643 {
1644 rtx reg = gen_rtx_REG (V4SImode, regno);
1645 rtx mem =
1646 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1647 return emit_insn (gen_movv4si (reg, mem));
1648 }
1649
1650 /* This happens after reload, so we need to expand it. */
1651 static rtx_insn *
1652 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1653 {
1654 rtx_insn *insn;
1655 if (satisfies_constraint_K (GEN_INT (imm)))
1656 {
1657 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1658 }
1659 else
1660 {
1661 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1662 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1663 if (REGNO (src) == REGNO (scratch))
1664 abort ();
1665 }
1666 return insn;
1667 }
1668
1669 /* Return nonzero if this function is known to have a null epilogue. */
1670
1671 int
1672 direct_return (void)
1673 {
1674 if (reload_completed)
1675 {
1676 if (cfun->static_chain_decl == 0
1677 && (spu_saved_regs_size ()
1678 + get_frame_size ()
1679 + crtl->outgoing_args_size
1680 + crtl->args.pretend_args_size == 0)
1681 && crtl->is_leaf)
1682 return 1;
1683 }
1684 return 0;
1685 }
1686
1687 /*
1688 The stack frame looks like this:
1689 +-------------+
1690 | incoming |
1691 | args |
1692 AP -> +-------------+
1693 | $lr save |
1694 +-------------+
1695 prev SP | back chain |
1696 +-------------+
1697 | var args |
1698 | reg save | crtl->args.pretend_args_size bytes
1699 +-------------+
1700 | ... |
1701 | saved regs | spu_saved_regs_size() bytes
1702 FP -> +-------------+
1703 | ... |
1704 | vars | get_frame_size() bytes
1705 HFP -> +-------------+
1706 | ... |
1707 | outgoing |
1708 | args | crtl->outgoing_args_size bytes
1709 +-------------+
1710 | $lr of next |
1711 | frame |
1712 +-------------+
1713 | back chain |
1714 SP -> +-------------+
1715
1716 */
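/* Tying the picture to the code below: total_size is
   get_frame_size () + spu_saved_regs_size () + crtl->outgoing_args_size
   + crtl->args.pretend_args_size, plus STACK_POINTER_OFFSET for the back
   chain and $lr save slots unless the function is a leaf with no frame and
   no alloca.  */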
1717 void
1718 spu_expand_prologue (void)
1719 {
1720 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1721 HOST_WIDE_INT total_size;
1722 HOST_WIDE_INT saved_regs_size;
1723 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1724 rtx scratch_reg_0, scratch_reg_1;
1725 rtx_insn *insn;
1726 rtx real;
1727
1728 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1729 cfun->machine->pic_reg = pic_offset_table_rtx;
1730
1731 if (spu_naked_function_p (current_function_decl))
1732 return;
1733
1734 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1735 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1736
1737 saved_regs_size = spu_saved_regs_size ();
1738 total_size = size + saved_regs_size
1739 + crtl->outgoing_args_size
1740 + crtl->args.pretend_args_size;
1741
1742 if (!crtl->is_leaf
1743 || cfun->calls_alloca || total_size > 0)
1744 total_size += STACK_POINTER_OFFSET;
1745
1746 /* Save this first because code after this might use the link
1747 register as a scratch register. */
1748 if (!crtl->is_leaf)
1749 {
1750 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1751 RTX_FRAME_RELATED_P (insn) = 1;
1752 }
1753
1754 if (total_size > 0)
1755 {
1756 offset = -crtl->args.pretend_args_size;
1757 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1758 if (need_to_save_reg (regno, 1))
1759 {
1760 offset -= 16;
1761 insn = frame_emit_store (regno, sp_reg, offset);
1762 RTX_FRAME_RELATED_P (insn) = 1;
1763 }
1764 }
1765
1766 if (flag_pic && cfun->machine->pic_reg)
1767 {
1768 rtx pic_reg = cfun->machine->pic_reg;
1769 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1770 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1771 }
1772
1773 if (total_size > 0)
1774 {
1775 if (flag_stack_check || flag_stack_clash_protection)
1776 {
1777 /* We compare against total_size-1 because
1778 ($sp >= total_size) <=> ($sp > total_size-1) */
1779 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1780 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1781 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1782 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1783 {
1784 emit_move_insn (scratch_v4si, size_v4si);
1785 size_v4si = scratch_v4si;
1786 }
1787 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1788 emit_insn (gen_vec_extractv4sisi
1789 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1790 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1791 }
1792
1793 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1794 the value of the previous $sp because we save it as the back
1795 chain. */
1796 if (total_size <= 2000)
1797 {
1798 /* In this case we save the back chain first. */
1799 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1800 insn =
1801 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1802 }
1803 else
1804 {
1805 insn = emit_move_insn (scratch_reg_0, sp_reg);
1806 insn =
1807 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1808 }
1809 RTX_FRAME_RELATED_P (insn) = 1;
1810 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1811 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1812
1813 if (total_size > 2000)
1814 {
1815 /* Save the back chain ptr */
1816 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1817 }
1818
1819 if (frame_pointer_needed)
1820 {
1821 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1822 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1823 + crtl->outgoing_args_size;
1824 /* Set the new frame_pointer */
1825 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1826 RTX_FRAME_RELATED_P (insn) = 1;
1827 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1828 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1829 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1830 }
1831 }
1832
1833 if (flag_stack_usage_info)
1834 current_function_static_stack_size = total_size;
1835 }
1836
1837 void
1838 spu_expand_epilogue (bool sibcall_p)
1839 {
1840 int size = get_frame_size (), offset, regno;
1841 HOST_WIDE_INT saved_regs_size, total_size;
1842 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1843 rtx scratch_reg_0;
1844
1845 if (spu_naked_function_p (current_function_decl))
1846 return;
1847
1848 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1849
1850 saved_regs_size = spu_saved_regs_size ();
1851 total_size = size + saved_regs_size
1852 + crtl->outgoing_args_size
1853 + crtl->args.pretend_args_size;
1854
1855 if (!crtl->is_leaf
1856 || cfun->calls_alloca || total_size > 0)
1857 total_size += STACK_POINTER_OFFSET;
1858
1859 if (total_size > 0)
1860 {
1861 if (cfun->calls_alloca)
1862 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1863 else
1864 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1865
1866
1867 if (saved_regs_size > 0)
1868 {
1869 offset = -crtl->args.pretend_args_size;
1870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1871 if (need_to_save_reg (regno, 1))
1872 {
1873 offset -= 0x10;
1874 frame_emit_load (regno, sp_reg, offset);
1875 }
1876 }
1877 }
1878
1879 if (!crtl->is_leaf)
1880 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1881
1882 if (!sibcall_p)
1883 {
1884 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1885 emit_jump_insn (gen__return ());
1886 }
1887 }
1888
1889 rtx
1890 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1891 {
1892 if (count != 0)
1893 return 0;
1894 /* This is inefficient because it ends up copying to a save-register
1895 which then gets saved even though $lr has already been saved. But
1896 it does generate better code for leaf functions and we don't need
1897 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1898 used for __builtin_return_address anyway, so maybe we don't care if
1899 it's inefficient. */
1900 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1901 }
1902 \f
1903
1904 /* Given VAL, generate a constant appropriate for MODE.
1905 If MODE is a vector mode, every element will be VAL.
1906 For TImode, VAL will be zero extended to 128 bits. */
1907 rtx
1908 spu_const (machine_mode mode, HOST_WIDE_INT val)
1909 {
1910 rtx inner;
1911
1912 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1913 || GET_MODE_CLASS (mode) == MODE_FLOAT
1914 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1915 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1916
1917 if (GET_MODE_CLASS (mode) == MODE_INT)
1918 return immed_double_const (val, 0, mode);
1919
1920 /* val is the bit representation of the float */
1921 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1922 return hwint_to_const_double (mode, val);
1923
1924 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1925 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1926 else
1927 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1928
1929 return gen_const_vec_duplicate (mode, inner);
1930 }
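/* Usage sketch with illustrative values: spu_const (V4SImode, 1) builds the
   vector constant {1, 1, 1, 1}, and since VAL is taken as a bit pattern for
   float modes, spu_const (SFmode, 0x3f800000) yields 1.0f.  */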
1931
1932 /* Create a MODE vector constant from 4 ints. */
1933 rtx
1934 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1935 {
1936 unsigned char arr[16];
1937 arr[0] = (a >> 24) & 0xff;
1938 arr[1] = (a >> 16) & 0xff;
1939 arr[2] = (a >> 8) & 0xff;
1940 arr[3] = (a >> 0) & 0xff;
1941 arr[4] = (b >> 24) & 0xff;
1942 arr[5] = (b >> 16) & 0xff;
1943 arr[6] = (b >> 8) & 0xff;
1944 arr[7] = (b >> 0) & 0xff;
1945 arr[8] = (c >> 24) & 0xff;
1946 arr[9] = (c >> 16) & 0xff;
1947 arr[10] = (c >> 8) & 0xff;
1948 arr[11] = (c >> 0) & 0xff;
1949 arr[12] = (d >> 24) & 0xff;
1950 arr[13] = (d >> 16) & 0xff;
1951 arr[14] = (d >> 8) & 0xff;
1952 arr[15] = (d >> 0) & 0xff;
1953 return array_to_constant(mode, arr);
1954 }
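/* For example, spu_const_from_ints (V4SImode, 1, 2, 3, 4) fills the byte
   array word by word in big-endian order, producing the vector constant
   {1, 2, 3, 4}.  */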
1955 \f
1956 /* branch hint stuff */
1957
1958 /* An array of these is used to propagate hints to predecessor blocks. */
1959 struct spu_bb_info
1960 {
1961 rtx_insn *prop_jump; /* propagated from another block */
1962 int bb_index; /* the original block. */
1963 };
1964 static struct spu_bb_info *spu_bb_info;
1965
1966 #define STOP_HINT_P(INSN) \
1967 (CALL_P(INSN) \
1968 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1969 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1970
1971 /* 1 when RTX is a hinted branch or its target. We keep track of
1972 what has been hinted so the safe-hint code can test it easily. */
1973 #define HINTED_P(RTX) \
1974 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1975
1976 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1977 #define SCHED_ON_EVEN_P(RTX) \
1978 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1979
1980 /* Emit a nop for INSN such that the two will dual issue. This assumes
1981 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1982 We check for TImode to handle a MULTI1 insn which has dual issued its
1983 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1984 static void
1985 emit_nop_for_insn (rtx_insn *insn)
1986 {
1987 int p;
1988 rtx_insn *new_insn;
1989
1990 /* We need to handle JUMP_TABLE_DATA separately. */
1991 if (JUMP_TABLE_DATA_P (insn))
1992 {
1993 new_insn = emit_insn_after (gen_lnop(), insn);
1994 recog_memoized (new_insn);
1995 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1996 return;
1997 }
1998
1999 p = get_pipe (insn);
2000 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2001 new_insn = emit_insn_after (gen_lnop (), insn);
2002 else if (p == 1 && GET_MODE (insn) == TImode)
2003 {
2004 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2005 PUT_MODE (new_insn, TImode);
2006 PUT_MODE (insn, VOIDmode);
2007 }
2008 else
2009 new_insn = emit_insn_after (gen_lnop (), insn);
2010 recog_memoized (new_insn);
2011 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2012 }
2013
2014 /* Insert nops in basic blocks to meet dual issue alignment
2015 requirements. Also make sure hbrp and hint instructions are at least
2016 one cycle apart, possibly inserting a nop. */
2017 static void
2018 pad_bb(void)
2019 {
2020 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2021 int length;
2022 int addr;
2023
2024 /* This sets up INSN_ADDRESSES. */
2025 shorten_branches (get_insns ());
2026
2027 /* Keep track of length added by nops. */
2028 length = 0;
2029
2030 prev_insn = 0;
2031 insn = get_insns ();
2032 if (!active_insn_p (insn))
2033 insn = next_active_insn (insn);
2034 for (; insn; insn = next_insn)
2035 {
2036 next_insn = next_active_insn (insn);
2037 if (INSN_P (insn)
2038 && (INSN_CODE (insn) == CODE_FOR_iprefetch
2039 || INSN_CODE (insn) == CODE_FOR_hbr))
2040 {
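/* Keep this hint/hbrp at least one cycle away from the previous one,
   inserting an lnop when the addresses show the two would issue too
   close together.  */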
2041 if (hbr_insn)
2042 {
2043 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2044 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2045 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2046 || (a1 - a0 == 4))
2047 {
2048 prev_insn = emit_insn_before (gen_lnop (), insn);
2049 PUT_MODE (prev_insn, GET_MODE (insn));
2050 PUT_MODE (insn, TImode);
2051 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2052 length += 4;
2053 }
2054 }
2055 hbr_insn = insn;
2056 }
2057 if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2058 {
2059 if (GET_MODE (insn) == TImode)
2060 PUT_MODE (next_insn, TImode);
2061 insn = next_insn;
2062 next_insn = next_active_insn (insn);
2063 }
2064 addr = INSN_ADDRESSES (INSN_UID (insn));
2065 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2066 {
2067 if (((addr + length) & 7) != 0)
2068 {
2069 emit_nop_for_insn (prev_insn);
2070 length += 4;
2071 }
2072 }
2073 else if (GET_MODE (insn) == TImode
2074 && ((next_insn && GET_MODE (next_insn) != TImode)
2075 || get_attr_type (insn) == TYPE_MULTI0)
2076 && ((addr + length) & 7) != 0)
2077 {
2078 /* prev_insn will always be set because the first insn is
2079 always 8-byte aligned. */
2080 emit_nop_for_insn (prev_insn);
2081 length += 4;
2082 }
2083 prev_insn = insn;
2084 }
2085 }
2086
2087 \f
2088 /* Routines for branch hints. */
2089
2090 static void
2091 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2092 int distance, sbitmap blocks)
2093 {
2094 rtx_insn *hint;
2095 rtx_insn *insn;
2096 rtx_jump_table_data *table;
2097
2098 if (before == 0 || branch == 0 || target == 0)
2099 return;
2100
2101 /* While scheduling we require hints to be no further than 600 bytes
2102 from the branch, so we need to enforce that here too. */
2103 if (distance > 600)
2104 return;
2105
2106 /* If BEFORE is a basic block note, emit the hint after the note. */
2107 if (NOTE_INSN_BASIC_BLOCK_P (before))
2108 before = NEXT_INSN (before);
2109
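/* Tag the branch with a local label so the hbr pattern can refer to the
   branch's own address; spu_machine_dependent_reorg adjusts this label
   reference later if nops get inserted between the label and the branch.  */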
2110 rtx_code_label *branch_label = gen_label_rtx ();
2111 LABEL_NUSES (branch_label)++;
2112 LABEL_PRESERVE_P (branch_label) = 1;
2113 insn = emit_label_before (branch_label, branch);
2114 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2115 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2116
2117 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2118 recog_memoized (hint);
2119 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2120 HINTED_P (branch) = 1;
2121
2122 if (GET_CODE (target) == LABEL_REF)
2123 HINTED_P (XEXP (target, 0)) = 1;
2124 else if (tablejump_p (branch, 0, &table))
2125 {
2126 rtvec vec;
2127 int j;
2128 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2129 vec = XVEC (PATTERN (table), 0);
2130 else
2131 vec = XVEC (PATTERN (table), 1);
2132 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2133 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2134 }
2135
2136 if (distance >= 588)
2137 {
2138 /* Make sure the hint isn't scheduled any earlier than this point,
2139 which could make it too far for the branch offset to fit. */
2140 insn = emit_insn_before (gen_blockage (), hint);
2141 recog_memoized (insn);
2142 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2143 }
2144 else if (distance <= 8 * 4)
2145 {
2146 /* To guarantee at least 8 insns between the hint and branch we
2147 insert nops. */
2148 int d;
2149 for (d = distance; d < 8 * 4; d += 4)
2150 {
2151 insn =
2152 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2153 recog_memoized (insn);
2154 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2155 }
2156
2157 /* Make sure any nops inserted aren't scheduled before the hint. */
2158 insn = emit_insn_after (gen_blockage (), hint);
2159 recog_memoized (insn);
2160 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2161
2162 /* Make sure any nops inserted aren't scheduled after the call. */
2163 if (CALL_P (branch) && distance < 8 * 4)
2164 {
2165 insn = emit_insn_before (gen_blockage (), branch);
2166 recog_memoized (insn);
2167 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2168 }
2169 }
2170 }
2171
2172 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2173 the rtx for the branch target. */
2174 static rtx
2175 get_branch_target (rtx_insn *branch)
2176 {
2177 if (JUMP_P (branch))
2178 {
2179 rtx set, src;
2180
2181 /* Return statements */
2182 if (GET_CODE (PATTERN (branch)) == RETURN)
2183 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2184
2185 /* ASM GOTOs. */
2186 if (extract_asm_operands (PATTERN (branch)) != NULL)
2187 return NULL;
2188
2189 set = single_set (branch);
2190 src = SET_SRC (set);
2191 if (GET_CODE (SET_DEST (set)) != PC)
2192 abort ();
2193
2194 if (GET_CODE (src) == IF_THEN_ELSE)
2195 {
2196 rtx lab = 0;
2197 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2198 if (note)
2199 {
2200 /* If the more probable case is not a fall through, then
2201 try a branch hint. */
2202 int prob = profile_probability::from_reg_br_prob_note
2203 (XINT (note, 0)).to_reg_br_prob_base ();
2204 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2205 && GET_CODE (XEXP (src, 1)) != PC)
2206 lab = XEXP (src, 1);
2207 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2208 && GET_CODE (XEXP (src, 2)) != PC)
2209 lab = XEXP (src, 2);
2210 }
2211 if (lab)
2212 {
2213 if (GET_CODE (lab) == RETURN)
2214 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2215 return lab;
2216 }
2217 return 0;
2218 }
2219
2220 return src;
2221 }
2222 else if (CALL_P (branch))
2223 {
2224 rtx call;
2225 /* All of our call patterns are in a PARALLEL and the CALL is
2226 the first pattern in the PARALLEL. */
2227 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2228 abort ();
2229 call = XVECEXP (PATTERN (branch), 0, 0);
2230 if (GET_CODE (call) == SET)
2231 call = SET_SRC (call);
2232 if (GET_CODE (call) != CALL)
2233 abort ();
2234 return XEXP (XEXP (call, 0), 0);
2235 }
2236 return 0;
2237 }
2238
2239 /* The special $hbr register is used to prevent the insn scheduler from
2240 moving hbr insns across instructions which invalidate them. It
2241 should only be used in a clobber, and this function searches for
2242 insns which clobber it. */
2243 static bool
2244 insn_clobbers_hbr (rtx_insn *insn)
2245 {
2246 if (INSN_P (insn)
2247 && GET_CODE (PATTERN (insn)) == PARALLEL)
2248 {
2249 rtx parallel = PATTERN (insn);
2250 rtx clobber;
2251 int j;
2252 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2253 {
2254 clobber = XVECEXP (parallel, 0, j);
2255 if (GET_CODE (clobber) == CLOBBER
2256 && GET_CODE (XEXP (clobber, 0)) == REG
2257 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2258 return 1;
2259 }
2260 }
2261 return 0;
2262 }
2263
2264 /* Search up to 32 insns starting at FIRST:
2265 - at any kind of hinted branch, just return
2266 - at any unconditional branch in the first 15 insns, just return
2267 - at a call or indirect branch, after the first 15 insns, force it to
2268 an even address and return
2269 - at any unconditional branch, after the first 15 insns, force it to
2270 an even address.
2271 At the end of the search, insert an hbrp within 4 insns of FIRST,
2272 and an hbrp within 16 instructions of FIRST.
2273 */
2274 static void
2275 insert_hbrp_for_ilb_runout (rtx_insn *first)
2276 {
2277 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2278 int addr = 0, length, first_addr = -1;
2279 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2280 int insert_lnop_after = 0;
2281 for (insn = first; insn; insn = NEXT_INSN (insn))
2282 if (INSN_P (insn))
2283 {
2284 if (first_addr == -1)
2285 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2286 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2287 length = get_attr_length (insn);
2288
2289 if (before_4 == 0 && addr + length >= 4 * 4)
2290 before_4 = insn;
2291 /* We test for 14 instructions because the first hbrp will add
2292 up to 2 instructions. */
2293 if (before_16 == 0 && addr + length >= 14 * 4)
2294 before_16 = insn;
2295
2296 if (INSN_CODE (insn) == CODE_FOR_hbr)
2297 {
2298 /* Make sure an hbrp is at least 2 cycles away from a hint.
2299 Insert an lnop after the hbrp when necessary. */
2300 if (before_4 == 0 && addr > 0)
2301 {
2302 before_4 = insn;
2303 insert_lnop_after |= 1;
2304 }
2305 else if (before_4 && addr <= 4 * 4)
2306 insert_lnop_after |= 1;
2307 if (before_16 == 0 && addr > 10 * 4)
2308 {
2309 before_16 = insn;
2310 insert_lnop_after |= 2;
2311 }
2312 else if (before_16 && addr <= 14 * 4)
2313 insert_lnop_after |= 2;
2314 }
2315
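/* Remember the addresses of any hbrp insns already present so we do not
   insert redundant ones below.  */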
2316 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2317 {
2318 if (addr < hbrp_addr0)
2319 hbrp_addr0 = addr;
2320 else if (addr < hbrp_addr1)
2321 hbrp_addr1 = addr;
2322 }
2323
2324 if (CALL_P (insn) || JUMP_P (insn))
2325 {
2326 if (HINTED_P (insn))
2327 return;
2328
2329 /* Any branch after the first 15 insns should be on an even
2330 address to avoid a special case branch. There might be
2331 some nops and/or hbrps inserted, so we test after 10
2332 insns. */
2333 if (addr > 10 * 4)
2334 SCHED_ON_EVEN_P (insn) = 1;
2335 }
2336
2337 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2338 return;
2339
2340
2341 if (addr + length >= 32 * 4)
2342 {
2343 gcc_assert (before_4 && before_16);
2344 if (hbrp_addr0 > 4 * 4)
2345 {
2346 insn =
2347 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2348 recog_memoized (insn);
2349 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2350 INSN_ADDRESSES_NEW (insn,
2351 INSN_ADDRESSES (INSN_UID (before_4)));
2352 PUT_MODE (insn, GET_MODE (before_4));
2353 PUT_MODE (before_4, TImode);
2354 if (insert_lnop_after & 1)
2355 {
2356 insn = emit_insn_before (gen_lnop (), before_4);
2357 recog_memoized (insn);
2358 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2359 INSN_ADDRESSES_NEW (insn,
2360 INSN_ADDRESSES (INSN_UID (before_4)));
2361 PUT_MODE (insn, TImode);
2362 }
2363 }
2364 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2365 && hbrp_addr1 > 16 * 4)
2366 {
2367 insn =
2368 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2369 recog_memoized (insn);
2370 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2371 INSN_ADDRESSES_NEW (insn,
2372 INSN_ADDRESSES (INSN_UID (before_16)));
2373 PUT_MODE (insn, GET_MODE (before_16));
2374 PUT_MODE (before_16, TImode);
2375 if (insert_lnop_after & 2)
2376 {
2377 insn = emit_insn_before (gen_lnop (), before_16);
2378 recog_memoized (insn);
2379 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2380 INSN_ADDRESSES_NEW (insn,
2381 INSN_ADDRESSES (INSN_UID
2382 (before_16)));
2383 PUT_MODE (insn, TImode);
2384 }
2385 }
2386 return;
2387 }
2388 }
2389 else if (BARRIER_P (insn))
2390 return;
2391
2392 }
2393
2394 /* The SPU might hang when it executes 48 inline instructions after a
2395 hinted branch jumps to its hinted target. The beginning of a
2396 function and the return from a call might have been hinted, and
2397 must be handled as well. To prevent a hang we insert 2 hbrps. The
2398 first should be within 6 insns of the branch target. The second
2399 should be within 22 insns of the branch target. When determining
2400 if hbrps are necessary, we look for only 32 inline instructions,
2401 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2402 when inserting new hbrps, we insert them within 4 and 16 insns of
2403 the target. */
2404 static void
2405 insert_hbrp (void)
2406 {
2407 rtx_insn *insn;
2408 if (TARGET_SAFE_HINTS)
2409 {
2410 shorten_branches (get_insns ());
2411 /* Insert hbrp at beginning of function */
2412 insn = next_active_insn (get_insns ());
2413 if (insn)
2414 insert_hbrp_for_ilb_runout (insn);
2415 /* Insert hbrp after hinted targets. */
2416 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2417 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2418 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2419 }
2420 }
2421
2422 static int in_spu_reorg;
2423
2424 static void
2425 spu_var_tracking (void)
2426 {
2427 if (flag_var_tracking)
2428 {
2429 df_analyze ();
2430 timevar_push (TV_VAR_TRACKING);
2431 variable_tracking_main ();
2432 timevar_pop (TV_VAR_TRACKING);
2433 df_finish_pass (false);
2434 }
2435 }
2436
2437 /* Insert branch hints. There are no branch optimizations after this
2438 pass, so it's safe to set our branch hints now. */
2439 static void
2440 spu_machine_dependent_reorg (void)
2441 {
2442 sbitmap blocks;
2443 basic_block bb;
2444 rtx_insn *branch, *insn;
2445 rtx branch_target = 0;
2446 int branch_addr = 0, insn_addr, required_dist = 0;
2447 int i;
2448 unsigned int j;
2449
2450 if (!TARGET_BRANCH_HINTS || optimize == 0)
2451 {
2452 /* We still do it for unoptimized code because an external
2453 function might have hinted a call or return. */
2454 compute_bb_for_insn ();
2455 insert_hbrp ();
2456 pad_bb ();
2457 spu_var_tracking ();
2458 free_bb_for_insn ();
2459 return;
2460 }
2461
2462 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2463 bitmap_clear (blocks);
2464
2465 in_spu_reorg = 1;
2466 compute_bb_for_insn ();
2467
2468 /* (Re-)discover loops so that bb->loop_father can be used
2469 in the analysis below. */
2470 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2471
2472 compact_blocks ();
2473
2474 spu_bb_info =
2475 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2476 sizeof (struct spu_bb_info));
2477
2478 /* We need exact insn addresses and lengths. */
2479 shorten_branches (get_insns ());
2480
2481 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2482 {
2483 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2484 branch = 0;
2485 if (spu_bb_info[i].prop_jump)
2486 {
2487 branch = spu_bb_info[i].prop_jump;
2488 branch_target = get_branch_target (branch);
2489 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2490 required_dist = spu_hint_dist;
2491 }
2492 /* Search from the end of a block to its beginning. In this loop, find
2493 jumps which need a branch hint and emit the hint only when:
2494 - it's an indirect branch and we're at the insn which sets
2495 the register
2496 - we're at an insn that will invalidate the hint. e.g., a
2497 call, another hint insn, inline asm that clobbers $hbr, and
2498 some inlined operations (divmodsi4). Don't consider jumps
2499 because they are only at the end of a block and are
2500 considered when we are deciding whether to propagate
2501 - we're getting too far away from the branch. The hbr insns
2502 only have a signed 10-bit offset.
2503 We go back as far as possible so the branch will be considered
2504 for propagation when we get to the beginning of the block. */
2505 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2506 {
2507 if (INSN_P (insn))
2508 {
2509 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2510 if (branch
2511 && ((GET_CODE (branch_target) == REG
2512 && set_of (branch_target, insn) != NULL_RTX)
2513 || insn_clobbers_hbr (insn)
2514 || branch_addr - insn_addr > 600))
2515 {
2516 rtx_insn *next = NEXT_INSN (insn);
2517 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2518 if (insn != BB_END (bb)
2519 && branch_addr - next_addr >= required_dist)
2520 {
2521 if (dump_file)
2522 fprintf (dump_file,
2523 "hint for %i in block %i before %i\n",
2524 INSN_UID (branch), bb->index,
2525 INSN_UID (next));
2526 spu_emit_branch_hint (next, branch, branch_target,
2527 branch_addr - next_addr, blocks);
2528 }
2529 branch = 0;
2530 }
2531
2532 /* JUMP_P will only be true at the end of a block. When
2533 branch is already set it means we've previously decided
2534 to propagate a hint for that branch into this block. */
2535 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2536 {
2537 branch = 0;
2538 if ((branch_target = get_branch_target (insn)))
2539 {
2540 branch = insn;
2541 branch_addr = insn_addr;
2542 required_dist = spu_hint_dist;
2543 }
2544 }
2545 }
2546 if (insn == BB_HEAD (bb))
2547 break;
2548 }
2549
2550 if (branch)
2551 {
2552 /* If we haven't emitted a hint for this branch yet, it might
2553 be profitable to emit it in one of the predecessor blocks,
2554 especially for loops. */
2555 rtx_insn *bbend;
2556 basic_block prev = 0, prop = 0, prev2 = 0;
2557 int loop_exit = 0, simple_loop = 0;
2558 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2559
2560 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2561 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2562 prev = EDGE_PRED (bb, j)->src;
2563 else
2564 prev2 = EDGE_PRED (bb, j)->src;
2565
2566 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2567 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2568 loop_exit = 1;
2569 else if (EDGE_SUCC (bb, j)->dest == bb)
2570 simple_loop = 1;
2571
2572 /* If this branch is a loop exit then propagate to previous
2573 fallthru block. This catches the cases when it is a simple
2574 loop or when there is an initial branch into the loop. */
2575 if (prev && (loop_exit || simple_loop)
2576 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2577 prop = prev;
2578
2579 /* If there is only one adjacent predecessor, don't propagate
2580 outside this loop. */
2581 else if (prev && single_pred_p (bb)
2582 && prev->loop_father == bb->loop_father)
2583 prop = prev;
2584
2585 /* If this is the JOIN block of a simple IF-THEN then
2586 propagate the hint to the HEADER block. */
2587 else if (prev && prev2
2588 && EDGE_COUNT (bb->preds) == 2
2589 && EDGE_COUNT (prev->preds) == 1
2590 && EDGE_PRED (prev, 0)->src == prev2
2591 && prev2->loop_father == bb->loop_father
2592 && GET_CODE (branch_target) != REG)
2593 prop = prev;
2594
2595 /* Don't propagate when:
2596 - this is a simple loop and the hint would be too far
2597 - this is not a simple loop and there are 16 insns in
2598 this block already
2599 - the predecessor block ends in a branch that will be
2600 hinted
2601 - the predecessor block ends in an insn that invalidates
2602 the hint */
2603 if (prop
2604 && prop->index >= 0
2605 && (bbend = BB_END (prop))
2606 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2607 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2608 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2609 {
2610 if (dump_file)
2611 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2612 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2613 bb->index, prop->index, bb_loop_depth (bb),
2614 INSN_UID (branch), loop_exit, simple_loop,
2615 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2616
2617 spu_bb_info[prop->index].prop_jump = branch;
2618 spu_bb_info[prop->index].bb_index = i;
2619 }
2620 else if (branch_addr - next_addr >= required_dist)
2621 {
2622 if (dump_file)
2623 fprintf (dump_file, "hint for %i in block %i before %i\n",
2624 INSN_UID (branch), bb->index,
2625 INSN_UID (NEXT_INSN (insn)));
2626 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2627 branch_addr - next_addr, blocks);
2628 }
2629 branch = 0;
2630 }
2631 }
2632 free (spu_bb_info);
2633
2634 if (!bitmap_empty_p (blocks))
2635 find_many_sub_basic_blocks (blocks);
2636
2637 /* We have to schedule to make sure alignment is ok. */
2638 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2639
2640 /* The hints need to be scheduled, so call the scheduler again. */
2641 schedule_insns ();
2642 df_finish_pass (true);
2643
2644 insert_hbrp ();
2645
2646 pad_bb ();
2647
2648 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2649 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2650 {
2651 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2652 between its branch label and the branch. We don't move the
2653 label because GCC expects it at the beginning of the block. */
2654 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2655 rtx label_ref = XVECEXP (unspec, 0, 0);
2656 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2657 rtx_insn *branch;
2658 int offset = 0;
2659 for (branch = NEXT_INSN (label);
2660 !JUMP_P (branch) && !CALL_P (branch);
2661 branch = NEXT_INSN (branch))
2662 if (NONJUMP_INSN_P (branch))
2663 offset += get_attr_length (branch);
2664 if (offset > 0)
2665 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2666 }
2667
2668 spu_var_tracking ();
2669
2670 loop_optimizer_finalize ();
2671
2672 free_bb_for_insn ();
2673
2674 in_spu_reorg = 0;
2675 }
2676 \f
2677
2678 /* Insn scheduling routines, primarily for dual issue. */
2679 static int
2680 spu_sched_issue_rate (void)
2681 {
2682 return 2;
2683 }
2684
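/* Return 1 when INSN reads or writes memory and therefore occupies the
   load/store unit.  */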
2685 static int
2686 uses_ls_unit(rtx_insn *insn)
2687 {
2688 rtx set = single_set (insn);
2689 if (set != 0
2690 && (GET_CODE (SET_DEST (set)) == MEM
2691 || GET_CODE (SET_SRC (set)) == MEM))
2692 return 1;
2693 return 0;
2694 }
2695
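/* Return the issue pipe of INSN: 0 for pipe 0 insns (fixed point,
   floating point, etc.), 1 for pipe 1 insns (loads, stores, shuffles,
   branches, hints), -1 for inline asm and MULTI0, and -2 for converts.  */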
2696 static int
2697 get_pipe (rtx_insn *insn)
2698 {
2699 enum attr_type t;
2700 /* Handle inline asm */
2701 if (INSN_CODE (insn) == -1)
2702 return -1;
2703 t = get_attr_type (insn);
2704 switch (t)
2705 {
2706 case TYPE_CONVERT:
2707 return -2;
2708 case TYPE_MULTI0:
2709 return -1;
2710
2711 case TYPE_FX2:
2712 case TYPE_FX3:
2713 case TYPE_SPR:
2714 case TYPE_NOP:
2715 case TYPE_FXB:
2716 case TYPE_FPD:
2717 case TYPE_FP6:
2718 case TYPE_FP7:
2719 return 0;
2720
2721 case TYPE_LNOP:
2722 case TYPE_SHUF:
2723 case TYPE_LOAD:
2724 case TYPE_STORE:
2725 case TYPE_BR:
2726 case TYPE_MULTI1:
2727 case TYPE_HBR:
2728 case TYPE_IPREFETCH:
2729 return 1;
2730 default:
2731 abort ();
2732 }
2733 }
2734
2735
2736 /* haifa-sched.c has a static variable that keeps track of the current
2737 cycle. It is passed to spu_sched_reorder, and we record it here for
2738 use by spu_sched_variable_issue. It won't be accurate if the
2739 scheduler updates its clock_var between the two calls. */
2740 static int clock_var;
2741
2742 /* This is used to keep track of insn alignment. Set to 0 at the
2743 beginning of each block and increased by the "length" attr of each
2744 insn scheduled. */
2745 static int spu_sched_length;
2746
2747 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2748 ready list appropriately in spu_sched_reorder(). */
2749 static int pipe0_clock;
2750 static int pipe1_clock;
2751
2752 static int prev_clock_var;
2753
2754 static int prev_priority;
2755
2756 /* The SPU needs to load the next ilb sometime during the execution of
2757 the previous ilb. There is a potential conflict if every cycle has a
2758 load or store. To avoid the conflict we make sure the load/store
2759 unit is free for at least one cycle during the execution of insns in
2760 the previous ilb. */
2761 static int spu_ls_first;
2762 static int prev_ls_clock;
2763
2764 static void
2765 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2766 int max_ready ATTRIBUTE_UNUSED)
2767 {
2768 spu_sched_length = 0;
2769 }
2770
2771 static void
2772 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2773 int max_ready ATTRIBUTE_UNUSED)
2774 {
2775 if (align_labels.levels[0].get_value () > 4
2776 || align_loops.levels[0].get_value () > 4
2777 || align_jumps.levels[0].get_value () > 4)
2778 {
2779 /* When any block might be at least 8-byte aligned, assume they
2780 will all be at least 8-byte aligned to make sure dual issue
2781 works out correctly. */
2782 spu_sched_length = 0;
2783 }
2784 spu_ls_first = INT_MAX;
2785 clock_var = -1;
2786 prev_ls_clock = -1;
2787 pipe0_clock = -1;
2788 pipe1_clock = -1;
2789 prev_clock_var = -1;
2790 prev_priority = -1;
2791 }
2792
2793 static int
2794 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2795 int verbose ATTRIBUTE_UNUSED,
2796 rtx_insn *insn, int more)
2797 {
2798 int len;
2799 int p;
2800 if (GET_CODE (PATTERN (insn)) == USE
2801 || GET_CODE (PATTERN (insn)) == CLOBBER
2802 || (len = get_attr_length (insn)) == 0)
2803 return more;
2804
2805 spu_sched_length += len;
2806
2807 /* Reset on inline asm */
2808 if (INSN_CODE (insn) == -1)
2809 {
2810 spu_ls_first = INT_MAX;
2811 pipe0_clock = -1;
2812 pipe1_clock = -1;
2813 return 0;
2814 }
2815 p = get_pipe (insn);
2816 if (p == 0)
2817 pipe0_clock = clock_var;
2818 else
2819 pipe1_clock = clock_var;
2820
2821 if (in_spu_reorg)
2822 {
2823 if (clock_var - prev_ls_clock > 1
2824 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2825 spu_ls_first = INT_MAX;
2826 if (uses_ls_unit (insn))
2827 {
2828 if (spu_ls_first == INT_MAX)
2829 spu_ls_first = spu_sched_length;
2830 prev_ls_clock = clock_var;
2831 }
2832
2833 /* The scheduler hasn't inserted the nop, but we will later on.
2834 Include those nops in spu_sched_length. */
2835 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2836 spu_sched_length += 4;
2837 prev_clock_var = clock_var;
2838
2839 /* MORE is -1 when called from spu_sched_reorder for new insns
2840 that don't have INSN_PRIORITY set. */
2841 if (more >= 0)
2842 prev_priority = INSN_PRIORITY (insn);
2843 }
2844
2845 /* Always try issuing more insns. spu_sched_reorder will decide
2846 when the cycle should be advanced. */
2847 return 1;
2848 }
2849
2850 /* This function is called for both TARGET_SCHED_REORDER and
2851 TARGET_SCHED_REORDER2. */
2852 static int
2853 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2854 rtx_insn **ready, int *nreadyp, int clock)
2855 {
2856 int i, nready = *nreadyp;
2857 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2858 rtx_insn *insn;
2859
2860 clock_var = clock;
2861
2862 if (nready <= 0 || pipe1_clock >= clock)
2863 return 0;
2864
2865 /* Find any rtl insns that don't generate assembly insns and schedule
2866 them first. */
2867 for (i = nready - 1; i >= 0; i--)
2868 {
2869 insn = ready[i];
2870 if (INSN_CODE (insn) == -1
2871 || INSN_CODE (insn) == CODE_FOR_blockage
2872 || (INSN_P (insn) && get_attr_length (insn) == 0))
2873 {
2874 ready[i] = ready[nready - 1];
2875 ready[nready - 1] = insn;
2876 return 1;
2877 }
2878 }
2879
2880 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2881 for (i = 0; i < nready; i++)
2882 if (INSN_CODE (ready[i]) != -1)
2883 {
2884 insn = ready[i];
2885 switch (get_attr_type (insn))
2886 {
2887 default:
2888 case TYPE_MULTI0:
2889 case TYPE_CONVERT:
2890 case TYPE_FX2:
2891 case TYPE_FX3:
2892 case TYPE_SPR:
2893 case TYPE_NOP:
2894 case TYPE_FXB:
2895 case TYPE_FPD:
2896 case TYPE_FP6:
2897 case TYPE_FP7:
2898 pipe_0 = i;
2899 break;
2900 case TYPE_LOAD:
2901 case TYPE_STORE:
2902 pipe_ls = i;
2903 /* FALLTHRU */
2904 case TYPE_LNOP:
2905 case TYPE_SHUF:
2906 case TYPE_BR:
2907 case TYPE_MULTI1:
2908 case TYPE_HBR:
2909 pipe_1 = i;
2910 break;
2911 case TYPE_IPREFETCH:
2912 pipe_hbrp = i;
2913 break;
2914 }
2915 }
2916
2917 /* In the first scheduling phase, schedule loads and stores together
2918 to increase the chance they will get merged during postreload CSE. */
2919 if (!reload_completed && pipe_ls >= 0)
2920 {
2921 insn = ready[pipe_ls];
2922 ready[pipe_ls] = ready[nready - 1];
2923 ready[nready - 1] = insn;
2924 return 1;
2925 }
2926
2927 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2928 if (pipe_hbrp >= 0)
2929 pipe_1 = pipe_hbrp;
2930
2931 /* When we have loads/stores in every cycle of the last 15 insns and
2932 we are about to schedule another load/store, emit an hbrp insn
2933 instead. */
2934 if (in_spu_reorg
2935 && spu_sched_length - spu_ls_first >= 4 * 15
2936 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2937 {
2938 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2939 recog_memoized (insn);
2940 if (pipe0_clock < clock)
2941 PUT_MODE (insn, TImode);
2942 spu_sched_variable_issue (file, verbose, insn, -1);
2943 return 0;
2944 }
2945
2946 /* In general, we want to emit nops to increase dual issue, but dual
2947 issue isn't faster when one of the insns could be scheduled later
2948 without affecting the critical path. We look at INSN_PRIORITY to
2949 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2950 used to adjust it. */
2951 if (in_spu_reorg && spu_dual_nops < 10)
2952 {
2953 /* When we are at an even address and we are not issuing nops to
2954 improve scheduling then we need to advance the cycle. */
2955 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2956 && (spu_dual_nops == 0
2957 || (pipe_1 != -1
2958 && prev_priority >
2959 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2960 return 0;
2961
2962 /* When at an odd address, schedule the highest priority insn
2963 without considering pipeline. */
2964 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2965 && (spu_dual_nops == 0
2966 || (prev_priority >
2967 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2968 return 1;
2969 }
2970
2971
2972 /* If we haven't issued a pipe0 insn yet this cycle and there is a
2973 pipe0 insn in the ready list, schedule it. */
2974 if (pipe0_clock < clock && pipe_0 >= 0)
2975 schedule_i = pipe_0;
2976
2977 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2978 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2979 else
2980 schedule_i = pipe_1;
2981
2982 if (schedule_i > -1)
2983 {
2984 insn = ready[schedule_i];
2985 ready[schedule_i] = ready[nready - 1];
2986 ready[nready - 1] = insn;
2987 return 1;
2988 }
2989 return 0;
2990 }
2991
2992 /* INSN is dependent on DEP_INSN. */
2993 static int
2994 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2995 int cost, unsigned int)
2996 {
2997 rtx set;
2998
2999 /* The blockage pattern is used to prevent instructions from being
3000 moved across it and has no cost. */
3001 if (INSN_CODE (insn) == CODE_FOR_blockage
3002 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3003 return 0;
3004
3005 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3006 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3007 return 0;
3008
3009 /* Make sure hbrps are spread out. */
3010 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3011 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3012 return 8;
3013
3014 /* Make sure hints and hbrps are 2 cycles apart. */
3015 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (insn) == CODE_FOR_hbr)
3017 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3018 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3019 return 2;
3020
3021 /* An hbrp has no real dependency on other insns. */
3022 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3023 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3024 return 0;
3025
3026 /* Assuming that it is unlikely an argument register will be used in
3027 the first cycle of the called function, we reduce the cost for
3028 slightly better scheduling of dep_insn. When not hinted, the
3029 mispredicted branch would hide the cost as well. */
3030 if (CALL_P (insn))
3031 {
3032 rtx target = get_branch_target (insn);
3033 if (GET_CODE (target) != REG || !set_of (target, insn))
3034 return cost - 2;
3035 return cost;
3036 }
3037
3038 /* And when returning from a function, let's assume the return values
3039 are completed sooner too. */
3040 if (CALL_P (dep_insn))
3041 return cost - 2;
3042
3043 /* Make sure an instruction that loads from the back chain is scheduled
3044 away from the return instruction so a hint is more likely to get
3045 issued. */
3046 if (INSN_CODE (insn) == CODE_FOR__return
3047 && (set = single_set (dep_insn))
3048 && GET_CODE (SET_DEST (set)) == REG
3049 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3050 return 20;
3051
3052 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3053 scheduler makes every insn in a block anti-dependent on the final
3054 jump_insn. We adjust here so higher cost insns will get scheduled
3055 earlier. */
3056 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3057 return insn_sched_cost (dep_insn) - 3;
3058
3059 return cost;
3060 }
3061 \f
3062 /* Create a CONST_DOUBLE from a string. */
3063 rtx
3064 spu_float_const (const char *string, machine_mode mode)
3065 {
3066 REAL_VALUE_TYPE value;
3067 value = REAL_VALUE_ATOF (string, mode);
3068 return const_double_from_real_value (value, mode);
3069 }
3070
3071 int
3072 spu_constant_address_p (rtx x)
3073 {
3074 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3075 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3076 || GET_CODE (x) == HIGH);
3077 }
3078
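/* Return which immediate-load form handles VAL in a single instruction:
   SPU_IL for a signed 16-bit value, SPU_ILA for an unsigned 18-bit value,
   SPU_ILH when both halfwords of VAL are equal, SPU_ILHU when the low 16
   bits are zero, or SPU_NONE when none of them apply.  */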
3079 static enum spu_immediate
3080 which_immediate_load (HOST_WIDE_INT val)
3081 {
3082 gcc_assert (val == trunc_int_for_mode (val, SImode));
3083
3084 if (val >= -0x8000 && val <= 0x7fff)
3085 return SPU_IL;
3086 if (val >= 0 && val <= 0x3ffff)
3087 return SPU_ILA;
3088 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3089 return SPU_ILH;
3090 if ((val & 0xffff) == 0)
3091 return SPU_ILHU;
3092
3093 return SPU_NONE;
3094 }
3095
3096 /* Return true when OP can be loaded by one of the il instructions, or
3097 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3098 int
3099 immediate_load_p (rtx op, machine_mode mode)
3100 {
3101 if (CONSTANT_P (op))
3102 {
3103 enum immediate_class c = classify_immediate (op, mode);
3104 return c == IC_IL1 || c == IC_IL1s
3105 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3106 }
3107 return 0;
3108 }
3109
3110 /* Return true if the first SIZE bytes of ARR form a constant that can be
3111 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3112 represent the size and offset of the instruction to use. */
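/* For example, the 16-byte pattern 10 11 12 13 00 01 02 03 18 19 1a 1b
   1c 1d 1e 1f (hex), which is the form cwd produces for a word slot at
   offset 4, is accepted with *PRUN = 4 and *PSTART = 4.  */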
3113 static int
3114 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3115 {
3116 int cpat, run, i, start;
3117 cpat = 1;
3118 run = 0;
3119 start = -1;
3120 for (i = 0; i < size && cpat; i++)
3121 if (arr[i] != i+16)
3122 {
3123 if (!run)
3124 {
3125 start = i;
3126 if (arr[i] == 3)
3127 run = 1;
3128 else if (arr[i] == 2 && arr[i+1] == 3)
3129 run = 2;
3130 else if (arr[i] == 0)
3131 {
3132 while (arr[i+run] == run && i+run < 16)
3133 run++;
3134 if (run != 4 && run != 8)
3135 cpat = 0;
3136 }
3137 else
3138 cpat = 0;
3139 if ((i & (run-1)) != 0)
3140 cpat = 0;
3141 i += run;
3142 }
3143 else
3144 cpat = 0;
3145 }
3146 if (cpat && (run || size < 16))
3147 {
3148 if (run == 0)
3149 run = 1;
3150 if (prun)
3151 *prun = run;
3152 if (pstart)
3153 *pstart = start == -1 ? 16-run : start;
3154 return 1;
3155 }
3156 return 0;
3157 }
3158
3159 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3160 it into a register. MODE is only valid when OP is a CONST_INT. */
3161 static enum immediate_class
3162 classify_immediate (rtx op, machine_mode mode)
3163 {
3164 HOST_WIDE_INT val;
3165 unsigned char arr[16];
3166 int i, j, repeated, fsmbi, repeat;
3167
3168 gcc_assert (CONSTANT_P (op));
3169
3170 if (GET_MODE (op) != VOIDmode)
3171 mode = GET_MODE (op);
3172
3173 /* A V4SI const_vector with all identical symbols is ok. */
3174 if (!flag_pic
3175 && mode == V4SImode
3176 && GET_CODE (op) == CONST_VECTOR
3177 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3178 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3179 op = unwrap_const_vec_duplicate (op);
3180
3181 switch (GET_CODE (op))
3182 {
3183 case SYMBOL_REF:
3184 case LABEL_REF:
3185 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3186
3187 case CONST:
3188 /* We can never know if the resulting address fits in 18 bits and can be
3189 loaded with ila. For now, assume the address will not overflow if
3190 the displacement is "small" (fits 'K' constraint). */
3191 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3192 {
3193 rtx sym = XEXP (XEXP (op, 0), 0);
3194 rtx cst = XEXP (XEXP (op, 0), 1);
3195
3196 if (GET_CODE (sym) == SYMBOL_REF
3197 && GET_CODE (cst) == CONST_INT
3198 && satisfies_constraint_K (cst))
3199 return IC_IL1s;
3200 }
3201 return IC_IL2s;
3202
3203 case HIGH:
3204 return IC_IL1s;
3205
3206 case CONST_VECTOR:
3207 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3208 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3209 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3210 return IC_POOL;
3211 /* Fall through. */
3212
3213 case CONST_INT:
3214 case CONST_DOUBLE:
3215 constant_to_array (mode, op, arr);
3216
3217 /* Check that each 4-byte slot is identical. */
3218 repeated = 1;
3219 for (i = 4; i < 16; i += 4)
3220 for (j = 0; j < 4; j++)
3221 if (arr[j] != arr[i + j])
3222 repeated = 0;
3223
3224 if (repeated)
3225 {
3226 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3227 val = trunc_int_for_mode (val, SImode);
3228
3229 if (which_immediate_load (val) != SPU_NONE)
3230 return IC_IL1;
3231 }
3232
3233 /* Any mode of 2 bytes or smaller can be loaded with an il
3234 instruction. */
3235 gcc_assert (GET_MODE_SIZE (mode) > 2);
3236
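/* A constant is fsmbi-able when every byte is either 0 or one repeated
   nonzero value; a single fsmbi (which only produces 0x00/0xff bytes)
   suffices when that value is 0xff, otherwise it is classified IC_FSMBI2.  */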
3237 fsmbi = 1;
3238 repeat = 0;
3239 for (i = 0; i < 16 && fsmbi; i++)
3240 if (arr[i] != 0 && repeat == 0)
3241 repeat = arr[i];
3242 else if (arr[i] != 0 && arr[i] != repeat)
3243 fsmbi = 0;
3244 if (fsmbi)
3245 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3246
3247 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3248 return IC_CPAT;
3249
3250 if (repeated)
3251 return IC_IL2;
3252
3253 return IC_POOL;
3254 default:
3255 break;
3256 }
3257 gcc_unreachable ();
3258 }
3259
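/* Return which logical-immediate form can produce VAL: SPU_ORI for a
   signed 10-bit word value, SPU_IOHL for an unsigned 16-bit value,
   SPU_ORHI or SPU_ORBI when the repeated halfword or byte value fits the
   10-bit range, or SPU_NONE.  */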
3260 static enum spu_immediate
3261 which_logical_immediate (HOST_WIDE_INT val)
3262 {
3263 gcc_assert (val == trunc_int_for_mode (val, SImode));
3264
3265 if (val >= -0x200 && val <= 0x1ff)
3266 return SPU_ORI;
3267 if (val >= 0 && val <= 0xffff)
3268 return SPU_IOHL;
3269 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3270 {
3271 val = trunc_int_for_mode (val, HImode);
3272 if (val >= -0x200 && val <= 0x1ff)
3273 return SPU_ORHI;
3274 if ((val & 0xff) == ((val >> 8) & 0xff))
3275 {
3276 val = trunc_int_for_mode (val, QImode);
3277 if (val >= -0x200 && val <= 0x1ff)
3278 return SPU_ORBI;
3279 }
3280 }
3281 return SPU_NONE;
3282 }
3283
3284 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3285 CONST_DOUBLEs. */
3286 static int
3287 const_vector_immediate_p (rtx x)
3288 {
3289 int i;
3290 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3291 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3292 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3293 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3294 return 0;
3295 return 1;
3296 }
3297
3298 int
3299 logical_immediate_p (rtx op, machine_mode mode)
3300 {
3301 HOST_WIDE_INT val;
3302 unsigned char arr[16];
3303 int i, j;
3304
3305 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3306 || GET_CODE (op) == CONST_VECTOR);
3307
3308 if (GET_CODE (op) == CONST_VECTOR
3309 && !const_vector_immediate_p (op))
3310 return 0;
3311
3312 if (GET_MODE (op) != VOIDmode)
3313 mode = GET_MODE (op);
3314
3315 constant_to_array (mode, op, arr);
3316
3317 /* Check that bytes are repeated. */
3318 for (i = 4; i < 16; i += 4)
3319 for (j = 0; j < 4; j++)
3320 if (arr[j] != arr[i + j])
3321 return 0;
3322
3323 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3324 val = trunc_int_for_mode (val, SImode);
3325
3326 i = which_logical_immediate (val);
3327 return i != SPU_NONE && i != SPU_IOHL;
3328 }
3329
3330 int
3331 iohl_immediate_p (rtx op, machine_mode mode)
3332 {
3333 HOST_WIDE_INT val;
3334 unsigned char arr[16];
3335 int i, j;
3336
3337 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3338 || GET_CODE (op) == CONST_VECTOR);
3339
3340 if (GET_CODE (op) == CONST_VECTOR
3341 && !const_vector_immediate_p (op))
3342 return 0;
3343
3344 if (GET_MODE (op) != VOIDmode)
3345 mode = GET_MODE (op);
3346
3347 constant_to_array (mode, op, arr);
3348
3349 /* Check that bytes are repeated. */
3350 for (i = 4; i < 16; i += 4)
3351 for (j = 0; j < 4; j++)
3352 if (arr[j] != arr[i + j])
3353 return 0;
3354
3355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3356 val = trunc_int_for_mode (val, SImode);
3357
3358 return val >= 0 && val <= 0xffff;
3359 }
3360
3361 int
3362 arith_immediate_p (rtx op, machine_mode mode,
3363 HOST_WIDE_INT low, HOST_WIDE_INT high)
3364 {
3365 HOST_WIDE_INT val;
3366 unsigned char arr[16];
3367 int bytes, i, j;
3368
3369 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3370 || GET_CODE (op) == CONST_VECTOR);
3371
3372 if (GET_CODE (op) == CONST_VECTOR
3373 && !const_vector_immediate_p (op))
3374 return 0;
3375
3376 if (GET_MODE (op) != VOIDmode)
3377 mode = GET_MODE (op);
3378
3379 constant_to_array (mode, op, arr);
3380
3381 bytes = GET_MODE_UNIT_SIZE (mode);
3382 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3383
3384 /* Check that bytes are repeated. */
3385 for (i = bytes; i < 16; i += bytes)
3386 for (j = 0; j < bytes; j++)
3387 if (arr[j] != arr[i + j])
3388 return 0;
3389
3390 val = arr[0];
3391 for (j = 1; j < bytes; j++)
3392 val = (val << 8) | arr[j];
3393
3394 val = trunc_int_for_mode (val, mode);
3395
3396 return val >= low && val <= high;
3397 }
3398
3399 /* TRUE when OP is an immediate and an exact power of 2, and given that
3400 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3401 all entries must be the same. */
3402 bool
3403 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3404 {
3405 machine_mode int_mode;
3406 HOST_WIDE_INT val;
3407 unsigned char arr[16];
3408 int bytes, i, j;
3409
3410 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3411 || GET_CODE (op) == CONST_VECTOR);
3412
3413 if (GET_CODE (op) == CONST_VECTOR
3414 && !const_vector_immediate_p (op))
3415 return 0;
3416
3417 if (GET_MODE (op) != VOIDmode)
3418 mode = GET_MODE (op);
3419
3420 constant_to_array (mode, op, arr);
3421
3422 mode = GET_MODE_INNER (mode);
3423
3424 bytes = GET_MODE_SIZE (mode);
3425 int_mode = int_mode_for_mode (mode).require ();
3426
3427 /* Check that bytes are repeated. */
3428 for (i = bytes; i < 16; i += bytes)
3429 for (j = 0; j < bytes; j++)
3430 if (arr[j] != arr[i + j])
3431 return 0;
3432
3433 val = arr[0];
3434 for (j = 1; j < bytes; j++)
3435 val = (val << 8) | arr[j];
3436
3437 val = trunc_int_for_mode (val, int_mode);
3438
3439 /* Currently, we only handle SFmode. */
3440 gcc_assert (mode == SFmode);
3441 if (mode == SFmode)
3442 {
3443 int exp = (val >> 23) - 127;
3444 return val > 0 && (val & 0x007fffff) == 0
3445 && exp >= low && exp <= high;
3446 }
3447 return FALSE;
3448 }
3449
3450 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3451
3452 static bool
3453 ea_symbol_ref_p (const_rtx x)
3454 {
3455 tree decl;
3456
3457 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3458 {
3459 rtx plus = XEXP (x, 0);
3460 rtx op0 = XEXP (plus, 0);
3461 rtx op1 = XEXP (plus, 1);
3462 if (GET_CODE (op1) == CONST_INT)
3463 x = op0;
3464 }
3465
3466 return (GET_CODE (x) == SYMBOL_REF
3467 && (decl = SYMBOL_REF_DECL (x)) != 0
3468 && TREE_CODE (decl) == VAR_DECL
3469 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3470 }
3471
3472 /* We accept:
3473 - any 32-bit constant (SImode, SFmode)
3474 - any constant that can be generated with fsmbi (any mode)
3475 - a 64-bit constant where the high and low bits are identical
3476 (DImode, DFmode)
3477 - a 128-bit constant where the four 32-bit words match. */
3478 bool
3479 spu_legitimate_constant_p (machine_mode mode, rtx x)
3480 {
3481 subrtx_iterator::array_type array;
3482 if (GET_CODE (x) == HIGH)
3483 x = XEXP (x, 0);
3484
3485 /* Reject any __ea qualified reference. These can't appear in
3486 instructions but must be forced to the constant pool. */
3487 FOR_EACH_SUBRTX (iter, array, x, ALL)
3488 if (ea_symbol_ref_p (*iter))
3489 return 0;
3490
3491 /* V4SI with all identical symbols is valid. */
3492 if (!flag_pic
3493 && mode == V4SImode
3494 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3495 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3496 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3497 return const_vec_duplicate_p (x);
3498
3499 if (GET_CODE (x) == CONST_VECTOR
3500 && !const_vector_immediate_p (x))
3501 return 0;
3502 return 1;
3503 }
3504
3505 /* Valid addresses are:
3506 - symbol_ref, label_ref, const
3507 - reg
3508 - reg + const_int, where const_int is 16 byte aligned
3509 - reg + reg, alignment doesn't matter
3510 The alignment matters in the reg+const case because lqd and stqd
3511 ignore the 4 least significant bits of the const. We only care about
3512 16 byte modes because the expand phase will change all smaller MEM
3513 references to TImode. */
3514 static bool
3515 spu_legitimate_address_p (machine_mode mode,
3516 rtx x, bool reg_ok_strict)
3517 {
3518 int aligned = GET_MODE_SIZE (mode) >= 16;
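/* A 16-byte access may come wrapped in (and ... -16); the AND models
   lqd/stqd ignoring the low four address bits, so strip it and validate
   the address underneath.  */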
3519 if (aligned
3520 && GET_CODE (x) == AND
3521 && GET_CODE (XEXP (x, 1)) == CONST_INT
3522 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3523 x = XEXP (x, 0);
3524 switch (GET_CODE (x))
3525 {
3526 case LABEL_REF:
3527 return !TARGET_LARGE_MEM;
3528
3529 case SYMBOL_REF:
3530 case CONST:
3531 /* Keep __ea references until reload so that spu_expand_mov can see them
3532 in MEMs. */
3533 if (ea_symbol_ref_p (x))
3534 return !reload_in_progress && !reload_completed;
3535 return !TARGET_LARGE_MEM;
3536
3537 case CONST_INT:
3538 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3539
3540 case SUBREG:
3541 x = XEXP (x, 0);
3542 if (!REG_P (x))
3543 return 0;
3544 /* FALLTHRU */
3545
3546 case REG:
3547 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3548
3549 case PLUS:
3550 case LO_SUM:
3551 {
3552 rtx op0 = XEXP (x, 0);
3553 rtx op1 = XEXP (x, 1);
3554 if (GET_CODE (op0) == SUBREG)
3555 op0 = XEXP (op0, 0);
3556 if (GET_CODE (op1) == SUBREG)
3557 op1 = XEXP (op1, 0);
3558 if (GET_CODE (op0) == REG
3559 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3560 && GET_CODE (op1) == CONST_INT
3561 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3562 /* If virtual registers are involved, the displacement will
3563 change later on anyway, so checking would be premature.
3564 Reload will make sure the final displacement after
3565 register elimination is OK. */
3566 || op0 == arg_pointer_rtx
3567 || op0 == frame_pointer_rtx
3568 || op0 == virtual_stack_vars_rtx)
3569 && (!aligned || (INTVAL (op1) & 15) == 0))
3570 return TRUE;
3571 if (GET_CODE (op0) == REG
3572 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3573 && GET_CODE (op1) == REG
3574 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3575 return TRUE;
3576 }
3577 break;
3578
3579 default:
3580 break;
3581 }
3582 return FALSE;
3583 }
3584
3585 /* Like spu_legitimate_address_p, except with named addresses. */
3586 static bool
3587 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3588 bool reg_ok_strict, addr_space_t as)
3589 {
3590 if (as == ADDR_SPACE_EA)
3591 return (REG_P (x) && (GET_MODE (x) == EAmode));
3592
3593 else if (as != ADDR_SPACE_GENERIC)
3594 gcc_unreachable ();
3595
3596 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3597 }
3598
3599 /* When the address is reg + const_int, force the const_int into a
3600 register. */
3601 static rtx
3602 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3603 machine_mode mode ATTRIBUTE_UNUSED)
3604 {
3605 rtx op0, op1;
3606 /* Make sure both operands are registers. */
3607 if (GET_CODE (x) == PLUS)
3608 {
3609 op0 = XEXP (x, 0);
3610 op1 = XEXP (x, 1);
3611 if (ALIGNED_SYMBOL_REF_P (op0))
3612 {
3613 op0 = force_reg (Pmode, op0);
3614 mark_reg_pointer (op0, 128);
3615 }
3616 else if (GET_CODE (op0) != REG)
3617 op0 = force_reg (Pmode, op0);
3618 if (ALIGNED_SYMBOL_REF_P (op1))
3619 {
3620 op1 = force_reg (Pmode, op1);
3621 mark_reg_pointer (op1, 128);
3622 }
3623 else if (GET_CODE (op1) != REG)
3624 op1 = force_reg (Pmode, op1);
3625 x = gen_rtx_PLUS (Pmode, op0, op1);
3626 }
3627 return x;
3628 }
3629
3630 /* Like spu_legitimize_address, except with named address support. */
3631 static rtx
3632 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3633 addr_space_t as)
3634 {
3635 if (as != ADDR_SPACE_GENERIC)
3636 return x;
3637
3638 return spu_legitimize_address (x, oldx, mode);
3639 }
3640
3641 /* Reload reg + const_int for out-of-range displacements. */
3642 rtx
3643 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3644 int opnum, int type)
3645 {
3646 bool removed_and = false;
3647
3648 if (GET_CODE (ad) == AND
3649 && CONST_INT_P (XEXP (ad, 1))
3650 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3651 {
3652 ad = XEXP (ad, 0);
3653 removed_and = true;
3654 }
3655
3656 if (GET_CODE (ad) == PLUS
3657 && REG_P (XEXP (ad, 0))
3658 && CONST_INT_P (XEXP (ad, 1))
3659 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3660 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3661 {
3662 /* Unshare the sum. */
3663 ad = copy_rtx (ad);
3664
3665 /* Reload the displacement. */
3666 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3667 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3668 opnum, (enum reload_type) type);
3669
3670 /* Add back AND for alignment if we stripped it. */
3671 if (removed_and)
3672 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3673
3674 return ad;
3675 }
3676
3677 return NULL_RTX;
3678 }
3679
3680 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3681 struct attribute_spec.handler. */
3682 static tree
3683 spu_handle_fndecl_attribute (tree * node,
3684 tree name,
3685 tree args ATTRIBUTE_UNUSED,
3686 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3687 {
3688 if (TREE_CODE (*node) != FUNCTION_DECL)
3689 {
3690 warning (0, "%qE attribute only applies to functions",
3691 name);
3692 *no_add_attrs = true;
3693 }
3694
3695 return NULL_TREE;
3696 }
3697
3698 /* Handle the "vector" attribute. */
3699 static tree
3700 spu_handle_vector_attribute (tree * node, tree name,
3701 tree args ATTRIBUTE_UNUSED,
3702 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3703 {
3704 tree type = *node, result = NULL_TREE;
3705 machine_mode mode;
3706 int unsigned_p;
3707
3708 while (POINTER_TYPE_P (type)
3709 || TREE_CODE (type) == FUNCTION_TYPE
3710 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3711 type = TREE_TYPE (type);
3712
3713 mode = TYPE_MODE (type);
3714
3715 unsigned_p = TYPE_UNSIGNED (type);
3716 switch (mode)
3717 {
3718 case E_DImode:
3719 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3720 break;
3721 case E_SImode:
3722 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3723 break;
3724 case E_HImode:
3725 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3726 break;
3727 case E_QImode:
3728 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3729 break;
3730 case E_SFmode:
3731 result = V4SF_type_node;
3732 break;
3733 case E_DFmode:
3734 result = V2DF_type_node;
3735 break;
3736 default:
3737 break;
3738 }
3739
3740 /* Propagate qualifiers attached to the element type
3741 onto the vector type. */
3742 if (result && result != type && TYPE_QUALS (type))
3743 result = build_qualified_type (result, TYPE_QUALS (type));
3744
3745 *no_add_attrs = true; /* No need to hang on to the attribute. */
3746
3747 if (!result)
3748 warning (0, "%qE attribute ignored", name);
3749 else
3750 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3751
3752 return NULL_TREE;
3753 }
3754
3755 /* Return nonzero if FUNC is a naked function. */
3756 static int
3757 spu_naked_function_p (tree func)
3758 {
3759 tree a;
3760
3761 if (TREE_CODE (func) != FUNCTION_DECL)
3762 abort ();
3763
3764 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3765 return a != NULL_TREE;
3766 }
3767
3768 int
3769 spu_initial_elimination_offset (int from, int to)
3770 {
3771 int saved_regs_size = spu_saved_regs_size ();
3772 int sp_offset = 0;
3773 if (!crtl->is_leaf || crtl->outgoing_args_size
3774 || get_frame_size () || saved_regs_size)
3775 sp_offset = STACK_POINTER_OFFSET;
3776 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3777 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3778 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779 return get_frame_size ();
3780 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3781 return sp_offset + crtl->outgoing_args_size
3782 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3783 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3784 return get_frame_size () + saved_regs_size + sp_offset;
3785 else
3786 gcc_unreachable ();
3787 }
3788
3789 rtx
3790 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3791 {
3792 machine_mode mode = TYPE_MODE (type);
3793 int byte_size = ((mode == BLKmode)
3794 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3795
3796 /* Make sure small structs are left justified in a register. */
3797 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3798 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3799 {
3800 machine_mode smode;
3801 rtvec v;
3802 int i;
3803 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3804 int n = byte_size / UNITS_PER_WORD;
3805 v = rtvec_alloc (nregs);
3806 for (i = 0; i < n; i++)
3807 {
3808 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3809 gen_rtx_REG (TImode,
3810 FIRST_RETURN_REGNUM
3811 + i),
3812 GEN_INT (UNITS_PER_WORD * i));
3813 byte_size -= UNITS_PER_WORD;
3814 }
3815
3816 if (n < nregs)
3817 {
3818 if (byte_size < 4)
3819 byte_size = 4;
3820 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3821 RTVEC_ELT (v, n) =
3822 gen_rtx_EXPR_LIST (VOIDmode,
3823 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3824 GEN_INT (UNITS_PER_WORD * n));
3825 }
3826 return gen_rtx_PARALLEL (mode, v);
3827 }
3828 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3829 }
3830
3831 static rtx
3832 spu_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3833 {
3834 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3835 int byte_size;
3836
3837 if (*cum >= MAX_REGISTER_ARGS)
3838 return 0;
3839
3840 byte_size = arg.promoted_size_in_bytes ();
3841
3842 /* The ABI does not allow parameters to be passed partly in
3843 registers and partly on the stack. */
3844 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3845 return 0;
3846
3847 /* Make sure small structs are left justified in a register. */
3848 if ((arg.mode == BLKmode || arg.aggregate_type_p ())
3849 && byte_size < UNITS_PER_WORD && byte_size > 0)
3850 {
3851 machine_mode smode;
3852 rtx gr_reg;
3853 if (byte_size < 4)
3854 byte_size = 4;
3855 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3856 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3857 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3858 const0_rtx);
3859 return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg));
3860 }
3861 else
3862 return gen_rtx_REG (arg.mode, FIRST_ARG_REGNUM + *cum);
3863 }
3864
3865 static void
3866 spu_function_arg_advance (cumulative_args_t cum_v,
3867 const function_arg_info &arg)
3868 {
3869 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3870
3871 *cum += (arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST
3872 ? 1
3873 : arg.mode == BLKmode
3874 ? ((int_size_in_bytes (arg.type) + 15) / 16)
3875 : arg.mode == VOIDmode
3876 ? 1
3877 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, arg.mode));
3878 }
3879
3880 /* Implement TARGET_FUNCTION_ARG_OFFSET. The SPU ABI wants 32/64-bit
3881 types at offset 0 in the quad-word on the stack. 8/16-bit types
3882 should be at offsets 3/2 respectively. */
3883
3884 static HOST_WIDE_INT
3885 spu_function_arg_offset (machine_mode mode, const_tree type)
3886 {
3887 if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3888 return 4 - GET_MODE_SIZE (mode);
3889 return 0;
3890 }
3891
3892 /* Implement TARGET_FUNCTION_ARG_PADDING. */
3893
3894 static pad_direction
3895 spu_function_arg_padding (machine_mode, const_tree)
3896 {
3897 return PAD_UPWARD;
3898 }
3899
3900 /* Variable sized types are passed by reference. */
3901 static bool
3902 spu_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
3903 {
3904 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
3905 }
3906 \f
3907
3908 /* Var args. */
3909
3910 /* Create and return the va_list datatype.
3911
3912 On SPU, va_list is an array type equivalent to
3913
3914 typedef struct __va_list_tag
3915 {
3916 void *__args __attribute__((__aligned(16)));
3917 void *__skip __attribute__((__aligned(16)));
3918
3919 } va_list[1];
3920
3921 where __args points to the arg that will be returned by the next
3922 va_arg(), and __skip points to the previous stack frame such that
3923 when __args == __skip we should advance __args by 32 bytes. */
3924 static tree
3925 spu_build_builtin_va_list (void)
3926 {
3927 tree f_args, f_skip, record, type_decl;
3928 bool owp;
3929
3930 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3931
3932 type_decl =
3933 build_decl (BUILTINS_LOCATION,
3934 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3935
3936 f_args = build_decl (BUILTINS_LOCATION,
3937 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3938 f_skip = build_decl (BUILTINS_LOCATION,
3939 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3940
3941 DECL_FIELD_CONTEXT (f_args) = record;
3942 SET_DECL_ALIGN (f_args, 128);
3943 DECL_USER_ALIGN (f_args) = 1;
3944
3945 DECL_FIELD_CONTEXT (f_skip) = record;
3946 SET_DECL_ALIGN (f_skip, 128);
3947 DECL_USER_ALIGN (f_skip) = 1;
3948
3949 TYPE_STUB_DECL (record) = type_decl;
3950 TYPE_NAME (record) = type_decl;
3951 TYPE_FIELDS (record) = f_args;
3952 DECL_CHAIN (f_args) = f_skip;
3953
3954 /* We know this type is being padded and we want it that way. It is an
3955 internal type, so hide the padding warnings from the user. */
3956 owp = warn_padded;
3957 warn_padded = false;
3958
3959 layout_type (record);
3960
3961 warn_padded = owp;
3962
3963 /* The correct type is an array type of one element. */
3964 return build_array_type (record, build_index_type (size_zero_node));
3965 }
3966
3967 /* Implement va_start by filling the va_list structure VALIST.
3968 NEXTARG points to the first anonymous stack argument.
3969
3970 The following global variables are used to initialize
3971 the va_list structure:
3972
3973 crtl->args.info;
3974 the CUMULATIVE_ARGS for this function
3975
3976 crtl->args.arg_offset_rtx:
3977 holds the offset of the first anonymous stack argument
3978 (relative to the virtual arg pointer). */
3979
3980 static void
3981 spu_va_start (tree valist, rtx nextarg)
3982 {
3983 tree f_args, f_skip;
3984 tree args, skip, t;
3985
3986 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3987 f_skip = DECL_CHAIN (f_args);
3988
3989 valist = build_simple_mem_ref (valist);
3990 args =
3991 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3992 skip =
3993 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3994
3995 /* Find the __args area. */
3996 t = make_tree (TREE_TYPE (args), nextarg);
3997 if (crtl->args.pretend_args_size > 0)
3998 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3999 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4000 TREE_SIDE_EFFECTS (t) = 1;
4001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4002
4003 /* Find the __skip area. */
4004 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4005 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4006 - STACK_POINTER_OFFSET));
4007 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4008 TREE_SIDE_EFFECTS (t) = 1;
4009 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4010 }
4011
4012 /* Gimplify va_arg by updating the va_list structure
4013 VALIST as required to retrieve an argument of type
4014 TYPE, and returning that argument.
4015
4016 ret = va_arg(VALIST, TYPE);
4017
4018 generates code equivalent to:
4019
4020 paddedsize = (sizeof(TYPE) + 15) & -16;
4021 if (VALIST.__args + paddedsize > VALIST.__skip
4022 && VALIST.__args <= VALIST.__skip)
4023 addr = VALIST.__skip + 32;
4024 else
4025 addr = VALIST.__args;
4026 VALIST.__args = addr + paddedsize;
4027 ret = *(TYPE *)addr;
4028 */
4029 static tree
4030 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4031 gimple_seq * post_p ATTRIBUTE_UNUSED)
4032 {
4033 tree f_args, f_skip;
4034 tree args, skip;
4035 HOST_WIDE_INT size, rsize;
4036 tree addr, tmp;
4037 bool pass_by_reference_p;
4038
4039 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4040 f_skip = DECL_CHAIN (f_args);
4041
4042 args =
4043 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4044 skip =
4045 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4046
4047 addr = create_tmp_var (ptr_type_node, "va_arg");
4048
4049 /* If an object is dynamically sized, a pointer to it is passed
4050 instead of the object itself. */
4051 pass_by_reference_p = pass_va_arg_by_reference (type);
4052 if (pass_by_reference_p)
4053 type = build_pointer_type (type);
4054 size = int_size_in_bytes (type);
4055 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4056
4057 /* Build the conditional expression that computes addr. The expression
4058 will be gimplified later. */
4059 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4060 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4061 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4062 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4063 unshare_expr (skip)));
4064
4065 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4066 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4067 unshare_expr (args));
4068
4069 gimplify_assign (addr, tmp, pre_p);
4070
4071 /* Update VALIST.__args. */
4072 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4073 gimplify_assign (unshare_expr (args), tmp, pre_p);
4074
4075 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4076 addr);
4077
4078 if (pass_by_reference_p)
4079 addr = build_va_arg_indirect_ref (addr);
4080
4081 return build_va_arg_indirect_ref (addr);
4082 }
4083
4084 /* Save parameter registers starting with the register that corresponds
4085 to the first unnamed parameter. If the first unnamed parameter is
4086 in the stack then save no registers. Set pretend_args_size to the
4087 amount of space needed to save the registers. */
4088 static void
4089 spu_setup_incoming_varargs (cumulative_args_t cum,
4090 const function_arg_info &arg,
4091 int *pretend_size, int no_rtl)
4092 {
4093 if (!no_rtl)
4094 {
4095 rtx tmp;
4096 int regno;
4097 int offset;
4098 int ncum = *get_cumulative_args (cum);
4099
4100 /* cum currently points to the last named argument; we want to
4101 start at the next argument. */
4102 spu_function_arg_advance (pack_cumulative_args (&ncum), arg);
4103
4104 offset = -STACK_POINTER_OFFSET;
4105 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4106 {
4107 tmp = gen_frame_mem (V4SImode,
4108 plus_constant (Pmode, virtual_incoming_args_rtx,
4109 offset));
4110 emit_move_insn (tmp,
4111 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4112 offset += 16;
4113 }
4114 *pretend_size = offset + STACK_POINTER_OFFSET;
4115 }
4116 }
4117 \f
4118 static void
4119 spu_conditional_register_usage (void)
4120 {
4121 if (flag_pic)
4122 {
4123 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4124 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4125 }
4126 }
4127
4128 /* This is called any time we inspect the alignment of a register for
4129 addresses. */
4130 static int
4131 reg_aligned_for_addr (rtx x)
4132 {
4133 int regno =
4134 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4135 return REGNO_POINTER_ALIGN (regno) >= 128;
4136 }
4137
4138 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4139 into its SYMBOL_REF_FLAGS. */
4140 static void
4141 spu_encode_section_info (tree decl, rtx rtl, int first)
4142 {
4143 default_encode_section_info (decl, rtl, first);
4144
4145 /* If a variable has a forced alignment to < 16 bytes, mark it with
4146 SYMBOL_FLAG_ALIGN1. */
4147 if (TREE_CODE (decl) == VAR_DECL
4148 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4149 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4150 }
4151
4152 /* Return TRUE if we are certain the mem refers to a complete object
4153 which is both 16-byte aligned and padded to a 16-byte boundary. This
4154 would make it safe to store with a single instruction.
4155 We guarantee the alignment and padding for static objects by aligning
4156 all of them to 16 bytes (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT).
4157 FIXME: We currently cannot guarantee this for objects on the stack
4158 because assign_parm_setup_stack calls assign_stack_local with the
4159 alignment of the parameter mode and in that case the alignment never
4160 gets adjusted by LOCAL_ALIGNMENT. */
4161 static int
4162 store_with_one_insn_p (rtx mem)
4163 {
4164 machine_mode mode = GET_MODE (mem);
4165 rtx addr = XEXP (mem, 0);
4166 if (mode == BLKmode)
4167 return 0;
4168 if (GET_MODE_SIZE (mode) >= 16)
4169 return 1;
4170 /* Only static objects. */
4171 if (GET_CODE (addr) == SYMBOL_REF)
4172 {
4173 /* We use the associated declaration to make sure the access is
4174 referring to the whole object.
4175 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4176 if it is necessary. Will there be cases where one exists, and
4177 the other does not? Will there be cases where both exist, but
4178 have different types? */
4179 tree decl = MEM_EXPR (mem);
4180 if (decl
4181 && TREE_CODE (decl) == VAR_DECL
4182 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4183 return 1;
4184 decl = SYMBOL_REF_DECL (addr);
4185 if (decl
4186 && TREE_CODE (decl) == VAR_DECL
4187 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4188 return 1;
4189 }
4190 return 0;
4191 }
4192
4193 /* Return 1 when the address is not valid for a simple load and store as
4194 required by the '_mov*' patterns. We could make this less strict
4195 for loads, but we prefer MEMs to look the same so they are more
4196 likely to be merged. */
4197 static int
4198 address_needs_split (rtx mem)
4199 {
4200 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4201 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4202 || !(store_with_one_insn_p (mem)
4203 || mem_is_padded_component_ref (mem))))
4204 return 1;
4205
4206 return 0;
4207 }
4208
4209 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4210 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4211 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4212
4213 /* MEM is known to be an __ea qualified memory access. Emit a call to
4214 fetch the PPU memory into local store, and return its address in local
4215 store. */
4216
4217 static void
4218 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4219 {
4220 if (is_store)
4221 {
4222 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4223 if (!cache_fetch_dirty)
4224 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4225 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4226 ea_addr, EAmode, ndirty, SImode);
4227 }
4228 else
4229 {
4230 if (!cache_fetch)
4231 cache_fetch = init_one_libfunc ("__cache_fetch");
4232 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4233 ea_addr, EAmode);
4234 }
4235 }
4236
4237 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4238 dirty bit marking, inline.
4239
4240 The cache control data structure is an array of
4241
4242 struct __cache_tag_array
4243 {
4244 unsigned int tag_lo[4];
4245 unsigned int tag_hi[4];
4246 void *data_pointer[4];
4247 int reserved[4];
4248 vector unsigned short dirty_bits[4];
4249 } */
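/* Roughly, and for illustration only, the inline sequence emitted below
   is the vectorized equivalent of this scalar sketch for the 32-bit
   __ea model (field names refer to the structure above):

     tag_index = ea_addr & (__cache_tag_array_size - 128);
     block_off = ea_addr & 127;
     set = (struct __cache_tag_array *) ((char *) __cache_tag_array + tag_index);
     tag = ea_addr & -128;
     for (way = 0; way < 4; way++)
       if (set->tag_lo[way] == tag)
         {
           data_addr = (char *) set->data_pointer[way] + block_off;
           if (is_store)
             set the corresponding bytes in set->dirty_bits[way];
           goto hit;
         }
     data_addr = __cache_fetch (ea_addr);   // or __cache_fetch_dirty for stores
   hit: ;

   Instead of looping, the code below compares all four tags with one
   vector compare, turns the result into a bit mask with gbb, and uses
   clz plus rotqby to move the matching data_pointer into the preferred
   slot.  */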
4250
4251 static void
4252 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4253 {
4254 rtx ea_addr_si;
4255 HOST_WIDE_INT v;
4256 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4257 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4258 rtx index_mask = gen_reg_rtx (SImode);
4259 rtx tag_arr = gen_reg_rtx (Pmode);
4260 rtx splat_mask = gen_reg_rtx (TImode);
4261 rtx splat = gen_reg_rtx (V4SImode);
4262 rtx splat_hi = NULL_RTX;
4263 rtx tag_index = gen_reg_rtx (Pmode);
4264 rtx block_off = gen_reg_rtx (SImode);
4265 rtx tag_addr = gen_reg_rtx (Pmode);
4266 rtx tag = gen_reg_rtx (V4SImode);
4267 rtx cache_tag = gen_reg_rtx (V4SImode);
4268 rtx cache_tag_hi = NULL_RTX;
4269 rtx cache_ptrs = gen_reg_rtx (TImode);
4270 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4271 rtx tag_equal = gen_reg_rtx (V4SImode);
4272 rtx tag_equal_hi = NULL_RTX;
4273 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4274 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4275 rtx eq_index = gen_reg_rtx (SImode);
4276 rtx bcomp, hit_label, hit_ref, cont_label;
4277 rtx_insn *insn;
4278
4279 if (spu_ea_model != 32)
4280 {
4281 splat_hi = gen_reg_rtx (V4SImode);
4282 cache_tag_hi = gen_reg_rtx (V4SImode);
4283 tag_equal_hi = gen_reg_rtx (V4SImode);
4284 }
4285
4286 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4287 emit_move_insn (tag_arr, tag_arr_sym);
4288 v = 0x0001020300010203LL;
4289 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4290 ea_addr_si = ea_addr;
4291 if (spu_ea_model != 32)
4292 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4293
4294 /* tag_index = ea_addr & (tag_array_size - 128) */
4295 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4296
4297 /* splat ea_addr to all 4 slots. */
4298 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4299 /* Similarly for high 32 bits of ea_addr. */
4300 if (spu_ea_model != 32)
4301 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4302
4303 /* block_off = ea_addr & 127 */
4304 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4305
4306 /* tag_addr = tag_arr + tag_index */
4307 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4308
4309 /* Read cache tags. */
4310 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4311 if (spu_ea_model != 32)
4312 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4313 plus_constant (Pmode,
4314 tag_addr, 16)));
4315
4316 /* tag = ea_addr & -128 */
4317 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4318
4319 /* Read all four cache data pointers. */
4320 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4321 plus_constant (Pmode,
4322 tag_addr, 32)));
4323
4324 /* Compare tags. */
4325 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4326 if (spu_ea_model != 32)
4327 {
4328 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4329 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4330 }
4331
4332 /* At most one of the tags compares equal, so tag_equal has one
4333 32-bit slot set to all 1's, with the other slots all zero.
4334 gbb picks off the low bit of each byte in the 128-bit register,
4335 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4336 we have a hit. */
4337 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4338 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4339
4340 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4341 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4342
4343 /* This lets us rotate the corresponding cache data pointer into
4344 slot 0 (the rotate amount is eq_index mod 16 bytes). */
4345 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4346 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4347
4348 /* Add block offset to form final data address. */
4349 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4350
4351 /* Check that we did hit. */
4352 hit_label = gen_label_rtx ();
4353 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4354 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4355 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4356 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4357 hit_ref, pc_rtx)));
4358 /* Say that this branch is very likely to happen. */
4359 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4360
4361 ea_load_store (mem, is_store, ea_addr, data_addr);
4362 cont_label = gen_label_rtx ();
4363 emit_jump_insn (gen_jump (cont_label));
4364 emit_barrier ();
4365
4366 emit_label (hit_label);
4367
4368 if (is_store)
4369 {
4370 HOST_WIDE_INT v_hi;
4371 rtx dirty_bits = gen_reg_rtx (TImode);
4372 rtx dirty_off = gen_reg_rtx (SImode);
4373 rtx dirty_128 = gen_reg_rtx (TImode);
4374 rtx neg_block_off = gen_reg_rtx (SImode);
4375
4376 /* Set up a mask with one dirty bit per byte of the mem we are
4377 writing, starting from the top bit. */
4378 v_hi = v = -1;
4379 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4380 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4381 {
4382 v_hi = v;
4383 v = 0;
4384 }
4385 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4386
4387 /* Form index into cache dirty_bits. eq_index is one of
4388 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4389 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4390 offset to each of the four dirty_bits elements. */
4391 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4392
4393 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4394
4395 /* Rotate bit mask to proper bit. */
4396 emit_insn (gen_negsi2 (neg_block_off, block_off));
4397 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4398 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4399
4400 /* Or in the new dirty bits. */
4401 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4402
4403 /* Store. */
4404 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4405 }
4406
4407 emit_label (cont_label);
4408 }
4409
4410 static rtx
4411 expand_ea_mem (rtx mem, bool is_store)
4412 {
4413 rtx ea_addr;
4414 rtx data_addr = gen_reg_rtx (Pmode);
4415 rtx new_mem;
4416
4417 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4418 if (optimize_size || optimize == 0)
4419 ea_load_store (mem, is_store, ea_addr, data_addr);
4420 else
4421 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4422
4423 if (ea_alias_set == -1)
4424 ea_alias_set = new_alias_set ();
4425
4426 /* We generate a new MEM RTX to refer to the copy of the data
4427 in the cache. We do not copy memory attributes (except the
4428 alignment) from the original MEM, as they may no longer apply
4429 to the cache copy. */
4430 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4431 set_mem_alias_set (new_mem, ea_alias_set);
4432 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4433
4434 return new_mem;
4435 }
4436
4437 int
4438 spu_expand_mov (rtx * ops, machine_mode mode)
4439 {
4440 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4441 {
4442 /* Perform the move in the destination SUBREG's inner mode. */
4443 ops[0] = SUBREG_REG (ops[0]);
4444 mode = GET_MODE (ops[0]);
4445 ops[1] = gen_lowpart_common (mode, ops[1]);
4446 gcc_assert (ops[1]);
4447 }
4448
4449 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4450 {
4451 rtx from = SUBREG_REG (ops[1]);
4452 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4453
4454 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4455 && GET_MODE_CLASS (imode) == MODE_INT
4456 && subreg_lowpart_p (ops[1]));
4457
4458 if (GET_MODE_SIZE (imode) < 4)
4459 imode = SImode;
4460 if (imode != GET_MODE (from))
4461 from = gen_rtx_SUBREG (imode, from, 0);
4462
4463 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4464 {
4465 enum insn_code icode = convert_optab_handler (trunc_optab,
4466 mode, imode);
4467 emit_insn (GEN_FCN (icode) (ops[0], from));
4468 }
4469 else
4470 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4471 return 1;
4472 }
4473
4474 /* At least one of the operands needs to be a register. */
4475 if ((reload_in_progress | reload_completed) == 0
4476 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4477 {
4478 rtx temp = force_reg (mode, ops[1]);
4479 emit_move_insn (ops[0], temp);
4480 return 1;
4481 }
4482 if (reload_in_progress || reload_completed)
4483 {
4484 if (CONSTANT_P (ops[1]))
4485 return spu_split_immediate (ops);
4486 return 0;
4487 }
4488
4489 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4490 extend them. */
4491 if (GET_CODE (ops[1]) == CONST_INT)
4492 {
4493 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4494 if (val != INTVAL (ops[1]))
4495 {
4496 emit_move_insn (ops[0], GEN_INT (val));
4497 return 1;
4498 }
4499 }
4500 if (MEM_P (ops[0]))
4501 {
4502 if (MEM_ADDR_SPACE (ops[0]))
4503 ops[0] = expand_ea_mem (ops[0], true);
4504 return spu_split_store (ops);
4505 }
4506 if (MEM_P (ops[1]))
4507 {
4508 if (MEM_ADDR_SPACE (ops[1]))
4509 ops[1] = expand_ea_mem (ops[1], false);
4510 return spu_split_load (ops);
4511 }
4512
4513 return 0;
4514 }
4515
4516 static void
4517 spu_convert_move (rtx dst, rtx src)
4518 {
4519 machine_mode mode = GET_MODE (dst);
4520 machine_mode int_mode = int_mode_for_mode (mode).require ();
4521 rtx reg;
4522 gcc_assert (GET_MODE (src) == TImode);
4523 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4524 emit_insn (gen_rtx_SET (reg,
4525 gen_rtx_TRUNCATE (int_mode,
4526 gen_rtx_LSHIFTRT (TImode, src,
4527 GEN_INT (int_mode == DImode ? 64 : 96)))));
4528 if (int_mode != mode)
4529 {
4530 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4531 emit_move_insn (dst, reg);
4532 }
4533 }
4534
4535 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4536 the address from SRC and SRC+16. Return a REG or CONST_INT that
4537 specifies how many bytes to rotate the loaded registers, plus any
4538 extra from EXTRA_ROTBY. The address and rotate amounts are
4539 normalized to improve merging of loads and rotate computations. */
4540 static rtx
4541 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4542 {
4543 rtx addr = XEXP (src, 0);
4544 rtx p0, p1, rot, addr0, addr1;
4545 int rot_amt;
4546
4547 rot = 0;
4548 rot_amt = 0;
4549
4550 if (MEM_ALIGN (src) >= 128)
4551 /* Address is already aligned; simply perform a TImode load. */ ;
4552 else if (GET_CODE (addr) == PLUS)
4553 {
4554 /* 8 cases:
4555 aligned reg + aligned reg => lqx
4556 aligned reg + unaligned reg => lqx, rotqby
4557 aligned reg + aligned const => lqd
4558 aligned reg + unaligned const => lqd, rotqbyi
4559 unaligned reg + aligned reg => lqx, rotqby
4560 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4561 unaligned reg + aligned const => lqd, rotqby
4562 unaligned reg + unaligned const => not allowed by legitimate address
4563 */
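/* For example (the aligned reg + unaligned const case): an SImode load
   from (plus REG 4), with REG known to be 16-byte aligned, takes the
   path below with rot_amt = 4 and ADDR reduced to REG, so the caller
   ends up emitting an lqd from REG followed by a 4-byte rotate that
   moves bytes 4..7 of the quadword into the preferred slot.  */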
4564 p0 = XEXP (addr, 0);
4565 p1 = XEXP (addr, 1);
4566 if (!reg_aligned_for_addr (p0))
4567 {
4568 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4569 {
4570 rot = gen_reg_rtx (SImode);
4571 emit_insn (gen_addsi3 (rot, p0, p1));
4572 }
4573 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4574 {
4575 if (INTVAL (p1) > 0
4576 && REG_POINTER (p0)
4577 && INTVAL (p1) * BITS_PER_UNIT
4578 < REGNO_POINTER_ALIGN (REGNO (p0)))
4579 {
4580 rot = gen_reg_rtx (SImode);
4581 emit_insn (gen_addsi3 (rot, p0, p1));
4582 addr = p0;
4583 }
4584 else
4585 {
4586 rtx x = gen_reg_rtx (SImode);
4587 emit_move_insn (x, p1);
4588 if (!spu_arith_operand (p1, SImode))
4589 p1 = x;
4590 rot = gen_reg_rtx (SImode);
4591 emit_insn (gen_addsi3 (rot, p0, p1));
4592 addr = gen_rtx_PLUS (Pmode, p0, x);
4593 }
4594 }
4595 else
4596 rot = p0;
4597 }
4598 else
4599 {
4600 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4601 {
4602 rot_amt = INTVAL (p1) & 15;
4603 if (INTVAL (p1) & -16)
4604 {
4605 p1 = GEN_INT (INTVAL (p1) & -16);
4606 addr = gen_rtx_PLUS (SImode, p0, p1);
4607 }
4608 else
4609 addr = p0;
4610 }
4611 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4612 rot = p1;
4613 }
4614 }
4615 else if (REG_P (addr))
4616 {
4617 if (!reg_aligned_for_addr (addr))
4618 rot = addr;
4619 }
4620 else if (GET_CODE (addr) == CONST)
4621 {
4622 if (GET_CODE (XEXP (addr, 0)) == PLUS
4623 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4624 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4625 {
4626 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4627 if (rot_amt & -16)
4628 addr = gen_rtx_CONST (Pmode,
4629 gen_rtx_PLUS (Pmode,
4630 XEXP (XEXP (addr, 0), 0),
4631 GEN_INT (rot_amt & -16)));
4632 else
4633 addr = XEXP (XEXP (addr, 0), 0);
4634 }
4635 else
4636 {
4637 rot = gen_reg_rtx (Pmode);
4638 emit_move_insn (rot, addr);
4639 }
4640 }
4641 else if (GET_CODE (addr) == CONST_INT)
4642 {
4643 rot_amt = INTVAL (addr);
4644 addr = GEN_INT (rot_amt & -16);
4645 }
4646 else if (!ALIGNED_SYMBOL_REF_P (addr))
4647 {
4648 rot = gen_reg_rtx (Pmode);
4649 emit_move_insn (rot, addr);
4650 }
4651
4652 rot_amt += extra_rotby;
4653
4654 rot_amt &= 15;
4655
4656 if (rot && rot_amt)
4657 {
4658 rtx x = gen_reg_rtx (SImode);
4659 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4660 rot = x;
4661 rot_amt = 0;
4662 }
4663 if (!rot && rot_amt)
4664 rot = GEN_INT (rot_amt);
4665
4666 addr0 = copy_rtx (addr);
4667 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4668 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4669
4670 if (dst1)
4671 {
4672 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4673 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4674 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4675 }
4676
4677 return rot;
4678 }
4679
4680 int
4681 spu_split_load (rtx * ops)
4682 {
4683 machine_mode mode = GET_MODE (ops[0]);
4684 rtx addr, load, rot;
4685 int rot_amt;
4686
4687 if (GET_MODE_SIZE (mode) >= 16)
4688 return 0;
4689
4690 addr = XEXP (ops[1], 0);
4691 gcc_assert (GET_CODE (addr) != AND);
4692
4693 if (!address_needs_split (ops[1]))
4694 {
4695 ops[1] = change_address (ops[1], TImode, addr);
4696 load = gen_reg_rtx (TImode);
4697 emit_insn (gen__movti (load, ops[1]));
4698 spu_convert_move (ops[0], load);
4699 return 1;
4700 }
4701
4702 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4703
4704 load = gen_reg_rtx (TImode);
4705 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4706
4707 if (rot)
4708 emit_insn (gen_rotqby_ti (load, load, rot));
4709
4710 spu_convert_move (ops[0], load);
4711 return 1;
4712 }
4713
4714 int
4715 spu_split_store (rtx * ops)
4716 {
4717 machine_mode mode = GET_MODE (ops[0]);
4718 rtx reg;
4719 rtx addr, p0, p1, p1_lo, smem;
4720 int aform;
4721 int scalar;
4722
4723 if (GET_MODE_SIZE (mode) >= 16)
4724 return 0;
4725
4726 addr = XEXP (ops[0], 0);
4727 gcc_assert (GET_CODE (addr) != AND);
4728
4729 if (!address_needs_split (ops[0]))
4730 {
4731 reg = gen_reg_rtx (TImode);
4732 emit_insn (gen_spu_convert (reg, ops[1]));
4733 ops[0] = change_address (ops[0], TImode, addr);
4734 emit_move_insn (ops[0], reg);
4735 return 1;
4736 }
4737
4738 if (GET_CODE (addr) == PLUS)
4739 {
4740 /* 8 cases:
4741 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4742 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4743 aligned reg + aligned const => lqd, c?d, shuf, stqx
4744 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4745 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4746 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4747 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4748 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4749 */
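/* For example (the aligned reg + unaligned const case): storing an
   SImode value to (plus REG 4), with REG 16-byte aligned, loads the
   enclosing quadword, generates the insertion control word (cwd) for
   byte offset 4, shuffles the new word into bytes 4..7 of the
   quadword, and stores the whole quadword back.  */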
4750 aform = 0;
4751 p0 = XEXP (addr, 0);
4752 p1 = p1_lo = XEXP (addr, 1);
4753 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4754 {
4755 p1_lo = GEN_INT (INTVAL (p1) & 15);
4756 if (reg_aligned_for_addr (p0))
4757 {
4758 p1 = GEN_INT (INTVAL (p1) & -16);
4759 if (p1 == const0_rtx)
4760 addr = p0;
4761 else
4762 addr = gen_rtx_PLUS (SImode, p0, p1);
4763 }
4764 else
4765 {
4766 rtx x = gen_reg_rtx (SImode);
4767 emit_move_insn (x, p1);
4768 addr = gen_rtx_PLUS (SImode, p0, x);
4769 }
4770 }
4771 }
4772 else if (REG_P (addr))
4773 {
4774 aform = 0;
4775 p0 = addr;
4776 p1 = p1_lo = const0_rtx;
4777 }
4778 else
4779 {
4780 aform = 1;
4781 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4782 p1 = 0; /* aform doesn't use p1 */
4783 p1_lo = addr;
4784 if (ALIGNED_SYMBOL_REF_P (addr))
4785 p1_lo = const0_rtx;
4786 else if (GET_CODE (addr) == CONST
4787 && GET_CODE (XEXP (addr, 0)) == PLUS
4788 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4789 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4790 {
4791 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4792 if ((v & -16) != 0)
4793 addr = gen_rtx_CONST (Pmode,
4794 gen_rtx_PLUS (Pmode,
4795 XEXP (XEXP (addr, 0), 0),
4796 GEN_INT (v & -16)));
4797 else
4798 addr = XEXP (XEXP (addr, 0), 0);
4799 p1_lo = GEN_INT (v & 15);
4800 }
4801 else if (GET_CODE (addr) == CONST_INT)
4802 {
4803 p1_lo = GEN_INT (INTVAL (addr) & 15);
4804 addr = GEN_INT (INTVAL (addr) & -16);
4805 }
4806 else
4807 {
4808 p1_lo = gen_reg_rtx (SImode);
4809 emit_move_insn (p1_lo, addr);
4810 }
4811 }
4812
4813 gcc_assert (aform == 0 || aform == 1);
4814 reg = gen_reg_rtx (TImode);
4815
4816 scalar = store_with_one_insn_p (ops[0]);
4817 if (!scalar)
4818 {
4819 /* We could copy the flags from the ops[0] MEM to lmem here, but
4820 we don't, because we want this load to be optimized away if
4821 possible, and copying the flags would prevent that in certain
4822 cases, e.g. consider the volatile flag. */
4823
4824 rtx pat = gen_reg_rtx (TImode);
4825 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4826 set_mem_alias_set (lmem, 0);
4827 emit_insn (gen_movti (reg, lmem));
4828
4829 if (!p0 || reg_aligned_for_addr (p0))
4830 p0 = stack_pointer_rtx;
4831 if (!p1_lo)
4832 p1_lo = const0_rtx;
4833
4834 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4835 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4836 }
4837 else
4838 {
4839 if (GET_CODE (ops[1]) == REG)
4840 emit_insn (gen_spu_convert (reg, ops[1]));
4841 else if (GET_CODE (ops[1]) == SUBREG)
4842 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4843 else
4844 abort ();
4845 }
4846
4847 if (GET_MODE_SIZE (mode) < 4 && scalar)
4848 emit_insn (gen_ashlti3
4849 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4850
4851 smem = change_address (ops[0], TImode, copy_rtx (addr));
4852 /* We can't use the previous alias set because the memory has changed
4853 size and can potentially overlap objects of other types. */
4854 set_mem_alias_set (smem, 0);
4855
4856 emit_insn (gen_movti (smem, reg));
4857 return 1;
4858 }
4859
4860 /* Return TRUE if X is a MEM which is a struct member reference
4861 and the member can safely be loaded and stored with a single
4862 instruction because it is padded. */
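/* For example (illustrative):

     struct st { int a __attribute__ ((aligned (16))); short b; };

   does not qualify for a store to `a', because the following field `b'
   is not 16-byte aligned and shares a's quadword; but if `b' were also
   aligned to 16 bytes, or if `a' were the last field, the bytes after
   `a' up to the next quadword boundary would be structure padding and
   a full quadword store could not clobber other data.  */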
4863 static int
4864 mem_is_padded_component_ref (rtx x)
4865 {
4866 tree t = MEM_EXPR (x);
4867 tree r;
4868 if (!t || TREE_CODE (t) != COMPONENT_REF)
4869 return 0;
4870 t = TREE_OPERAND (t, 1);
4871 if (!t || TREE_CODE (t) != FIELD_DECL
4872 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4873 return 0;
4874 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4875 r = DECL_FIELD_CONTEXT (t);
4876 if (!r || TREE_CODE (r) != RECORD_TYPE)
4877 return 0;
4878 /* Make sure they are the same mode. */
4879 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4880 return 0;
4881 /* If there are no following fields, then the field's alignment ensures
4882 the structure is padded up to that alignment, which means this field
4883 is padded too. */
4884 if (TREE_CHAIN (t) == 0)
4885 return 1;
4886 /* If the following field is also aligned then this field will be
4887 padded. */
4888 t = TREE_CHAIN (t);
4889 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4890 return 1;
4891 return 0;
4892 }
4893
4894 /* Parse the -mfixed-range= option string. */
4895 static void
4896 fix_range (const char *const_str)
4897 {
4898 int i, first, last;
4899 char *str, *dash, *comma;
4900
4901 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4902 REG2 are either register names or register numbers. The effect
4903 of this option is to mark the registers in the range from REG1 to
4904 REG2 as ``fixed'' so they won't be used by the compiler. */
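/* For example (register numbers are illustrative only),
   -mfixed-range=80-85,90-92 marks registers 80 through 85 and 90
   through 92 as fixed and call-used, keeping the register allocator
   away from them, exactly as the loop below does for each parsed
   range.  */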
4905
4906 i = strlen (const_str);
4907 str = (char *) alloca (i + 1);
4908 memcpy (str, const_str, i + 1);
4909
4910 while (1)
4911 {
4912 dash = strchr (str, '-');
4913 if (!dash)
4914 {
4915 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
4916 return;
4917 }
4918 *dash = '\0';
4919 comma = strchr (dash + 1, ',');
4920 if (comma)
4921 *comma = '\0';
4922
4923 first = decode_reg_name (str);
4924 if (first < 0)
4925 {
4926 warning (0, "unknown register name: %s", str);
4927 return;
4928 }
4929
4930 last = decode_reg_name (dash + 1);
4931 if (last < 0)
4932 {
4933 warning (0, "unknown register name: %s", dash + 1);
4934 return;
4935 }
4936
4937 *dash = '-';
4938
4939 if (first > last)
4940 {
4941 warning (0, "%s-%s is an empty range", str, dash + 1);
4942 return;
4943 }
4944
4945 for (i = first; i <= last; ++i)
4946 fixed_regs[i] = call_used_regs[i] = 1;
4947
4948 if (!comma)
4949 break;
4950
4951 *comma = ',';
4952 str = comma + 1;
4953 }
4954 }
4955
4956 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4957 can be generated using the fsmbi instruction. */
4958 int
4959 fsmbi_const_p (rtx x)
4960 {
4961 if (CONSTANT_P (x))
4962 {
4963 /* We can always choose TImode for CONST_INT because the high bits
4964 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4965 enum immediate_class c = classify_immediate (x, TImode);
4966 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4967 }
4968 return 0;
4969 }
4970
4971 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4972 can be generated using the cbd, chd, cwd or cdd instruction. */
4973 int
4974 cpat_const_p (rtx x, machine_mode mode)
4975 {
4976 if (CONSTANT_P (x))
4977 {
4978 enum immediate_class c = classify_immediate (x, mode);
4979 return c == IC_CPAT;
4980 }
4981 return 0;
4982 }
4983
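/* Build the constant shuffle pattern that describes inserting an
   element of ops[3] bytes at the byte offset given by the low four
   bits of ops[1] + ops[2] (the same control word a cbd/chd/cwd/cdd
   instruction would generate).  For example (illustrative), a 4-byte
   element at byte offset 4 yields

     { 16 17 18 19  0  1  2  3  24 25 26 27 28 29 30 31 }

   Control values below 16 select from the first shufb source (which
   holds the element in its preferred slot) and values 16..31 select
   the corresponding byte of the second source, so a following shufb
   merges the element into the quadword passed as the second source.  */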
4984 rtx
4985 gen_cpat_const (rtx * ops)
4986 {
4987 unsigned char dst[16];
4988 int i, offset, shift, isize;
4989 if (GET_CODE (ops[3]) != CONST_INT
4990 || GET_CODE (ops[2]) != CONST_INT
4991 || (GET_CODE (ops[1]) != CONST_INT
4992 && GET_CODE (ops[1]) != REG))
4993 return 0;
4994 if (GET_CODE (ops[1]) == REG
4995 && (!REG_POINTER (ops[1])
4996 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4997 return 0;
4998
4999 for (i = 0; i < 16; i++)
5000 dst[i] = i + 16;
5001 isize = INTVAL (ops[3]);
5002 if (isize == 1)
5003 shift = 3;
5004 else if (isize == 2)
5005 shift = 2;
5006 else
5007 shift = 0;
5008 offset = (INTVAL (ops[2]) +
5009 (GET_CODE (ops[1]) ==
5010 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5011 for (i = 0; i < isize; i++)
5012 dst[offset + i] = i + shift;
5013 return array_to_constant (TImode, dst);
5014 }
5015
5016 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5017 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5018 than 16 bytes, the value is repeated across the rest of the array. */
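/* For example, an SImode constant 0x01020304 produces

     { 01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04 }

   i.e. the value is written big-endian in its natural width and then
   splatted across the remaining bytes.  */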
5019 void
5020 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5021 {
5022 HOST_WIDE_INT val;
5023 int i, j, first;
5024
5025 memset (arr, 0, 16);
5026 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5027 if (GET_CODE (x) == CONST_INT
5028 || (GET_CODE (x) == CONST_DOUBLE
5029 && (mode == SFmode || mode == DFmode)))
5030 {
5031 gcc_assert (mode != VOIDmode && mode != BLKmode);
5032
5033 if (GET_CODE (x) == CONST_DOUBLE)
5034 val = const_double_to_hwint (x);
5035 else
5036 val = INTVAL (x);
5037 first = GET_MODE_SIZE (mode) - 1;
5038 for (i = first; i >= 0; i--)
5039 {
5040 arr[i] = val & 0xff;
5041 val >>= 8;
5042 }
5043 /* Splat the constant across the whole array. */
5044 for (j = 0, i = first + 1; i < 16; i++)
5045 {
5046 arr[i] = arr[j];
5047 j = (j == first) ? 0 : j + 1;
5048 }
5049 }
5050 else if (GET_CODE (x) == CONST_DOUBLE)
5051 {
5052 val = CONST_DOUBLE_LOW (x);
5053 for (i = 15; i >= 8; i--)
5054 {
5055 arr[i] = val & 0xff;
5056 val >>= 8;
5057 }
5058 val = CONST_DOUBLE_HIGH (x);
5059 for (i = 7; i >= 0; i--)
5060 {
5061 arr[i] = val & 0xff;
5062 val >>= 8;
5063 }
5064 }
5065 else if (GET_CODE (x) == CONST_VECTOR)
5066 {
5067 int units;
5068 rtx elt;
5069 mode = GET_MODE_INNER (mode);
5070 units = CONST_VECTOR_NUNITS (x);
5071 for (i = 0; i < units; i++)
5072 {
5073 elt = CONST_VECTOR_ELT (x, i);
5074 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5075 {
5076 if (GET_CODE (elt) == CONST_DOUBLE)
5077 val = const_double_to_hwint (elt);
5078 else
5079 val = INTVAL (elt);
5080 first = GET_MODE_SIZE (mode) - 1;
5081 if (first + i * GET_MODE_SIZE (mode) > 16)
5082 abort ();
5083 for (j = first; j >= 0; j--)
5084 {
5085 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5086 val >>= 8;
5087 }
5088 }
5089 }
5090 }
5091 else
5092 gcc_unreachable();
5093 }
5094
5095 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5096 smaller than 16 bytes, use the bytes that would represent that value
5097 in a register, e.g., for QImode return the value of arr[3]. */
5098 rtx
5099 array_to_constant (machine_mode mode, const unsigned char arr[16])
5100 {
5101 machine_mode inner_mode;
5102 rtvec v;
5103 int units, size, i, j, k;
5104 HOST_WIDE_INT val;
5105
5106 if (GET_MODE_CLASS (mode) == MODE_INT
5107 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5108 {
5109 j = GET_MODE_SIZE (mode);
5110 i = j < 4 ? 4 - j : 0;
5111 for (val = 0; i < j; i++)
5112 val = (val << 8) | arr[i];
5113 val = trunc_int_for_mode (val, mode);
5114 return GEN_INT (val);
5115 }
5116
5117 if (mode == TImode)
5118 {
5119 HOST_WIDE_INT high;
5120 for (i = high = 0; i < 8; i++)
5121 high = (high << 8) | arr[i];
5122 for (i = 8, val = 0; i < 16; i++)
5123 val = (val << 8) | arr[i];
5124 return immed_double_const (val, high, TImode);
5125 }
5126 if (mode == SFmode)
5127 {
5128 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5129 val = trunc_int_for_mode (val, SImode);
5130 return hwint_to_const_double (SFmode, val);
5131 }
5132 if (mode == DFmode)
5133 {
5134 for (i = 0, val = 0; i < 8; i++)
5135 val = (val << 8) | arr[i];
5136 return hwint_to_const_double (DFmode, val);
5137 }
5138
5139 if (!VECTOR_MODE_P (mode))
5140 abort ();
5141
5142 units = GET_MODE_NUNITS (mode);
5143 size = GET_MODE_UNIT_SIZE (mode);
5144 inner_mode = GET_MODE_INNER (mode);
5145 v = rtvec_alloc (units);
5146
5147 for (k = i = 0; i < units; ++i)
5148 {
5149 val = 0;
5150 for (j = 0; j < size; j++, k++)
5151 val = (val << 8) | arr[k];
5152
5153 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5154 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5155 else
5156 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5157 }
5158 if (k > 16)
5159 abort ();
5160
5161 return gen_rtx_CONST_VECTOR (mode, v);
5162 }
5163
5164 static void
5165 reloc_diagnostic (rtx x)
5166 {
5167 tree decl = 0;
5168 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5169 return;
5170
5171 if (GET_CODE (x) == SYMBOL_REF)
5172 decl = SYMBOL_REF_DECL (x);
5173 else if (GET_CODE (x) == CONST
5174 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5175 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5176
5177 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5178 if (decl && !DECL_P (decl))
5179 decl = 0;
5180
5181 /* The decl could be a string constant. */
5182 if (decl && DECL_P (decl))
5183 {
5184 location_t loc;
5185 /* We use last_assemble_variable_decl to get line information. It's
5186 not always going to be right and might not even be close, but will
5187 be right for the more common cases. */
5188 if (!last_assemble_variable_decl || in_section == ctors_section)
5189 loc = DECL_SOURCE_LOCATION (decl);
5190 else
5191 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5192
5193 if (TARGET_WARN_RELOC)
5194 warning_at (loc, 0,
5195 "creating run-time relocation for %qD", decl);
5196 else
5197 error_at (loc,
5198 "creating run-time relocation for %qD", decl);
5199 }
5200 else
5201 {
5202 if (TARGET_WARN_RELOC)
5203 warning_at (input_location, 0, "creating run-time relocation");
5204 else
5205 error_at (input_location, "creating run-time relocation");
5206 }
5207 }
5208
5209 /* Hook into assemble_integer so we can generate an error for run-time
5210 relocations. The SPU ABI disallows them. */
5211 static bool
5212 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5213 {
5214 /* By default run-time relocations aren't supported, but we allow them
5215 in case users support them in their own run-time loader, and we provide
5216 a warning for those users who don't. */
5217 if ((GET_CODE (x) == SYMBOL_REF)
5218 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5219 reloc_diagnostic (x);
5220
5221 return default_assemble_integer (x, size, aligned_p);
5222 }
5223
5224 static void
5225 spu_asm_globalize_label (FILE * file, const char *name)
5226 {
5227 fputs ("\t.global\t", file);
5228 assemble_name (file, name);
5229 fputs ("\n", file);
5230 }
5231
5232 static bool
5233 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5234 int opno ATTRIBUTE_UNUSED, int *total,
5235 bool speed ATTRIBUTE_UNUSED)
5236 {
5237 int code = GET_CODE (x);
5238 int cost = COSTS_N_INSNS (2);
5239
5240 /* Folding to a CONST_VECTOR will use extra space but there might
5241 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5242 only if it allows us to fold away multiple insns. Changing the cost
5243 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5244 because this cost will only be compared against a single insn.
5245 if (code == CONST_VECTOR)
5246 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5247 */
5248
5249 /* Use defaults for float operations. Not accurate but good enough. */
5250 if (mode == DFmode)
5251 {
5252 *total = COSTS_N_INSNS (13);
5253 return true;
5254 }
5255 if (mode == SFmode)
5256 {
5257 *total = COSTS_N_INSNS (6);
5258 return true;
5259 }
5260 switch (code)
5261 {
5262 case CONST_INT:
5263 if (satisfies_constraint_K (x))
5264 *total = 0;
5265 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5266 *total = COSTS_N_INSNS (1);
5267 else
5268 *total = COSTS_N_INSNS (3);
5269 return true;
5270
5271 case CONST:
5272 *total = COSTS_N_INSNS (3);
5273 return true;
5274
5275 case LABEL_REF:
5276 case SYMBOL_REF:
5277 *total = COSTS_N_INSNS (0);
5278 return true;
5279
5280 case CONST_DOUBLE:
5281 *total = COSTS_N_INSNS (5);
5282 return true;
5283
5284 case FLOAT_EXTEND:
5285 case FLOAT_TRUNCATE:
5286 case FLOAT:
5287 case UNSIGNED_FLOAT:
5288 case FIX:
5289 case UNSIGNED_FIX:
5290 *total = COSTS_N_INSNS (7);
5291 return true;
5292
5293 case PLUS:
5294 if (mode == TImode)
5295 {
5296 *total = COSTS_N_INSNS (9);
5297 return true;
5298 }
5299 break;
5300
5301 case MULT:
5302 cost =
5303 GET_CODE (XEXP (x, 0)) ==
5304 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5305 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5306 {
5307 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5308 {
5309 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5310 cost = COSTS_N_INSNS (14);
5311 if ((val & 0xffff) == 0)
5312 cost = COSTS_N_INSNS (9);
5313 else if (val > 0 && val < 0x10000)
5314 cost = COSTS_N_INSNS (11);
5315 }
5316 }
5317 *total = cost;
5318 return true;
5319 case DIV:
5320 case UDIV:
5321 case MOD:
5322 case UMOD:
5323 *total = COSTS_N_INSNS (20);
5324 return true;
5325 case ROTATE:
5326 case ROTATERT:
5327 case ASHIFT:
5328 case ASHIFTRT:
5329 case LSHIFTRT:
5330 *total = COSTS_N_INSNS (4);
5331 return true;
5332 case UNSPEC:
5333 if (XINT (x, 1) == UNSPEC_CONVERT)
5334 *total = COSTS_N_INSNS (0);
5335 else
5336 *total = COSTS_N_INSNS (4);
5337 return true;
5338 }
5339 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5340 if (GET_MODE_CLASS (mode) == MODE_INT
5341 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5342 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5343 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5344 *total = cost;
5345 return true;
5346 }
5347
5348 static scalar_int_mode
5349 spu_unwind_word_mode (void)
5350 {
5351 return SImode;
5352 }
5353
5354 /* Decide whether we can make a sibling call to a function. DECL is the
5355 declaration of the function being targeted by the call and EXP is the
5356 CALL_EXPR representing the call. */
5357 static bool
5358 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5359 {
5360 return decl && !TARGET_LARGE_MEM;
5361 }
5362
5363 /* We need to correctly update the back chain pointer and the Available
5364 Stack Size (which is in the second slot of the sp register). */
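/* Roughly, and for illustration only, for a request of OP1 bytes the
   expansion below performs:

     chain = *(qword *) $sp;        // save the back-chain quadword
     $sp.word[i] -= OP1;            // word 0: stack pointer,
                                    // word 1: Available Stack Size
     // with -fstack-check / -fstack-clash-protection, halt if the
     // available size went negative
     *(qword *) $sp = chain;        // re-link the back chain at the new bottom
     OP0 = address of the newly allocated block;

   Splatting OP1 into all four word slots is what lets a single vector
   subtract update the stack pointer and the available-size word at
   once.  */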
5365 void
5366 spu_allocate_stack (rtx op0, rtx op1)
5367 {
5368 HOST_WIDE_INT v;
5369 rtx chain = gen_reg_rtx (V4SImode);
5370 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5371 rtx sp = gen_reg_rtx (V4SImode);
5372 rtx splatted = gen_reg_rtx (V4SImode);
5373 rtx pat = gen_reg_rtx (TImode);
5374
5375 /* Copy the back chain so we can write it back again afterwards. */
5376 emit_move_insn (chain, stack_bot);
5377
5378 op1 = force_reg (SImode, op1);
5379
5380 v = 0x1020300010203ll;
5381 emit_move_insn (pat, immed_double_const (v, v, TImode));
5382 emit_insn (gen_shufb (splatted, op1, op1, pat));
5383
5384 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5385 emit_insn (gen_subv4si3 (sp, sp, splatted));
5386
5387 if (flag_stack_check || flag_stack_clash_protection)
5388 {
5389 rtx avail = gen_reg_rtx(SImode);
5390 rtx result = gen_reg_rtx(SImode);
5391 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5392 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5393 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5394 }
5395
5396 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5397
5398 emit_move_insn (stack_bot, chain);
5399
5400 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5401 }
5402
5403 void
5404 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5405 {
5406 static unsigned char arr[16] =
5407 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5408 rtx temp = gen_reg_rtx (SImode);
5409 rtx temp2 = gen_reg_rtx (SImode);
5410 rtx temp3 = gen_reg_rtx (V4SImode);
5411 rtx temp4 = gen_reg_rtx (V4SImode);
5412 rtx pat = gen_reg_rtx (TImode);
5413 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5414
5415 /* Restore the back chain from the first word, sp from the second. */
5416 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5417 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5418
5419 emit_move_insn (pat, array_to_constant (TImode, arr));
5420
5421 /* Compute Available Stack Size for sp */
5422 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5423 emit_insn (gen_shufb (temp3, temp, temp, pat));
5424
5425 /* Compute Available Stack Size for back chain */
5426 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5427 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5428 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5429
5430 emit_insn (gen_addv4si3 (sp, sp, temp3));
5431 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5432 }
5433
5434 static void
5435 spu_init_libfuncs (void)
5436 {
5437 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5438 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5439 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5440 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5441 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5442 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5443 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5444 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5445 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5446 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5447 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5448 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5449
5450 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5451 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5452
5453 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5454 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5455 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5456 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5457 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5458 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5459 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5460 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5461 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5462 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5463 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5464 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5465
5466 set_optab_libfunc (smul_optab, TImode, "__multi3");
5467 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5468 set_optab_libfunc (smod_optab, TImode, "__modti3");
5469 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5470 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5471 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5472 }
5473
5474 /* Make a subreg, stripping any existing subreg. We could possibly just
5475 call simplify_subreg, but in this case we know what we want. */
5476 rtx
5477 spu_gen_subreg (machine_mode mode, rtx x)
5478 {
5479 if (GET_CODE (x) == SUBREG)
5480 x = SUBREG_REG (x);
5481 if (GET_MODE (x) == mode)
5482 return x;
5483 return gen_rtx_SUBREG (mode, x, 0);
5484 }
5485
5486 static bool
5487 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5488 {
5489 return (TYPE_MODE (type) == BLKmode
5490 && ((type) == 0
5491 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5492 || int_size_in_bytes (type) >
5493 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5494 }
5495 \f
5496 /* Create the built-in types and functions */
5497
5498 enum spu_function_code
5499 {
5500 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5501 #include "spu-builtins.def"
5502 #undef DEF_BUILTIN
5503 NUM_SPU_BUILTINS
5504 };
5505
5506 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5507
5508 struct spu_builtin_description spu_builtins[] = {
5509 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5510 {fcode, icode, name, type, params},
5511 #include "spu-builtins.def"
5512 #undef DEF_BUILTIN
5513 };
5514
5515 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5516
5517 /* Returns the spu builtin decl for CODE. */
5518
5519 static tree
5520 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5521 {
5522 if (code >= NUM_SPU_BUILTINS)
5523 return error_mark_node;
5524
5525 return spu_builtin_decls[code];
5526 }
5527
5528
5529 static void
5530 spu_init_builtins (void)
5531 {
5532 struct spu_builtin_description *d;
5533 unsigned int i;
5534
5535 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5536 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5537 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5538 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5539 V4SF_type_node = build_vector_type (float_type_node, 4);
5540 V2DF_type_node = build_vector_type (double_type_node, 2);
5541
5542 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5543 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5544 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5545 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5546
5547 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5548
5549 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5561
5562 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5563 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5564 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5566 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5567 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5568 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5569 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5570
5571 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5572 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5573
5574 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5575
5576 spu_builtin_types[SPU_BTI_PTR] =
5577 build_pointer_type (build_qualified_type
5578 (void_type_node,
5579 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5580
5581 /* For each builtin we build a new prototype. The tree code will make
5582 sure nodes are shared. */
5583 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5584 {
5585 tree p;
5586 char name[64]; /* add_builtin_function will make a copy. */
5587 int parm;
5588
5589 if (d->name == 0)
5590 continue;
5591
5592 /* Find last parm. */
5593 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5594 ;
5595
5596 p = void_list_node;
5597 while (parm > 1)
5598 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5599
5600 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5601
5602 sprintf (name, "__builtin_%s", d->name);
5603 spu_builtin_decls[i] =
5604 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5605 if (d->fcode == SPU_MASK_FOR_LOAD)
5606 TREE_READONLY (spu_builtin_decls[i]) = 1;
5607
5608 /* These builtins don't throw. */
5609 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5610 }
5611 }
5612
5613 void
5614 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5615 {
5616 static unsigned char arr[16] =
5617 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5618
5619 rtx temp = gen_reg_rtx (Pmode);
5620 rtx temp2 = gen_reg_rtx (V4SImode);
5621 rtx temp3 = gen_reg_rtx (V4SImode);
5622 rtx pat = gen_reg_rtx (TImode);
5623 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5624
5625 emit_move_insn (pat, array_to_constant (TImode, arr));
5626
5627 /* Restore the sp. */
5628 emit_move_insn (temp, op1);
5629 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5630
5631 /* Compute available stack size for sp. */
5632 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5633 emit_insn (gen_shufb (temp3, temp, temp, pat));
5634
5635 emit_insn (gen_addv4si3 (sp, sp, temp3));
5636 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5637 }
5638
5639 int
5640 spu_safe_dma (HOST_WIDE_INT channel)
5641 {
5642 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5643 }
5644
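/* Replicate the scalar ops[1] into every element of the vector ops[0].
   For a non-constant input this is a single shufb whose control
   constant repeats the byte indices of the scalar's preferred slot;
   e.g. for V4SImode the pattern 00 01 02 03 repeated four times copies
   bytes 0..3 of ops[1] into each word slot, and for V16QImode the
   pattern is byte 03 repeated sixteen times.  */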
5645 void
5646 spu_builtin_splats (rtx ops[])
5647 {
5648 machine_mode mode = GET_MODE (ops[0]);
5649 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5650 {
5651 unsigned char arr[16];
5652 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5653 emit_move_insn (ops[0], array_to_constant (mode, arr));
5654 }
5655 else
5656 {
5657 rtx reg = gen_reg_rtx (TImode);
5658 rtx shuf;
5659 if (GET_CODE (ops[1]) != REG
5660 && GET_CODE (ops[1]) != SUBREG)
5661 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5662 switch (mode)
5663 {
5664 case E_V2DImode:
5665 case E_V2DFmode:
5666 shuf =
5667 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5668 TImode);
5669 break;
5670 case E_V4SImode:
5671 case E_V4SFmode:
5672 shuf =
5673 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5674 TImode);
5675 break;
5676 case E_V8HImode:
5677 shuf =
5678 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5679 TImode);
5680 break;
5681 case E_V16QImode:
5682 shuf =
5683 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5684 TImode);
5685 break;
5686 default:
5687 abort ();
5688 }
5689 emit_move_insn (reg, shuf);
5690 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5691 }
5692 }
5693
5694 void
5695 spu_builtin_extract (rtx ops[])
5696 {
5697 machine_mode mode;
5698 rtx rot, from, tmp;
5699
5700 mode = GET_MODE (ops[1]);
5701
5702 if (GET_CODE (ops[2]) == CONST_INT)
5703 {
5704 switch (mode)
5705 {
5706 case E_V16QImode:
5707 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5708 break;
5709 case E_V8HImode:
5710 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5711 break;
5712 case E_V4SFmode:
5713 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5714 break;
5715 case E_V4SImode:
5716 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5717 break;
5718 case E_V2DImode:
5719 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5720 break;
5721 case E_V2DFmode:
5722 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5723 break;
5724 default:
5725 abort ();
5726 }
5727 return;
5728 }
5729
5730 from = spu_gen_subreg (TImode, ops[1]);
5731 rot = gen_reg_rtx (TImode);
5732 tmp = gen_reg_rtx (SImode);
5733
5734 switch (mode)
5735 {
5736 case E_V16QImode:
5737 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5738 break;
5739 case E_V8HImode:
5740 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5741 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5742 break;
5743 case E_V4SFmode:
5744 case E_V4SImode:
5745 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5746 break;
5747 case E_V2DImode:
5748 case E_V2DFmode:
5749 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5750 break;
5751 default:
5752 abort ();
5753 }
5754 emit_insn (gen_rotqby_ti (rot, from, tmp));
5755
5756 emit_insn (gen_spu_convert (ops[0], rot));
5757 }
5758
5759 void
5760 spu_builtin_insert (rtx ops[])
5761 {
5762 machine_mode mode = GET_MODE (ops[0]);
5763 machine_mode imode = GET_MODE_INNER (mode);
5764 rtx mask = gen_reg_rtx (TImode);
5765 rtx offset;
5766
5767 if (GET_CODE (ops[3]) == CONST_INT)
5768 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5769 else
5770 {
5771 offset = gen_reg_rtx (SImode);
5772 emit_insn (gen_mulsi3
5773 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5774 }
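/* cpat builds the same kind of insertion mask as the cwd/chd/cbd
   instructions: shufb control bytes that copy ops[2] unchanged except
   for the GET_MODE_SIZE (imode) bytes at OFFSET, which are taken from
   the new element in ops[1].  */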
5775 emit_insn (gen_cpat
5776 (mask, stack_pointer_rtx, offset,
5777 GEN_INT (GET_MODE_SIZE (imode))));
5778 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5779 }
5780
5781 void
5782 spu_builtin_promote (rtx ops[])
5783 {
5784 machine_mode mode, imode;
5785 rtx rot, from, offset;
5786 HOST_WIDE_INT pos;
5787
5788 mode = GET_MODE (ops[0]);
5789 imode = GET_MODE_INNER (mode);
5790
5791 from = gen_reg_rtx (TImode);
5792 rot = spu_gen_subreg (TImode, ops[0]);
5793
5794 emit_insn (gen_spu_convert (from, ops[1]));
5795
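/* Compute the rotqby byte count that moves the scalar from its
   preferred slot into element ops[2] of the result; rotqby only
   uses the count modulo 16.  */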
5796 if (GET_CODE (ops[2]) == CONST_INT)
5797 {
5798 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5799 if (GET_MODE_SIZE (imode) < 4)
5800 pos += 4 - GET_MODE_SIZE (imode);
5801 offset = GEN_INT (pos & 15);
5802 }
5803 else
5804 {
5805 offset = gen_reg_rtx (SImode);
5806 switch (mode)
5807 {
5808 case E_V16QImode:
5809 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5810 break;
5811 case E_V8HImode:
5812 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5813 emit_insn (gen_addsi3 (offset, offset, offset));
5814 break;
5815 case E_V4SFmode:
5816 case E_V4SImode:
5817 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5818 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5819 break;
5820 case E_V2DImode:
5821 case E_V2DFmode:
5822 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5823 break;
5824 default:
5825 abort ();
5826 }
5827 }
5828 emit_insn (gen_rotqby_ti (rot, from, offset));
5829 }
5830
5831 static void
5832 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5833 {
5834 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5835 rtx shuf = gen_reg_rtx (V4SImode);
5836 rtx insn = gen_reg_rtx (V4SImode);
5837 rtx shufc;
5838 rtx insnc;
5839 rtx mem;
5840
5841 fnaddr = force_reg (SImode, fnaddr);
5842 cxt = force_reg (SImode, cxt);
5843
5844 if (TARGET_LARGE_MEM)
5845 {
5846 rtx rotl = gen_reg_rtx (V4SImode);
5847 rtx mask = gen_reg_rtx (V4SImode);
5848 rtx bi = gen_reg_rtx (SImode);
5849 static unsigned char const shufa[16] = {
5850 2, 3, 0, 1, 18, 19, 16, 17,
5851 0, 1, 2, 3, 16, 17, 18, 19
5852 };
5853 static unsigned char const insna[16] = {
5854 0x41, 0, 0, 79,
5855 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5856 0x60, 0x80, 0, 79,
5857 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5858 };
5859
5860 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5861 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5862
5863 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5864 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5865 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5866 emit_insn (gen_selb (insn, insnc, rotl, mask));
5867
5868 mem = adjust_address (m_tramp, V4SImode, 0);
5869 emit_move_insn (mem, insn);
5870
5871 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5872 mem = adjust_address (m_tramp, Pmode, 16);
5873 emit_move_insn (mem, bi);
5874 }
5875 else
5876 {
5877 rtx scxt = gen_reg_rtx (SImode);
5878 rtx sfnaddr = gen_reg_rtx (SImode);
5879 static unsigned char const insna[16] = {
5880 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5881 0x30, 0, 0, 0,
5882 0, 0, 0, 0,
5883 0, 0, 0, 0
5884 };
5885
5886 shufc = gen_reg_rtx (TImode);
5887 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5888
5889 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5890 fits in 18 bits and that the last 4 bits are zeros. This will be
5891 true if the stack pointer is initialized to 0x3fff0 at program
5892 start; otherwise the ila instruction will be garbage. */
5893
5894 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5895 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5896 emit_insn (gen_cpat
5897 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5898 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5899 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5900
5901 mem = adjust_address (m_tramp, V4SImode, 0);
5902 emit_move_insn (mem, insn);
5903 }
5904 emit_insn (gen_sync ());
5905 }
5906
5907 static bool
5908 spu_warn_func_return (tree decl)
5909 {
5910 /* Naked functions are implemented entirely in assembly, including the
5911 return sequence, so suppress warnings about this. */
5912 return !spu_naked_function_p (decl);
5913 }
5914
5915 void
5916 spu_expand_sign_extend (rtx ops[])
5917 {
5918 unsigned char arr[16];
5919 rtx pat = gen_reg_rtx (TImode);
5920 rtx sign, c;
5921 int i, last;
5922 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
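/* Build a shufb control pattern in arr: control bytes in the 0x10-0x1f
   range select bytes of the sign word (the second shufb source), and
   bytes in the 0x00-0x0f range select bytes of ops[1], so the shufb
   below produces ops[1] with its sign bits replicated through the
   upper bytes of the DImode or TImode result.  */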
5923 if (GET_MODE (ops[1]) == QImode)
5924 {
5925 sign = gen_reg_rtx (HImode);
5926 emit_insn (gen_extendqihi2 (sign, ops[1]));
5927 for (i = 0; i < 16; i++)
5928 arr[i] = 0x12;
5929 arr[last] = 0x13;
5930 }
5931 else
5932 {
5933 for (i = 0; i < 16; i++)
5934 arr[i] = 0x10;
5935 switch (GET_MODE (ops[1]))
5936 {
5937 case E_HImode:
5938 sign = gen_reg_rtx (SImode);
5939 emit_insn (gen_extendhisi2 (sign, ops[1]));
5940 arr[last] = 0x03;
5941 arr[last - 1] = 0x02;
5942 break;
5943 case E_SImode:
5944 sign = gen_reg_rtx (SImode);
5945 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5946 for (i = 0; i < 4; i++)
5947 arr[last - i] = 3 - i;
5948 break;
5949 case E_DImode:
5950 sign = gen_reg_rtx (SImode);
5951 c = gen_reg_rtx (SImode);
5952 emit_insn (gen_spu_convert (c, ops[1]));
5953 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5954 for (i = 0; i < 8; i++)
5955 arr[last - i] = 7 - i;
5956 break;
5957 default:
5958 abort ();
5959 }
5960 }
5961 emit_move_insn (pat, array_to_constant (TImode, arr));
5962 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5963 }
5964
5965 /* Expand vector initialization. If there are any constant parts,
5966 load the constant parts first. Then load any non-constant parts. */
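/* For example, a V4SI initializer { x, 1, 2, 3 } with variable x is
   expanded by first loading the constant vector { 1, 1, 2, 3 } (empty
   slots are filled with the first constant so the recursive call can
   use splats when possible) and then inserting x into element 0.  */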
5967 void
5968 spu_expand_vector_init (rtx target, rtx vals)
5969 {
5970 machine_mode mode = GET_MODE (target);
5971 int n_elts = GET_MODE_NUNITS (mode);
5972 int n_var = 0;
5973 bool all_same = true;
5974 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5975 int i;
5976
5977 first = XVECEXP (vals, 0, 0);
5978 for (i = 0; i < n_elts; ++i)
5979 {
5980 x = XVECEXP (vals, 0, i);
5981 if (!(CONST_INT_P (x)
5982 || GET_CODE (x) == CONST_DOUBLE
5983 || GET_CODE (x) == CONST_FIXED))
5984 ++n_var;
5985 else
5986 {
5987 if (first_constant == NULL_RTX)
5988 first_constant = x;
5989 }
5990 if (i > 0 && !rtx_equal_p (x, first))
5991 all_same = false;
5992 }
5993
5994 /* If all elements are the same, use splats to repeat the element. */
5995 if (all_same)
5996 {
5997 if (!CONSTANT_P (first)
5998 && !register_operand (first, GET_MODE (x)))
5999 first = force_reg (GET_MODE (first), first);
6000 emit_insn (gen_spu_splats (target, first));
6001 return;
6002 }
6003
6004 /* Load the constant parts. */
6005 if (n_var != n_elts)
6006 {
6007 if (n_var == 0)
6008 {
6009 emit_move_insn (target,
6010 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6011 }
6012 else
6013 {
6014 rtx constant_parts_rtx = copy_rtx (vals);
6015
6016 gcc_assert (first_constant != NULL_RTX);
6017 /* Fill empty slots with the first constant; this increases
6018 our chance of using splats in the recursive call below. */
6019 for (i = 0; i < n_elts; ++i)
6020 {
6021 x = XVECEXP (constant_parts_rtx, 0, i);
6022 if (!(CONST_INT_P (x)
6023 || GET_CODE (x) == CONST_DOUBLE
6024 || GET_CODE (x) == CONST_FIXED))
6025 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6026 }
6027
6028 spu_expand_vector_init (target, constant_parts_rtx);
6029 }
6030 }
6031
6032 /* Load the variable parts. */
6033 if (n_var != 0)
6034 {
6035 rtx insert_operands[4];
6036
6037 insert_operands[0] = target;
6038 insert_operands[2] = target;
6039 for (i = 0; i < n_elts; ++i)
6040 {
6041 x = XVECEXP (vals, 0, i);
6042 if (!(CONST_INT_P (x)
6043 || GET_CODE (x) == CONST_DOUBLE
6044 || GET_CODE (x) == CONST_FIXED))
6045 {
6046 if (!register_operand (x, GET_MODE (x)))
6047 x = force_reg (GET_MODE (x), x);
6048 insert_operands[1] = x;
6049 insert_operands[3] = GEN_INT (i);
6050 spu_builtin_insert (insert_operands);
6051 }
6052 }
6053 }
6054 }
6055
6056 /* Return the insn code for the vector compare instruction for the given
6057 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6058
6059 static int
6060 get_vec_cmp_insn (enum rtx_code code,
6061 machine_mode dest_mode,
6062 machine_mode op_mode)
6063
6064 {
6065 switch (code)
6066 {
6067 case EQ:
6068 if (dest_mode == V16QImode && op_mode == V16QImode)
6069 return CODE_FOR_ceq_v16qi;
6070 if (dest_mode == V8HImode && op_mode == V8HImode)
6071 return CODE_FOR_ceq_v8hi;
6072 if (dest_mode == V4SImode && op_mode == V4SImode)
6073 return CODE_FOR_ceq_v4si;
6074 if (dest_mode == V4SImode && op_mode == V4SFmode)
6075 return CODE_FOR_ceq_v4sf;
6076 if (dest_mode == V2DImode && op_mode == V2DFmode)
6077 return CODE_FOR_ceq_v2df;
6078 break;
6079 case GT:
6080 if (dest_mode == V16QImode && op_mode == V16QImode)
6081 return CODE_FOR_cgt_v16qi;
6082 if (dest_mode == V8HImode && op_mode == V8HImode)
6083 return CODE_FOR_cgt_v8hi;
6084 if (dest_mode == V4SImode && op_mode == V4SImode)
6085 return CODE_FOR_cgt_v4si;
6086 if (dest_mode == V4SImode && op_mode == V4SFmode)
6087 return CODE_FOR_cgt_v4sf;
6088 if (dest_mode == V2DImode && op_mode == V2DFmode)
6089 return CODE_FOR_cgt_v2df;
6090 break;
6091 case GTU:
6092 if (dest_mode == V16QImode && op_mode == V16QImode)
6093 return CODE_FOR_clgt_v16qi;
6094 if (dest_mode == V8HImode && op_mode == V8HImode)
6095 return CODE_FOR_clgt_v8hi;
6096 if (dest_mode == V4SImode && op_mode == V4SImode)
6097 return CODE_FOR_clgt_v4si;
6098 break;
6099 default:
6100 break;
6101 }
6102 return -1;
6103 }
6104
6105 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6106 DMODE is the expected destination mode. This is a recursive function. */
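/* SPU only provides EQ, GT and GTU vector compares (see
   get_vec_cmp_insn above); other codes are synthesized here, e.g.
   GE as (GT OR EQ), LT by retrying GT with swapped operands, and
   NE as the complement of EQ.  */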
6107
6108 static rtx
6109 spu_emit_vector_compare (enum rtx_code rcode,
6110 rtx op0, rtx op1,
6111 machine_mode dmode)
6112 {
6113 int vec_cmp_insn;
6114 rtx mask;
6115 machine_mode dest_mode;
6116 machine_mode op_mode = GET_MODE (op1);
6117
6118 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6119
6120 /* Single precision floating point vector compare instructions use
6121 destination mode V4SImode, and double precision ones use V2DImode.
6122 Move the destination to the appropriate mode later. */
6123 if (dmode == V4SFmode)
6124 dest_mode = V4SImode;
6125 else if (dmode == V2DFmode)
6126 dest_mode = V2DImode;
6127 else
6128 dest_mode = dmode;
6129
6130 mask = gen_reg_rtx (dest_mode);
6131 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6132
6133 if (vec_cmp_insn == -1)
6134 {
6135 bool swap_operands = false;
6136 bool try_again = false;
6137 switch (rcode)
6138 {
6139 case LT:
6140 rcode = GT;
6141 swap_operands = true;
6142 try_again = true;
6143 break;
6144 case LTU:
6145 rcode = GTU;
6146 swap_operands = true;
6147 try_again = true;
6148 break;
6149 case NE:
6150 case UNEQ:
6151 case UNLE:
6152 case UNLT:
6153 case UNGE:
6154 case UNGT:
6155 case UNORDERED:
6156 /* Treat A != B as ~(A==B). */
6157 {
6158 enum rtx_code rev_code;
6159 enum insn_code nor_code;
6160 rtx rev_mask;
6161
6162 rev_code = reverse_condition_maybe_unordered (rcode);
6163 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6164
6165 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6166 gcc_assert (nor_code != CODE_FOR_nothing);
6167 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6168 if (dmode != dest_mode)
6169 {
6170 rtx temp = gen_reg_rtx (dest_mode);
6171 convert_move (temp, mask, 0);
6172 return temp;
6173 }
6174 return mask;
6175 }
6176 break;
6177 case GE:
6178 case GEU:
6179 case LE:
6180 case LEU:
6181 /* Try GT/GTU/LT/LTU OR EQ */
6182 {
6183 rtx c_rtx, eq_rtx;
6184 enum insn_code ior_code;
6185 enum rtx_code new_code;
6186
6187 switch (rcode)
6188 {
6189 case GE: new_code = GT; break;
6190 case GEU: new_code = GTU; break;
6191 case LE: new_code = LT; break;
6192 case LEU: new_code = LTU; break;
6193 default:
6194 gcc_unreachable ();
6195 }
6196
6197 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6198 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6199
6200 ior_code = optab_handler (ior_optab, dest_mode);
6201 gcc_assert (ior_code != CODE_FOR_nothing);
6202 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6203 if (dmode != dest_mode)
6204 {
6205 rtx temp = gen_reg_rtx (dest_mode);
6206 convert_move (temp, mask, 0);
6207 return temp;
6208 }
6209 return mask;
6210 }
6211 break;
6212 case LTGT:
6213 /* Try LT OR GT */
6214 {
6215 rtx lt_rtx, gt_rtx;
6216 enum insn_code ior_code;
6217
6218 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6219 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6220
6221 ior_code = optab_handler (ior_optab, dest_mode);
6222 gcc_assert (ior_code != CODE_FOR_nothing);
6223 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6224 if (dmode != dest_mode)
6225 {
6226 rtx temp = gen_reg_rtx (dest_mode);
6227 convert_move (temp, mask, 0);
6228 return temp;
6229 }
6230 return mask;
6231 }
6232 break;
6233 case ORDERED:
6234 /* Implement as (A==A) & (B==B) */
6235 {
6236 rtx a_rtx, b_rtx;
6237 enum insn_code and_code;
6238
6239 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6240 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6241
6242 and_code = optab_handler (and_optab, dest_mode);
6243 gcc_assert (and_code != CODE_FOR_nothing);
6244 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6245 if (dmode != dest_mode)
6246 {
6247 rtx temp = gen_reg_rtx (dest_mode);
6248 convert_move (temp, mask, 0);
6249 return temp;
6250 }
6251 return mask;
6252 }
6253 break;
6254 default:
6255 gcc_unreachable ();
6256 }
6257
6258 /* You only get two chances. */
6259 if (try_again)
6260 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6261
6262 gcc_assert (vec_cmp_insn != -1);
6263
6264 if (swap_operands)
6265 {
6266 rtx tmp;
6267 tmp = op0;
6268 op0 = op1;
6269 op1 = tmp;
6270 }
6271 }
6272
6273 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6274 if (dmode != dest_mode)
6275 {
6276 rtx temp = gen_reg_rtx (dest_mode);
6277 convert_move (temp, mask, 0);
6278 return temp;
6279 }
6280 return mask;
6281 }
6282
6283
6284 /* Emit vector conditional expression.
6285 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6286 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6287
6288 int
6289 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6290 rtx cond, rtx cc_op0, rtx cc_op1)
6291 {
6292 machine_mode dest_mode = GET_MODE (dest);
6293 enum rtx_code rcode = GET_CODE (cond);
6294 rtx mask;
6295
6296 /* Get the vector mask for the given relational operation. */
6297 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6298
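/* selb takes bits from its second value operand where the mask is 1
   and from the first where it is 0, so passing (op2, op1, mask)
   selects op1 in the lanes where the comparison is true.  */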
6299 emit_insn(gen_selb (dest, op2, op1, mask));
6300
6301 return 1;
6302 }
6303
6304 static rtx
6305 spu_force_reg (machine_mode mode, rtx op)
6306 {
6307 rtx x, r;
6308 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6309 {
6310 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6311 || GET_MODE (op) == BLKmode)
6312 return force_reg (mode, convert_to_mode (mode, op, 0));
6313 abort ();
6314 }
6315
6316 r = force_reg (GET_MODE (op), op);
6317 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6318 {
6319 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6320 if (x)
6321 return x;
6322 }
6323
6324 x = gen_reg_rtx (mode);
6325 emit_insn (gen_spu_convert (x, r));
6326 return x;
6327 }
6328
6329 static void
6330 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6331 {
6332 HOST_WIDE_INT v = 0;
6333 int lsbits;
6334 /* Check the range of immediate operands. */
6335 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6336 {
6337 int range = p - SPU_BTI_7;
6338
6339 if (!CONSTANT_P (op))
6340 error ("%s expects an integer literal in the range [%d, %d]",
6341 d->name,
6342 spu_builtin_range[range].low, spu_builtin_range[range].high);
6343
6344 if (GET_CODE (op) == CONST
6345 && (GET_CODE (XEXP (op, 0)) == PLUS
6346 || GET_CODE (XEXP (op, 0)) == MINUS))
6347 {
6348 v = INTVAL (XEXP (XEXP (op, 0), 1));
6349 op = XEXP (XEXP (op, 0), 0);
6350 }
6351 else if (GET_CODE (op) == CONST_INT)
6352 v = INTVAL (op);
6353 else if (GET_CODE (op) == CONST_VECTOR
6354 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6355 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6356
6357 /* The default for v is 0 which is valid in every range. */
6358 if (v < spu_builtin_range[range].low
6359 || v > spu_builtin_range[range].high)
6360 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6361 d->name,
6362 spu_builtin_range[range].low, spu_builtin_range[range].high,
6363 v);
6364
6365 switch (p)
6366 {
6367 case SPU_BTI_S10_4:
6368 lsbits = 4;
6369 break;
6370 case SPU_BTI_U16_2:
6371 /* This is only used in lqa and stqa. Even though the insns
6372 encode 16 bits of the address (all but the 2 least
6373 significant), only 14 bits are used because the address is
6374 masked to be 16-byte aligned. */
6375 lsbits = 4;
6376 break;
6377 case SPU_BTI_S16_2:
6378 /* This is used for lqr and stqr. */
6379 lsbits = 2;
6380 break;
6381 default:
6382 lsbits = 0;
6383 }
6384
6385 if (GET_CODE (op) == LABEL_REF
6386 || (GET_CODE (op) == SYMBOL_REF
6387 && SYMBOL_REF_FUNCTION_P (op))
6388 || (v & ((1 << lsbits) - 1)) != 0)
6389 warning (0, "%d least significant bits of %s are ignored", lsbits,
6390 d->name);
6391 }
6392 }
6393
6394
6395 static int
6396 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6397 rtx target, rtx ops[])
6398 {
6399 enum insn_code icode = (enum insn_code) d->icode;
6400 int i = 0, a;
6401
6402 /* Expand the arguments into rtl. */
6403
6404 if (d->parm[0] != SPU_BTI_VOID)
6405 ops[i++] = target;
6406
6407 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6408 {
6409 tree arg = CALL_EXPR_ARG (exp, a);
6410 if (arg == 0)
6411 abort ();
6412 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6413 }
6414
6415 gcc_assert (i == insn_data[icode].n_generator_args);
6416 return i;
6417 }
6418
6419 static rtx
6420 spu_expand_builtin_1 (struct spu_builtin_description *d,
6421 tree exp, rtx target)
6422 {
6423 rtx pat;
6424 rtx ops[8];
6425 enum insn_code icode = (enum insn_code) d->icode;
6426 machine_mode mode, tmode;
6427 int i, p;
6428 int n_operands;
6429 tree return_type;
6430
6431 /* Set up ops[] with values from arglist. */
6432 n_operands = expand_builtin_args (d, exp, target, ops);
6433
6434 /* Handle the target operand which must be operand 0. */
6435 i = 0;
6436 if (d->parm[0] != SPU_BTI_VOID)
6437 {
6438
6439 /* We prefer the mode specified for the match_operand; otherwise
6440 use the mode from the builtin function prototype. */
6441 tmode = insn_data[d->icode].operand[0].mode;
6442 if (tmode == VOIDmode)
6443 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6444
6445 /* Try to use target, because not using it can lead to extra copies,
6446 and when we are using all of the registers extra copies lead
6447 to extra spills. */
6448 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6449 ops[0] = target;
6450 else
6451 target = ops[0] = gen_reg_rtx (tmode);
6452
6453 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6454 abort ();
6455
6456 i++;
6457 }
6458
6459 if (d->fcode == SPU_MASK_FOR_LOAD)
6460 {
6461 machine_mode mode = insn_data[icode].operand[1].mode;
6462 tree arg;
6463 rtx addr, op, pat;
6464
6465 /* get addr */
6466 arg = CALL_EXPR_ARG (exp, 0);
6467 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6468 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6469 addr = memory_address (mode, op);
6470
6471 /* negate addr */
6472 op = gen_reg_rtx (GET_MODE (addr));
6473 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6474 op = gen_rtx_MEM (mode, op);
6475
6476 pat = GEN_FCN (icode) (target, op);
6477 if (!pat)
6478 return 0;
6479 emit_insn (pat);
6480 return target;
6481 }
6482
6483 /* Ignore align_hint, but still expand its args in case they have
6484 side effects. */
6485 if (icode == CODE_FOR_spu_align_hint)
6486 return 0;
6487
6488 /* Handle the rest of the operands. */
6489 for (p = 1; i < n_operands; i++, p++)
6490 {
6491 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6492 mode = insn_data[d->icode].operand[i].mode;
6493 else
6494 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6495
6496 /* The mode can be VOIDmode here for labels. */
6497
6498 /* For specific intrinsics with an immediate operand, e.g.,
6499 si_ai(), we sometimes need to convert the scalar argument to a
6500 vector argument by splatting the scalar. */
6501 if (VECTOR_MODE_P (mode)
6502 && (GET_CODE (ops[i]) == CONST_INT
6503 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6504 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6505 {
6506 if (GET_CODE (ops[i]) == CONST_INT)
6507 ops[i] = spu_const (mode, INTVAL (ops[i]));
6508 else
6509 {
6510 rtx reg = gen_reg_rtx (mode);
6511 machine_mode imode = GET_MODE_INNER (mode);
6512 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6513 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6514 if (imode != GET_MODE (ops[i]))
6515 ops[i] = convert_to_mode (imode, ops[i],
6516 TYPE_UNSIGNED (spu_builtin_types
6517 [d->parm[i]]));
6518 emit_insn (gen_spu_splats (reg, ops[i]));
6519 ops[i] = reg;
6520 }
6521 }
6522
6523 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6524
6525 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6526 ops[i] = spu_force_reg (mode, ops[i]);
6527 }
6528
6529 switch (n_operands)
6530 {
6531 case 0:
6532 pat = GEN_FCN (icode) (0);
6533 break;
6534 case 1:
6535 pat = GEN_FCN (icode) (ops[0]);
6536 break;
6537 case 2:
6538 pat = GEN_FCN (icode) (ops[0], ops[1]);
6539 break;
6540 case 3:
6541 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6542 break;
6543 case 4:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6545 break;
6546 case 5:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6548 break;
6549 case 6:
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6551 break;
6552 default:
6553 abort ();
6554 }
6555
6556 if (!pat)
6557 abort ();
6558
6559 if (d->type == B_CALL || d->type == B_BISLED)
6560 emit_call_insn (pat);
6561 else if (d->type == B_JUMP)
6562 {
6563 emit_jump_insn (pat);
6564 emit_barrier ();
6565 }
6566 else
6567 emit_insn (pat);
6568
6569 return_type = spu_builtin_types[d->parm[0]];
6570 if (d->parm[0] != SPU_BTI_VOID
6571 && GET_MODE (target) != TYPE_MODE (return_type))
6572 {
6573 /* target is the return value. It should always have the mode of
6574 the builtin function prototype. */
6575 target = spu_force_reg (TYPE_MODE (return_type), target);
6576 }
6577
6578 return target;
6579 }
6580
6581 rtx
6582 spu_expand_builtin (tree exp,
6583 rtx target,
6584 rtx subtarget ATTRIBUTE_UNUSED,
6585 machine_mode mode ATTRIBUTE_UNUSED,
6586 int ignore ATTRIBUTE_UNUSED)
6587 {
6588 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6589 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
6590 struct spu_builtin_description *d;
6591
6592 if (fcode < NUM_SPU_BUILTINS)
6593 {
6594 d = &spu_builtins[fcode];
6595
6596 return spu_expand_builtin_1 (d, exp, target);
6597 }
6598 abort ();
6599 }
6600
6601 /* Implement targetm.vectorize.builtin_mask_for_load. */
6602 static tree
6603 spu_builtin_mask_for_load (void)
6604 {
6605 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6606 }
6607
6608 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6609 static int
6610 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6611 tree vectype,
6612 int misalign ATTRIBUTE_UNUSED)
6613 {
6614 unsigned elements;
6615
6616 switch (type_of_cost)
6617 {
6618 case scalar_stmt:
6619 case vector_stmt:
6620 case vector_load:
6621 case vector_store:
6622 case vec_to_scalar:
6623 case scalar_to_vec:
6624 case cond_branch_not_taken:
6625 case vec_perm:
6626 case vec_promote_demote:
6627 return 1;
6628
6629 case scalar_store:
6630 return 10;
6631
6632 case scalar_load:
6633 /* Load + rotate. */
6634 return 2;
6635
6636 case unaligned_load:
6637 case vector_gather_load:
6638 case vector_scatter_store:
6639 return 2;
6640
6641 case cond_branch_taken:
6642 return 6;
6643
6644 case vec_construct:
6645 elements = TYPE_VECTOR_SUBPARTS (vectype);
6646 return elements / 2 + 1;
6647
6648 default:
6649 gcc_unreachable ();
6650 }
6651 }
6652
6653 /* Implement targetm.vectorize.init_cost. */
6654
6655 static void *
6656 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6657 {
6658 unsigned *cost = XNEWVEC (unsigned, 3);
6659 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6660 return cost;
6661 }
6662
6663 /* Implement targetm.vectorize.add_stmt_cost. */
6664
6665 static unsigned
6666 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6667 struct _stmt_vec_info *stmt_info, int misalign,
6668 enum vect_cost_model_location where)
6669 {
6670 unsigned *cost = (unsigned *) data;
6671 unsigned retval = 0;
6672
6673 if (flag_vect_cost_model)
6674 {
6675 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6676 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6677
6678 /* Statements in an inner loop relative to the loop being
6679 vectorized are weighted more heavily. The value here is
6680 arbitrary and could potentially be improved with analysis. */
6681 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6682 count *= 50; /* FIXME. */
6683
6684 retval = (unsigned) (count * stmt_cost);
6685 cost[where] += retval;
6686 }
6687
6688 return retval;
6689 }
6690
6691 /* Implement targetm.vectorize.finish_cost. */
6692
6693 static void
6694 spu_finish_cost (void *data, unsigned *prologue_cost,
6695 unsigned *body_cost, unsigned *epilogue_cost)
6696 {
6697 unsigned *cost = (unsigned *) data;
6698 *prologue_cost = cost[vect_prologue];
6699 *body_cost = cost[vect_body];
6700 *epilogue_cost = cost[vect_epilogue];
6701 }
6702
6703 /* Implement targetm.vectorize.destroy_cost_data. */
6704
6705 static void
6706 spu_destroy_cost_data (void *data)
6707 {
6708 free (data);
6709 }
6710
6711 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6712 after applying N iterations. This routine does not determine
6713 how many iterations are required to reach the desired alignment. */
6714
6715 static bool
6716 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6717 {
6718 if (is_packed)
6719 return false;
6720
6721 /* All other types are naturally aligned. */
6722 return true;
6723 }
6724
6725 /* Return the appropriate mode for a named address pointer. */
6726 static scalar_int_mode
6727 spu_addr_space_pointer_mode (addr_space_t addrspace)
6728 {
6729 switch (addrspace)
6730 {
6731 case ADDR_SPACE_GENERIC:
6732 return ptr_mode;
6733 case ADDR_SPACE_EA:
6734 return EAmode;
6735 default:
6736 gcc_unreachable ();
6737 }
6738 }
6739
6740 /* Return the appropriate mode for a named address address. */
6741 static scalar_int_mode
6742 spu_addr_space_address_mode (addr_space_t addrspace)
6743 {
6744 switch (addrspace)
6745 {
6746 case ADDR_SPACE_GENERIC:
6747 return Pmode;
6748 case ADDR_SPACE_EA:
6749 return EAmode;
6750 default:
6751 gcc_unreachable ();
6752 }
6753 }
6754
6755 /* Determine if one named address space is a subset of another. */
6756
6757 static bool
6758 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6759 {
6760 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6761 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6762
6763 if (subset == superset)
6764 return true;
6765
6766 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6767 being subsets but instead as disjoint address spaces. */
6768 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6769 return false;
6770
6771 else
6772 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6773 }
6774
6775 /* Convert from one address space to another. */
6776 static rtx
6777 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6778 {
6779 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6780 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6781
6782 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6783 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6784
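/* A pointer is converted between the __ea and generic address spaces
   by subtracting or adding the local store's effective address, read
   from __ea_local_store.  The conditional moves below zero out the
   adjustment when the source pointer is NULL so that NULL converts
   to NULL in either direction.  */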
6785 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6786 {
6787 rtx result, ls;
6788
6789 ls = gen_const_mem (DImode,
6790 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6791 set_mem_align (ls, 128);
6792
6793 result = gen_reg_rtx (Pmode);
6794 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6795 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6796 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6797 ls, const0_rtx, Pmode, 1);
6798
6799 emit_insn (gen_subsi3 (result, op, ls));
6800
6801 return result;
6802 }
6803
6804 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6805 {
6806 rtx result, ls;
6807
6808 ls = gen_const_mem (DImode,
6809 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6810 set_mem_align (ls, 128);
6811
6812 result = gen_reg_rtx (EAmode);
6813 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6814 op = force_reg (Pmode, op);
6815 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6816 ls, const0_rtx, EAmode, 1);
6817 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6818
6819 if (EAmode == SImode)
6820 emit_insn (gen_addsi3 (result, op, ls));
6821 else
6822 emit_insn (gen_adddi3 (result, op, ls));
6823
6824 return result;
6825 }
6826
6827 else
6828 gcc_unreachable ();
6829 }
6830
6831
6832 /* Count the total number of instructions in each pipe and return the
6833 maximum, which is used as the Minimum Iteration Interval (MII)
6834 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6835 A value of -2 means the instruction can go in either pipe0 or pipe1. */
6836 static int
6837 spu_sms_res_mii (struct ddg *g)
6838 {
6839 int i;
6840 unsigned t[4] = {0, 0, 0, 0};
6841
6842 for (i = 0; i < g->num_nodes; i++)
6843 {
6844 rtx_insn *insn = g->nodes[i].insn;
6845 int p = get_pipe (insn) + 2;
6846
6847 gcc_assert (p >= 0);
6848 gcc_assert (p < 4);
6849
6850 t[p]++;
6851 if (dump_file && INSN_P (insn))
6852 fprintf (dump_file, "i%d %s %d %d\n",
6853 INSN_UID (insn),
6854 insn_data[INSN_CODE(insn)].name,
6855 p, t[p]);
6856 }
6857 if (dump_file)
6858 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6859
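/* t[2] and t[3] count the instructions restricted to pipe0 and pipe1
   respectively, and t[0] counts those that can issue in either pipe,
   so the resulting MII is the larger of the dedicated-pipe counts and
   half (rounded up) of everything competing for the two pipes.  */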
6860 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6861 }
6862
6863
6864 void
6865 spu_init_expanders (void)
6866 {
6867 if (cfun)
6868 {
6869 rtx r0, r1;
6870 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6871 frame_pointer_needed is true. We don't know that until we're
6872 expanding the prologue. */
6873 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6874
6875 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6876 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6877 to be treated as aligned, so generate them here. */
6878 r0 = gen_reg_rtx (SImode);
6879 r1 = gen_reg_rtx (SImode);
6880 mark_reg_pointer (r0, 128);
6881 mark_reg_pointer (r1, 128);
6882 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6883 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6884 }
6885 }
6886
6887 static scalar_int_mode
6888 spu_libgcc_cmp_return_mode (void)
6889 {
6890
6891 /* For SPU, word mode is TImode, so it is better to use SImode
6892 for compare returns. */
6893 return SImode;
6894 }
6895
6896 static scalar_int_mode
6897 spu_libgcc_shift_count_mode (void)
6898 {
6899 /* For SPU, word mode is TImode, so it is better to use SImode
6900 for shift counts. */
6901 return SImode;
6902 }
6903
6904 /* Implement targetm.section_type_flags. */
6905 static unsigned int
6906 spu_section_type_flags (tree decl, const char *name, int reloc)
6907 {
6908 /* .toe needs to have type @nobits. */
6909 if (strcmp (name, ".toe") == 0)
6910 return SECTION_BSS;
6911 /* Don't load _ea into the current address space. */
6912 if (strcmp (name, "._ea") == 0)
6913 return SECTION_WRITE | SECTION_DEBUG;
6914 return default_section_type_flags (decl, name, reloc);
6915 }
6916
6917 /* Implement targetm.select_section. */
6918 static section *
6919 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6920 {
6921 /* Variables and constants defined in the __ea address space
6922 go into a special section named "._ea". */
6923 if (TREE_TYPE (decl) != error_mark_node
6924 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6925 {
6926 /* We might get called with string constants, but get_named_section
6927 doesn't like them as they are not DECLs. Also, we need to set
6928 flags in that case. */
6929 if (!DECL_P (decl))
6930 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6931
6932 return get_named_section (decl, "._ea", reloc);
6933 }
6934
6935 return default_elf_select_section (decl, reloc, align);
6936 }
6937
6938 /* Implement targetm.unique_section. */
6939 static void
6940 spu_unique_section (tree decl, int reloc)
6941 {
6942 /* We don't support unique section names in the __ea address
6943 space for now. */
6944 if (TREE_TYPE (decl) != error_mark_node
6945 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6946 return;
6947
6948 default_unique_section (decl, reloc);
6949 }
6950
6951 /* Generate a constant or register which contains 2^SCALE. We assume
6952 the result is valid for MODE. Currently, MODE must be V4SFmode and
6953 SCALE must be SImode. */
6954 rtx
6955 spu_gen_exp2 (machine_mode mode, rtx scale)
6956 {
6957 gcc_assert (mode == V4SFmode);
6958 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6959 if (GET_CODE (scale) != CONST_INT)
6960 {
6961 /* unsigned int exp = (127 + scale) << 23;
6962 __vector float m = (__vector float) spu_splats (exp); */
6963 rtx reg = force_reg (SImode, scale);
6964 rtx exp = gen_reg_rtx (SImode);
6965 rtx mul = gen_reg_rtx (mode);
6966 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6967 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6968 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6969 return mul;
6970 }
6971 else
6972 {
6973 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6974 unsigned char arr[16];
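/* Build the bytes of the single precision value 2^scale directly:
   the biased exponent occupies bits 30..23 of each element, so byte 0
   holds its upper seven bits (exp >> 1), byte 1 holds its low bit in
   the most significant position (exp << 7), and the mantissa bytes
   are zero.  */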
6975 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6976 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6977 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6978 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6979 return array_to_constant (mode, arr);
6980 }
6981 }
6982
6983 /* After reload, just change the convert into a move instruction
6984 or a dead instruction. */
6985 void
6986 spu_split_convert (rtx ops[])
6987 {
6988 if (REGNO (ops[0]) == REGNO (ops[1]))
6989 emit_note (NOTE_INSN_DELETED);
6990 else
6991 {
6992 /* Use TImode always as this might help hard reg copyprop. */
6993 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6994 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6995 emit_insn (gen_move_insn (op0, op1));
6996 }
6997 }
6998
6999 void
7000 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7001 {
7002 fprintf (file, "# profile\n");
7003 fprintf (file, "brsl $75, _mcount\n");
7004 }
7005
7006 /* Implement targetm.ref_may_alias_errno. */
7007 static bool
7008 spu_ref_may_alias_errno (ao_ref *ref)
7009 {
7010 tree base = ao_ref_base (ref);
7011
7012 /* With SPU newlib, errno is defined as something like
7013 _impure_data._errno
7014 The default implementation of this target macro does not
7015 recognize such expressions, so special-case them here. */
7016
7017 if (TREE_CODE (base) == VAR_DECL
7018 && !TREE_STATIC (base)
7019 && DECL_EXTERNAL (base)
7020 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7021 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7022 "_impure_data") == 0
7023 /* _errno is the first member of _impure_data. */
7024 && ref->offset == 0)
7025 return true;
7026
7027 return default_ref_may_alias_errno (ref);
7028 }
7029
7030 /* Output thunk to FILE that implements a C++ virtual function call (with
7031 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7032 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7033 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7034 relative to the resulting this pointer. */
7035
7036 static void
7037 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7038 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7039 tree function)
7040 {
7041 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
7042 rtx op[8];
7043
7044 assemble_start_function (thunk, fnname);
7045 /* Make sure unwind info is emitted for the thunk if needed. */
7046 final_start_function (emit_barrier (), file, 1);
7047
7048 /* Operand 0 is the target function. */
7049 op[0] = XEXP (DECL_RTL (function), 0);
7050
7051 /* Operand 1 is the 'this' pointer. */
7052 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7053 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7054 else
7055 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7056
7057 /* Operands 2/3 are the low/high halfwords of delta. */
7058 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7059 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7060
7061 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7062 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7063 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7064
7065 /* Operands 6/7 are temporary registers. */
7066 op[6] = gen_rtx_REG (Pmode, 79);
7067 op[7] = gen_rtx_REG (Pmode, 78);
7068
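/* Both adjustments below pick the shortest sequence for the constant:
   ai for values that fit the 10-bit signed immediate, il for 16-bit
   values, and an ilhu/iohl pair to build a full 32-bit constant
   otherwise.  */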
7069 /* Add DELTA to this pointer. */
7070 if (delta)
7071 {
7072 if (delta >= -0x200 && delta < 0x200)
7073 output_asm_insn ("ai\t%1,%1,%2", op);
7074 else if (delta >= -0x8000 && delta < 0x8000)
7075 {
7076 output_asm_insn ("il\t%6,%2", op);
7077 output_asm_insn ("a\t%1,%1,%6", op);
7078 }
7079 else
7080 {
7081 output_asm_insn ("ilhu\t%6,%3", op);
7082 output_asm_insn ("iohl\t%6,%2", op);
7083 output_asm_insn ("a\t%1,%1,%6", op);
7084 }
7085 }
7086
7087 /* Perform vcall adjustment. */
7088 if (vcall_offset)
7089 {
7090 output_asm_insn ("lqd\t%7,0(%1)", op);
7091 output_asm_insn ("rotqby\t%7,%7,%1", op);
7092
7093 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7094 output_asm_insn ("ai\t%7,%7,%4", op);
7095 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7096 {
7097 output_asm_insn ("il\t%6,%4", op);
7098 output_asm_insn ("a\t%7,%7,%6", op);
7099 }
7100 else
7101 {
7102 output_asm_insn ("ilhu\t%6,%5", op);
7103 output_asm_insn ("iohl\t%6,%4", op);
7104 output_asm_insn ("a\t%7,%7,%6", op);
7105 }
7106
7107 output_asm_insn ("lqd\t%6,0(%7)", op);
7108 output_asm_insn ("rotqby\t%6,%6,%7", op);
7109 output_asm_insn ("a\t%1,%1,%6", op);
7110 }
7111
7112 /* Jump to target. */
7113 output_asm_insn ("br\t%0", op);
7114
7115 final_end_function ();
7116 assemble_end_function (thunk, fnname);
7117 }
7118
7119 /* Canonicalize a comparison from one we don't have to one we do have. */
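/* For example, (LT x y) is rewritten as (GT y x); SPU only provides
   equality and "greater than" style compare instructions.  */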
7120 static void
7121 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7122 bool op0_preserve_value)
7123 {
7124 if (!op0_preserve_value
7125 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7126 {
7127 rtx tem = *op0;
7128 *op0 = *op1;
7129 *op1 = tem;
7130 *code = (int)swap_condition ((enum rtx_code)*code);
7131 }
7132 }
7133
7134 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7135 to perform. MEM is the memory on which to operate. VAL is the second
7136 operand of the binary operator. BEFORE and AFTER are optional locations to
7137 return the value of MEM either before or after the operation. */
7138 void
7139 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7140 rtx orig_before, rtx orig_after)
7141 {
7142 machine_mode mode = GET_MODE (mem);
7143 rtx before = orig_before, after = orig_after;
7144
7145 if (before == NULL_RTX)
7146 before = gen_reg_rtx (mode);
7147
7148 emit_move_insn (before, mem);
7149
7150 if (code == MULT) /* NAND operation */
7151 {
7152 rtx x = expand_simple_binop (mode, AND, before, val,
7153 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7154 after = expand_simple_unop (mode, NOT, x, after, 1);
7155 }
7156 else
7157 {
7158 after = expand_simple_binop (mode, code, before, val,
7159 after, 1, OPTAB_LIB_WIDEN);
7160 }
7161
7162 emit_move_insn (mem, after);
7163
7164 if (orig_after && after != orig_after)
7165 emit_move_insn (orig_after, after);
7166 }
7167
7168 /* Implement TARGET_MODES_TIEABLE_P. */
7169
7170 static bool
7171 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7172 {
7173 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7174 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7175 }
7176
7177 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7178 in the lowpart of a register, which is only true for SPU. */
7179
7180 static bool
7181 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7182 {
7183 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7184 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7185 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7186 }
7187
7188 /* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7189
7190 static bool
7191 spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
7192 {
7193 return inprec <= 32 && outprec <= inprec;
7194 }
7195
7196 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7197
7198 Make all static objects 16-byte aligned. This allows us to assume
7199 they are also padded to 16 bytes, which means we can use a single
7200 load or store instruction to access them. */
7201
7202 static HOST_WIDE_INT
7203 spu_static_rtx_alignment (machine_mode mode)
7204 {
7205 return MAX (GET_MODE_ALIGNMENT (mode), 128);
7206 }
7207
7208 /* Implement TARGET_CONSTANT_ALIGNMENT.
7209
7210 Make all static objects 16-byte aligned. This allows us to assume
7211 they are also padded to 16 bytes, which means we can use a single
7212 load or store instruction to access them. */
7213
7214 static HOST_WIDE_INT
7215 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7216 {
7217 return MAX (align, 128);
7218 }
7219 \f
7220 /* Table of machine attributes. */
7221 static const struct attribute_spec spu_attribute_table[] =
7222 {
7223 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7224 affects_type_identity, handler, exclude } */
7225 { "naked", 0, 0, true, false, false, false,
7226 spu_handle_fndecl_attribute, NULL },
7227 { "spu_vector", 0, 0, false, true, false, false,
7228 spu_handle_vector_attribute, NULL },
7229 { NULL, 0, 0, false, false, false, false, NULL, NULL }
7230 };
7231
7232 /* TARGET overrides. */
7233
7234 #undef TARGET_LRA_P
7235 #define TARGET_LRA_P hook_bool_void_false
7236
7237 #undef TARGET_ADDR_SPACE_POINTER_MODE
7238 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7239
7240 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7241 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7242
7243 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7244 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7245 spu_addr_space_legitimate_address_p
7246
7247 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7248 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7249
7250 #undef TARGET_ADDR_SPACE_SUBSET_P
7251 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7252
7253 #undef TARGET_ADDR_SPACE_CONVERT
7254 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7255
7256 #undef TARGET_INIT_BUILTINS
7257 #define TARGET_INIT_BUILTINS spu_init_builtins
7258 #undef TARGET_BUILTIN_DECL
7259 #define TARGET_BUILTIN_DECL spu_builtin_decl
7260
7261 #undef TARGET_EXPAND_BUILTIN
7262 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7263
7264 #undef TARGET_UNWIND_WORD_MODE
7265 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7266
7267 #undef TARGET_LEGITIMIZE_ADDRESS
7268 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7269
7270 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7271 and .quad for the debugger. When it is known that the assembler is fixed,
7272 these can be removed. */
7273 #undef TARGET_ASM_UNALIGNED_SI_OP
7274 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7275
7276 #undef TARGET_ASM_ALIGNED_DI_OP
7277 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7278
7279 /* The .8byte directive doesn't seem to work well for a 32 bit
7280 architecture. */
7281 #undef TARGET_ASM_UNALIGNED_DI_OP
7282 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7283
7284 #undef TARGET_RTX_COSTS
7285 #define TARGET_RTX_COSTS spu_rtx_costs
7286
7287 #undef TARGET_ADDRESS_COST
7288 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7289
7290 #undef TARGET_SCHED_ISSUE_RATE
7291 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7292
7293 #undef TARGET_SCHED_INIT_GLOBAL
7294 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7295
7296 #undef TARGET_SCHED_INIT
7297 #define TARGET_SCHED_INIT spu_sched_init
7298
7299 #undef TARGET_SCHED_VARIABLE_ISSUE
7300 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7301
7302 #undef TARGET_SCHED_REORDER
7303 #define TARGET_SCHED_REORDER spu_sched_reorder
7304
7305 #undef TARGET_SCHED_REORDER2
7306 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7307
7308 #undef TARGET_SCHED_ADJUST_COST
7309 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7310
7311 #undef TARGET_ATTRIBUTE_TABLE
7312 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7313
7314 #undef TARGET_ASM_INTEGER
7315 #define TARGET_ASM_INTEGER spu_assemble_integer
7316
7317 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7318 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7319
7320 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7321 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7322
7323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7324 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7325
7326 #undef TARGET_ASM_GLOBALIZE_LABEL
7327 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7328
7329 #undef TARGET_PASS_BY_REFERENCE
7330 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7331
7332 #undef TARGET_FUNCTION_ARG
7333 #define TARGET_FUNCTION_ARG spu_function_arg
7334
7335 #undef TARGET_FUNCTION_ARG_ADVANCE
7336 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7337
7338 #undef TARGET_FUNCTION_ARG_OFFSET
7339 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7340
7341 #undef TARGET_FUNCTION_ARG_PADDING
7342 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7343
7344 #undef TARGET_MUST_PASS_IN_STACK
7345 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7346
7347 #undef TARGET_BUILD_BUILTIN_VA_LIST
7348 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7349
7350 #undef TARGET_EXPAND_BUILTIN_VA_START
7351 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7352
7353 #undef TARGET_SETUP_INCOMING_VARARGS
7354 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7355
7356 #undef TARGET_MACHINE_DEPENDENT_REORG
7357 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7358
7359 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7360 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7361
7362 #undef TARGET_INIT_LIBFUNCS
7363 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7364
7365 #undef TARGET_RETURN_IN_MEMORY
7366 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7367
7368 #undef TARGET_ENCODE_SECTION_INFO
7369 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7370
7371 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7372 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7373
7374 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7375 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7376
7377 #undef TARGET_VECTORIZE_INIT_COST
7378 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7379
7380 #undef TARGET_VECTORIZE_ADD_STMT_COST
7381 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7382
7383 #undef TARGET_VECTORIZE_FINISH_COST
7384 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7385
7386 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7387 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7388
7389 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7390 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7391
7392 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7393 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7394
7395 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7396 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7397
7398 #undef TARGET_SCHED_SMS_RES_MII
7399 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7400
7401 #undef TARGET_SECTION_TYPE_FLAGS
7402 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7403
7404 #undef TARGET_ASM_SELECT_SECTION
7405 #define TARGET_ASM_SELECT_SECTION spu_select_section
7406
7407 #undef TARGET_ASM_UNIQUE_SECTION
7408 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7409
7410 #undef TARGET_LEGITIMATE_ADDRESS_P
7411 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7412
7413 #undef TARGET_LEGITIMATE_CONSTANT_P
7414 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7415
7416 #undef TARGET_TRAMPOLINE_INIT
7417 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7418
7419 #undef TARGET_WARN_FUNC_RETURN
7420 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7421
7422 #undef TARGET_OPTION_OVERRIDE
7423 #define TARGET_OPTION_OVERRIDE spu_option_override
7424
7425 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7426 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7427
7428 #undef TARGET_REF_MAY_ALIAS_ERRNO
7429 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7430
7431 #undef TARGET_ASM_OUTPUT_MI_THUNK
7432 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7433 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7434 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7435
7436 /* Variable tracking should be run after all optimizations which
7437 change order of insns. It also needs a valid CFG. */
7438 #undef TARGET_DELAY_VARTRACK
7439 #define TARGET_DELAY_VARTRACK true
7440
7441 #undef TARGET_CANONICALIZE_COMPARISON
7442 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7443
7444 #undef TARGET_CAN_USE_DOLOOP_P
7445 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7446
7447 #undef TARGET_MODES_TIEABLE_P
7448 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7449
7450 #undef TARGET_HARD_REGNO_NREGS
7451 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7452
7453 #undef TARGET_CAN_CHANGE_MODE_CLASS
7454 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7455
7456 #undef TARGET_TRULY_NOOP_TRUNCATION
7457 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7458
7459 #undef TARGET_STATIC_RTX_ALIGNMENT
7460 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7461 #undef TARGET_CONSTANT_ALIGNMENT
7462 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7463
7464 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
7465 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
7466
7467 struct gcc_target targetm = TARGET_INITIALIZER;
7468
7469 #include "gt-spu.h"