gcc/config/spu/spu.c
1 /* Copyright (C) 2006-2017 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "target.h"
22 #include "rtl.h"
23 #include "tree.h"
24 #include "gimple.h"
25 #include "cfghooks.h"
26 #include "cfgloop.h"
27 #include "df.h"
28 #include "memmodel.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "output.h"
47 #include "cfgrtl.h"
48 #include "cfgbuild.h"
49 #include "langhooks.h"
50 #include "reload.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "gimplify.h"
54 #include "tm-constrs.h"
55 #include "ddg.h"
56 #include "dumpfile.h"
57 #include "builtins.h"
58 #include "rtl-iter.h"
59
60 /* This file should be included last. */
61 #include "target-def.h"
62
63 /* Builtin types, data and prototypes. */
64
65 enum spu_builtin_type_index
66 {
67 SPU_BTI_END_OF_PARAMS,
68
69 /* We create new type nodes for these. */
70 SPU_BTI_V16QI,
71 SPU_BTI_V8HI,
72 SPU_BTI_V4SI,
73 SPU_BTI_V2DI,
74 SPU_BTI_V4SF,
75 SPU_BTI_V2DF,
76 SPU_BTI_UV16QI,
77 SPU_BTI_UV8HI,
78 SPU_BTI_UV4SI,
79 SPU_BTI_UV2DI,
80
81 /* A 16-byte type. (Implemented with V16QI_type_node) */
82 SPU_BTI_QUADWORD,
83
84 /* These all correspond to intSI_type_node */
85 SPU_BTI_7,
86 SPU_BTI_S7,
87 SPU_BTI_U7,
88 SPU_BTI_S10,
89 SPU_BTI_S10_4,
90 SPU_BTI_U14,
91 SPU_BTI_16,
92 SPU_BTI_S16,
93 SPU_BTI_S16_2,
94 SPU_BTI_U16,
95 SPU_BTI_U16_2,
96 SPU_BTI_U18,
97
98 /* These correspond to the standard types */
99 SPU_BTI_INTQI,
100 SPU_BTI_INTHI,
101 SPU_BTI_INTSI,
102 SPU_BTI_INTDI,
103
104 SPU_BTI_UINTQI,
105 SPU_BTI_UINTHI,
106 SPU_BTI_UINTSI,
107 SPU_BTI_UINTDI,
108
109 SPU_BTI_FLOAT,
110 SPU_BTI_DOUBLE,
111
112 SPU_BTI_VOID,
113 SPU_BTI_PTR,
114
115 SPU_BTI_MAX
116 };
117
118 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
119 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
120 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
121 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
122 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
123 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
124 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
125 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
126 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
127 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
128
129 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
130
131 struct spu_builtin_range
132 {
133 int low, high;
134 };
135
136 static struct spu_builtin_range spu_builtin_range[] = {
137 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
138 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
139 {0ll, 0x7fll}, /* SPU_BTI_U7 */
140 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
141 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
142 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
143 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
144 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
145 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
146 {0ll, 0xffffll}, /* SPU_BTI_U16 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
148 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
149 };
150
151 \f
152 /* Target specific attribute specifications. */
153 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
154
155 /* Prototypes and external defs. */
156 static int get_pipe (rtx_insn *insn);
157 static int spu_naked_function_p (tree func);
158 static int mem_is_padded_component_ref (rtx x);
159 static void fix_range (const char *);
160 static rtx spu_expand_load (rtx, rtx, rtx, int);
161
162 /* Which instruction set architecture to use. */
163 int spu_arch;
164 /* Which CPU we are tuning for. */
165 int spu_tune;
166
167 /* The hardware requires 8 insns between a hint and the branch it
168 affects. This variable describes how many rtl instructions the
169 compiler needs to see before inserting a hint, and then the compiler
170 will insert enough nops to make it at least 8 insns. The default is
171 for the compiler to allow up to 2 nops to be emitted. The nops are
172 inserted in pairs, so we round down. */
173 int spu_hint_dist = (8*4) - (2*4);
174
175 enum spu_immediate {
176 SPU_NONE,
177 SPU_IL,
178 SPU_ILA,
179 SPU_ILH,
180 SPU_ILHU,
181 SPU_ORI,
182 SPU_ORHI,
183 SPU_ORBI,
184 SPU_IOHL
185 };
186 enum immediate_class
187 {
188 IC_POOL, /* constant pool */
189 IC_IL1, /* one il* instruction */
190 IC_IL2, /* both ilhu and iohl instructions */
191 IC_IL1s, /* one il* instruction */
192 IC_IL2s, /* both ilhu and iohl instructions */
193 IC_FSMBI, /* the fsmbi instruction */
194 IC_CPAT, /* one of the c*d instructions */
195 IC_FSMBI2 /* fsmbi plus 1 other instruction */
196 };
197
198 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
199 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
200 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
201 static enum immediate_class classify_immediate (rtx op,
202 machine_mode mode);
203
204 /* Pointer mode for __ea references. */
205 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
206
207 \f
208 /* Define the structure for the machine field in struct function. */
209 struct GTY(()) machine_function
210 {
211 /* Register to use for PIC accesses. */
212 rtx pic_reg;
213 };
214
215 /* How to allocate a 'struct machine_function'. */
216 static struct machine_function *
217 spu_init_machine_status (void)
218 {
219 return ggc_cleared_alloc<machine_function> ();
220 }
221
222 /* Implement TARGET_OPTION_OVERRIDE. */
223 static void
224 spu_option_override (void)
225 {
226 /* Set up function hooks. */
227 init_machine_status = spu_init_machine_status;
228
229 /* Small loops will be completely peeled at -O3. For SPU it is more important
230 to keep code small by default. */
231 if (!flag_unroll_loops && !flag_peel_loops)
232 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
233 global_options.x_param_values,
234 global_options_set.x_param_values);
235
236 flag_omit_frame_pointer = 1;
237
238 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
239 if (align_functions < 8)
240 align_functions = 8;
241
242 spu_hint_dist = 8*4 - spu_max_nops*4;
243 if (spu_hint_dist < 0)
244 spu_hint_dist = 0;
245
246 if (spu_fixed_range_string)
247 fix_range (spu_fixed_range_string);
248
249 /* Determine processor architectural level. */
250 if (spu_arch_string)
251 {
252 if (strcmp (&spu_arch_string[0], "cell") == 0)
253 spu_arch = PROCESSOR_CELL;
254 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
255 spu_arch = PROCESSOR_CELLEDP;
256 else
257 error ("bad value (%s) for -march= switch", spu_arch_string);
258 }
259
260 /* Determine processor to tune for. */
261 if (spu_tune_string)
262 {
263 if (strcmp (&spu_tune_string[0], "cell") == 0)
264 spu_tune = PROCESSOR_CELL;
265 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
266 spu_tune = PROCESSOR_CELLEDP;
267 else
268 error ("bad value (%s) for -mtune= switch", spu_tune_string);
269 }
270
271 /* Change defaults according to the processor architecture. */
272 if (spu_arch == PROCESSOR_CELLEDP)
273 {
274 /* If no command line option has been otherwise specified, change
275 the default to -mno-safe-hints on celledp -- only the original
276 Cell/B.E. processors require this workaround. */
277 if (!(target_flags_explicit & MASK_SAFE_HINTS))
278 target_flags &= ~MASK_SAFE_HINTS;
279 }
280
281 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
282 }
283 \f
284 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
285 struct attribute_spec.handler. */
286
287 /* True if MODE is valid for the target. By "valid", we mean able to
288 be manipulated in non-trivial ways. In particular, this means all
289 the arithmetic is supported. */
290 static bool
291 spu_scalar_mode_supported_p (machine_mode mode)
292 {
293 switch (mode)
294 {
295 case E_QImode:
296 case E_HImode:
297 case E_SImode:
298 case E_SFmode:
299 case E_DImode:
300 case E_TImode:
301 case E_DFmode:
302 return true;
303
304 default:
305 return false;
306 }
307 }
308
309 /* Similarly for vector modes. "Supported" here is less strict. At
310 least some operations are supported; need to check optabs or builtins
311 for further details. */
312 static bool
313 spu_vector_mode_supported_p (machine_mode mode)
314 {
315 switch (mode)
316 {
317 case E_V16QImode:
318 case E_V8HImode:
319 case E_V4SImode:
320 case E_V2DImode:
321 case E_V4SFmode:
322 case E_V2DFmode:
323 return true;
324
325 default:
326 return false;
327 }
328 }
329
330 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
331 least significant bytes of the outer mode. This function returns
332 TRUE for the SUBREG's where this is correct. */
333 int
334 valid_subreg (rtx op)
335 {
336 machine_mode om = GET_MODE (op);
337 machine_mode im = GET_MODE (SUBREG_REG (op));
338 return om != VOIDmode && im != VOIDmode
339 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
340 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
341 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
342 }
343
344 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
345 and adjust the start offset. */
346 static rtx
347 adjust_operand (rtx op, HOST_WIDE_INT * start)
348 {
349 machine_mode mode;
350 int op_size;
351 /* Strip any paradoxical SUBREG. */
352 if (GET_CODE (op) == SUBREG
353 && (GET_MODE_BITSIZE (GET_MODE (op))
354 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
355 {
356 if (start)
357 *start -=
358 GET_MODE_BITSIZE (GET_MODE (op)) -
359 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
360 op = SUBREG_REG (op);
361 }
362 /* If it is smaller than SI, ensure we end up with an SImode SUBREG. */
363 op_size = GET_MODE_BITSIZE (GET_MODE (op));
364 if (op_size < 32)
365 {
366 if (start)
367 *start += 32 - op_size;
368 op_size = 32;
369 }
370 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
371 mode = mode_for_size (op_size, MODE_INT, 0);
372 if (mode != GET_MODE (op))
373 op = gen_rtx_SUBREG (mode, op, 0);
374 return op;
375 }
376
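/* Expand a bit-field extract.  ops[0] is the TImode destination,
   ops[1] the source (a MEM, SUBREG or TImode REG), ops[2] the field
   width in bits and ops[3] the bit offset of the field.  UNSIGNEDP
   selects a zero- or sign-extending extract.  */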
377 void
378 spu_expand_extv (rtx ops[], int unsignedp)
379 {
380 rtx dst = ops[0], src = ops[1];
381 HOST_WIDE_INT width = INTVAL (ops[2]);
382 HOST_WIDE_INT start = INTVAL (ops[3]);
383 HOST_WIDE_INT align_mask;
384 rtx s0, s1, mask, r0;
385
386 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
387
388 if (MEM_P (src))
389 {
390 /* First, determine if we need 1 TImode load or 2. We need only 1
391 if the bits being extracted do not cross the alignment boundary
392 as determined by the MEM and its address. */
393
394 align_mask = -MEM_ALIGN (src);
395 if ((start & align_mask) == ((start + width - 1) & align_mask))
396 {
397 /* Alignment is sufficient for 1 load. */
398 s0 = gen_reg_rtx (TImode);
399 r0 = spu_expand_load (s0, 0, src, start / 8);
400 start &= 7;
401 if (r0)
402 emit_insn (gen_rotqby_ti (s0, s0, r0));
403 }
404 else
405 {
406 /* Need 2 loads. */
407 s0 = gen_reg_rtx (TImode);
408 s1 = gen_reg_rtx (TImode);
409 r0 = spu_expand_load (s0, s1, src, start / 8);
410 start &= 7;
411
412 gcc_assert (start + width <= 128);
413 if (r0)
414 {
415 rtx r1 = gen_reg_rtx (SImode);
416 mask = gen_reg_rtx (TImode);
417 emit_move_insn (mask, GEN_INT (-1));
418 emit_insn (gen_rotqby_ti (s0, s0, r0));
419 emit_insn (gen_rotqby_ti (s1, s1, r0));
420 if (GET_CODE (r0) == CONST_INT)
421 r1 = GEN_INT (INTVAL (r0) & 15);
422 else
423 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
424 emit_insn (gen_shlqby_ti (mask, mask, r1));
425 emit_insn (gen_selb (s0, s1, s0, mask));
426 }
427 }
428
429 }
430 else if (GET_CODE (src) == SUBREG)
431 {
432 rtx r = SUBREG_REG (src);
433 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
434 s0 = gen_reg_rtx (TImode);
435 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
436 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
437 else
438 emit_move_insn (s0, src);
439 }
440 else
441 {
442 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
443 s0 = gen_reg_rtx (TImode);
444 emit_move_insn (s0, src);
445 }
446
447 /* Now s0 is TImode and contains the bits to extract at start. */
448
449 if (start)
450 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
451
452 if (128 - width)
453 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
454
455 emit_move_insn (dst, s0);
456 }
457
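/* Expand a bit-field insert.  ops[0] is the destination (REG or MEM),
   ops[1] the field width in bits, ops[2] the bit offset and ops[3]
   the value to insert.  */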
458 void
459 spu_expand_insv (rtx ops[])
460 {
461 HOST_WIDE_INT width = INTVAL (ops[1]);
462 HOST_WIDE_INT start = INTVAL (ops[2]);
463 unsigned HOST_WIDE_INT maskbits;
464 machine_mode dst_mode;
465 rtx dst = ops[0], src = ops[3];
466 int dst_size;
467 rtx mask;
468 rtx shift_reg;
469 int shift;
470
471
472 if (GET_CODE (ops[0]) == MEM)
473 dst = gen_reg_rtx (TImode);
474 else
475 dst = adjust_operand (dst, &start);
476 dst_mode = GET_MODE (dst);
477 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
478
479 if (CONSTANT_P (src))
480 {
481 machine_mode m =
482 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
483 src = force_reg (m, convert_to_mode (m, src, 0));
484 }
485 src = adjust_operand (src, 0);
486
487 mask = gen_reg_rtx (dst_mode);
488 shift_reg = gen_reg_rtx (dst_mode);
489 shift = dst_size - start - width;
490
491 /* It's not safe to use subreg here because the compiler assumes
492 that the SUBREG_REG is right justified in the SUBREG. */
493 convert_move (shift_reg, src, 1);
494
495 if (shift > 0)
496 {
497 switch (dst_mode)
498 {
499 case E_SImode:
500 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
501 break;
502 case E_DImode:
503 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
504 break;
505 case E_TImode:
506 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
507 break;
508 default:
509 abort ();
510 }
511 }
512 else if (shift < 0)
513 abort ();
514
515 switch (dst_size)
516 {
517 case 32:
518 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
519 if (start)
520 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
521 emit_move_insn (mask, GEN_INT (maskbits));
522 break;
523 case 64:
524 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
525 if (start)
526 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
527 emit_move_insn (mask, GEN_INT (maskbits));
528 break;
529 case 128:
530 {
531 unsigned char arr[16];
532 int i = start / 8;
533 memset (arr, 0, sizeof (arr));
534 arr[i] = 0xff >> (start & 7);
535 for (i++; i <= (start + width - 1) / 8; i++)
536 arr[i] = 0xff;
537 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
538 emit_move_insn (mask, array_to_constant (TImode, arr));
539 }
540 break;
541 default:
542 abort ();
543 }
544 if (GET_CODE (ops[0]) == MEM)
545 {
546 rtx low = gen_reg_rtx (SImode);
547 rtx rotl = gen_reg_rtx (SImode);
548 rtx mask0 = gen_reg_rtx (TImode);
549 rtx addr;
550 rtx addr0;
551 rtx addr1;
552 rtx mem;
553
554 addr = force_reg (Pmode, XEXP (ops[0], 0));
555 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
556 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
557 emit_insn (gen_negsi2 (rotl, low));
558 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
559 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
560 mem = change_address (ops[0], TImode, addr0);
561 set_mem_alias_set (mem, 0);
562 emit_move_insn (dst, mem);
563 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
564 if (start + width > MEM_ALIGN (ops[0]))
565 {
566 rtx shl = gen_reg_rtx (SImode);
567 rtx mask1 = gen_reg_rtx (TImode);
568 rtx dst1 = gen_reg_rtx (TImode);
569 rtx mem1;
570 addr1 = plus_constant (Pmode, addr, 16);
571 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
572 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
573 emit_insn (gen_shlqby_ti (mask1, mask, shl));
574 mem1 = change_address (ops[0], TImode, addr1);
575 set_mem_alias_set (mem1, 0);
576 emit_move_insn (dst1, mem1);
577 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
578 emit_move_insn (mem1, dst1);
579 }
580 emit_move_insn (mem, dst);
581 }
582 else
583 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
584 }
585
586
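/* Expand a block move.  ops[0] is the destination MEM, ops[1] the
   source MEM, ops[2] the byte count and ops[3] the alignment.
   Return 1 if the move was expanded inline, 0 to let the caller
   fall back to a library call.  */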
587 int
588 spu_expand_block_move (rtx ops[])
589 {
590 HOST_WIDE_INT bytes, align, offset;
591 rtx src, dst, sreg, dreg, target;
592 int i;
593 if (GET_CODE (ops[2]) != CONST_INT
594 || GET_CODE (ops[3]) != CONST_INT
595 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
596 return 0;
597
598 bytes = INTVAL (ops[2]);
599 align = INTVAL (ops[3]);
600
601 if (bytes <= 0)
602 return 1;
603
604 dst = ops[0];
605 src = ops[1];
606
607 if (align == 16)
608 {
609 for (offset = 0; offset + 16 <= bytes; offset += 16)
610 {
611 dst = adjust_address (ops[0], V16QImode, offset);
612 src = adjust_address (ops[1], V16QImode, offset);
613 emit_move_insn (dst, src);
614 }
615 if (offset < bytes)
616 {
617 rtx mask;
618 unsigned char arr[16] = { 0 };
619 for (i = 0; i < bytes - offset; i++)
620 arr[i] = 0xff;
621 dst = adjust_address (ops[0], V16QImode, offset);
622 src = adjust_address (ops[1], V16QImode, offset);
623 mask = gen_reg_rtx (V16QImode);
624 sreg = gen_reg_rtx (V16QImode);
625 dreg = gen_reg_rtx (V16QImode);
626 target = gen_reg_rtx (V16QImode);
627 emit_move_insn (mask, array_to_constant (V16QImode, arr));
628 emit_move_insn (dreg, dst);
629 emit_move_insn (sreg, src);
630 emit_insn (gen_selb (target, dreg, sreg, mask));
631 emit_move_insn (dst, target);
632 }
633 return 1;
634 }
635 return 0;
636 }
637
638 enum spu_comp_code
639 { SPU_EQ, SPU_GT, SPU_GTU };
640
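/* Compare instruction codes, indexed first by operand mode (QI, HI,
   SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF) and then by
   spu_comp_code.  A zero entry means that comparison is not directly
   available for that mode.  */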
641 int spu_comp_icode[12][3] = {
642 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
643 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
644 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
645 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
646 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
647 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
648 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
649 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
650 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
651 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
652 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
653 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
654 };
655
656 /* Generate a compare for CODE and emit either a branch or a set of
657 the result, depending on IS_SET. GCC could figure this out too if
658 we didn't provide all variations of compares, but since GCC always
659 wants to use WORD_MODE, we can generate better code in most cases
660 if we do it ourselves. */
661 void
662 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
663 {
664 int reverse_compare = 0;
665 int reverse_test = 0;
666 rtx compare_result, eq_result;
667 rtx comp_rtx, eq_rtx;
668 machine_mode comp_mode;
669 machine_mode op_mode;
670 enum spu_comp_code scode, eq_code;
671 enum insn_code ior_code;
672 enum rtx_code code = GET_CODE (cmp);
673 rtx op0 = XEXP (cmp, 0);
674 rtx op1 = XEXP (cmp, 1);
675 int index;
676 int eq_test = 0;
677
678 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
679 and so on, to keep the constant in operand 1. */
680 if (GET_CODE (op1) == CONST_INT)
681 {
682 HOST_WIDE_INT val = INTVAL (op1) - 1;
683 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
684 switch (code)
685 {
686 case GE:
687 op1 = GEN_INT (val);
688 code = GT;
689 break;
690 case LT:
691 op1 = GEN_INT (val);
692 code = LE;
693 break;
694 case GEU:
695 op1 = GEN_INT (val);
696 code = GTU;
697 break;
698 case LTU:
699 op1 = GEN_INT (val);
700 code = LEU;
701 break;
702 default:
703 break;
704 }
705 }
706
707 /* However, if we generate an integer result, performing a reverse test
708 would require an extra negation, so avoid that where possible. */
709 if (GET_CODE (op1) == CONST_INT && is_set == 1)
710 {
711 HOST_WIDE_INT val = INTVAL (op1) + 1;
712 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
713 switch (code)
714 {
715 case LE:
716 op1 = GEN_INT (val);
717 code = LT;
718 break;
719 case LEU:
720 op1 = GEN_INT (val);
721 code = LTU;
722 break;
723 default:
724 break;
725 }
726 }
727
728 comp_mode = SImode;
729 op_mode = GET_MODE (op0);
730
731 switch (code)
732 {
733 case GE:
734 scode = SPU_GT;
735 if (HONOR_NANS (op_mode))
736 {
737 reverse_compare = 0;
738 reverse_test = 0;
739 eq_test = 1;
740 eq_code = SPU_EQ;
741 }
742 else
743 {
744 reverse_compare = 1;
745 reverse_test = 1;
746 }
747 break;
748 case LE:
749 scode = SPU_GT;
750 if (HONOR_NANS (op_mode))
751 {
752 reverse_compare = 1;
753 reverse_test = 0;
754 eq_test = 1;
755 eq_code = SPU_EQ;
756 }
757 else
758 {
759 reverse_compare = 0;
760 reverse_test = 1;
761 }
762 break;
763 case LT:
764 reverse_compare = 1;
765 reverse_test = 0;
766 scode = SPU_GT;
767 break;
768 case GEU:
769 reverse_compare = 1;
770 reverse_test = 1;
771 scode = SPU_GTU;
772 break;
773 case LEU:
774 reverse_compare = 0;
775 reverse_test = 1;
776 scode = SPU_GTU;
777 break;
778 case LTU:
779 reverse_compare = 1;
780 reverse_test = 0;
781 scode = SPU_GTU;
782 break;
783 case NE:
784 reverse_compare = 0;
785 reverse_test = 1;
786 scode = SPU_EQ;
787 break;
788
789 case EQ:
790 scode = SPU_EQ;
791 break;
792 case GT:
793 scode = SPU_GT;
794 break;
795 case GTU:
796 scode = SPU_GTU;
797 break;
798 default:
799 scode = SPU_EQ;
800 break;
801 }
802
803 switch (op_mode)
804 {
805 case E_QImode:
806 index = 0;
807 comp_mode = QImode;
808 break;
809 case E_HImode:
810 index = 1;
811 comp_mode = HImode;
812 break;
813 case E_SImode:
814 index = 2;
815 break;
816 case E_DImode:
817 index = 3;
818 break;
819 case E_TImode:
820 index = 4;
821 break;
822 case E_SFmode:
823 index = 5;
824 break;
825 case E_DFmode:
826 index = 6;
827 break;
828 case E_V16QImode:
829 index = 7;
830 comp_mode = op_mode;
831 break;
832 case E_V8HImode:
833 index = 8;
834 comp_mode = op_mode;
835 break;
836 case E_V4SImode:
837 index = 9;
838 comp_mode = op_mode;
839 break;
840 case E_V4SFmode:
841 index = 10;
842 comp_mode = V4SImode;
843 break;
844 case E_V2DFmode:
845 index = 11;
846 comp_mode = V2DImode;
847 break;
848 case E_V2DImode:
849 default:
850 abort ();
851 }
852
853 if (GET_MODE (op1) == DFmode
854 && (scode != SPU_GT && scode != SPU_EQ))
855 abort ();
856
857 if (is_set == 0 && op1 == const0_rtx
858 && (GET_MODE (op0) == SImode
859 || GET_MODE (op0) == HImode
860 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
861 {
862 /* Don't need to set a register with the result when we are
863 comparing against zero and branching. */
864 reverse_test = !reverse_test;
865 compare_result = op0;
866 }
867 else
868 {
869 compare_result = gen_reg_rtx (comp_mode);
870
871 if (reverse_compare)
872 {
873 rtx t = op1;
874 op1 = op0;
875 op0 = t;
876 }
877
878 if (spu_comp_icode[index][scode] == 0)
879 abort ();
880
881 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
882 (op0, op_mode))
883 op0 = force_reg (op_mode, op0);
884 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
885 (op1, op_mode))
886 op1 = force_reg (op_mode, op1);
887 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
888 op0, op1);
889 if (comp_rtx == 0)
890 abort ();
891 emit_insn (comp_rtx);
892
893 if (eq_test)
894 {
895 eq_result = gen_reg_rtx (comp_mode);
896 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
897 op0, op1);
898 if (eq_rtx == 0)
899 abort ();
900 emit_insn (eq_rtx);
901 ior_code = optab_handler (ior_optab, comp_mode);
902 gcc_assert (ior_code != CODE_FOR_nothing);
903 emit_insn (GEN_FCN (ior_code)
904 (compare_result, compare_result, eq_result));
905 }
906 }
907
908 if (is_set == 0)
909 {
910 rtx bcomp;
911 rtx loc_ref;
912
913 /* We don't have branch on QI compare insns, so we convert the
914 QI compare result to a HI result. */
915 if (comp_mode == QImode)
916 {
917 rtx old_res = compare_result;
918 compare_result = gen_reg_rtx (HImode);
919 comp_mode = HImode;
920 emit_insn (gen_extendqihi2 (compare_result, old_res));
921 }
922
923 if (reverse_test)
924 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
925 else
926 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
927
928 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
929 emit_jump_insn (gen_rtx_SET (pc_rtx,
930 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
931 loc_ref, pc_rtx)));
932 }
933 else if (is_set == 2)
934 {
935 rtx target = operands[0];
936 int compare_size = GET_MODE_BITSIZE (comp_mode);
937 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
938 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
939 rtx select_mask;
940 rtx op_t = operands[2];
941 rtx op_f = operands[3];
942
943 /* The result of the comparison can be SI, HI or QI mode. Create a
944 mask based on that result. */
945 if (target_size > compare_size)
946 {
947 select_mask = gen_reg_rtx (mode);
948 emit_insn (gen_extend_compare (select_mask, compare_result));
949 }
950 else if (target_size < compare_size)
951 select_mask =
952 gen_rtx_SUBREG (mode, compare_result,
953 (compare_size - target_size) / BITS_PER_UNIT);
954 else if (comp_mode != mode)
955 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
956 else
957 select_mask = compare_result;
958
959 if (GET_MODE (target) != GET_MODE (op_t)
960 || GET_MODE (target) != GET_MODE (op_f))
961 abort ();
962
963 if (reverse_test)
964 emit_insn (gen_selb (target, op_t, op_f, select_mask));
965 else
966 emit_insn (gen_selb (target, op_f, op_t, select_mask));
967 }
968 else
969 {
970 rtx target = operands[0];
971 if (reverse_test)
972 emit_insn (gen_rtx_SET (compare_result,
973 gen_rtx_NOT (comp_mode, compare_result)));
974 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
975 emit_insn (gen_extendhisi2 (target, compare_result));
976 else if (GET_MODE (target) == SImode
977 && GET_MODE (compare_result) == QImode)
978 emit_insn (gen_extend_compare (target, compare_result));
979 else
980 emit_move_insn (target, compare_result);
981 }
982 }
983
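/* Return the bit pattern of the SFmode or DFmode constant X as a
   HOST_WIDE_INT.  */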
984 HOST_WIDE_INT
985 const_double_to_hwint (rtx x)
986 {
987 HOST_WIDE_INT val;
988 if (GET_MODE (x) == SFmode)
989 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
990 else if (GET_MODE (x) == DFmode)
991 {
992 long l[2];
993 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
994 val = l[0];
995 val = (val << 32) | (l[1] & 0xffffffff);
996 }
997 else
998 abort ();
999 return val;
1000 }
1001
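/* The inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE from the bit pattern V.  */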
1002 rtx
1003 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1004 {
1005 long tv[2];
1006 REAL_VALUE_TYPE rv;
1007 gcc_assert (mode == SFmode || mode == DFmode);
1008
1009 if (mode == SFmode)
1010 tv[0] = (v << 32) >> 32;
1011 else if (mode == DFmode)
1012 {
1013 tv[1] = (v << 32) >> 32;
1014 tv[0] = v >> 32;
1015 }
1016 real_from_target (&rv, tv, mode);
1017 return const_double_from_real_value (rv, mode);
1018 }
1019
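/* Output the address ADDR to FILE in SPU assembler syntax.  */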
1020 void
1021 print_operand_address (FILE * file, register rtx addr)
1022 {
1023 rtx reg;
1024 rtx offset;
1025
1026 if (GET_CODE (addr) == AND
1027 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1028 && INTVAL (XEXP (addr, 1)) == -16)
1029 addr = XEXP (addr, 0);
1030
1031 switch (GET_CODE (addr))
1032 {
1033 case REG:
1034 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1035 break;
1036
1037 case PLUS:
1038 reg = XEXP (addr, 0);
1039 offset = XEXP (addr, 1);
1040 if (GET_CODE (offset) == REG)
1041 {
1042 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1043 reg_names[REGNO (offset)]);
1044 }
1045 else if (GET_CODE (offset) == CONST_INT)
1046 {
1047 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1048 INTVAL (offset), reg_names[REGNO (reg)]);
1049 }
1050 else
1051 abort ();
1052 break;
1053
1054 case CONST:
1055 case LABEL_REF:
1056 case SYMBOL_REF:
1057 case CONST_INT:
1058 output_addr_const (file, addr);
1059 break;
1060
1061 default:
1062 debug_rtx (addr);
1063 abort ();
1064 }
1065 }
1066
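/* Output operand X to FILE.  CODE selects one of the target-specific
   output formats handled below; 0 means print the operand with no
   modification.  */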
1067 void
1068 print_operand (FILE * file, rtx x, int code)
1069 {
1070 machine_mode mode = GET_MODE (x);
1071 HOST_WIDE_INT val;
1072 unsigned char arr[16];
1073 int xcode = GET_CODE (x);
1074 int i, info;
1075 if (GET_MODE (x) == VOIDmode)
1076 switch (code)
1077 {
1078 case 'L': /* 128 bits, signed */
1079 case 'm': /* 128 bits, signed */
1080 case 'T': /* 128 bits, signed */
1081 case 't': /* 128 bits, signed */
1082 mode = TImode;
1083 break;
1084 case 'K': /* 64 bits, signed */
1085 case 'k': /* 64 bits, signed */
1086 case 'D': /* 64 bits, signed */
1087 case 'd': /* 64 bits, signed */
1088 mode = DImode;
1089 break;
1090 case 'J': /* 32 bits, signed */
1091 case 'j': /* 32 bits, signed */
1092 case 's': /* 32 bits, signed */
1093 case 'S': /* 32 bits, signed */
1094 mode = SImode;
1095 break;
1096 }
1097 switch (code)
1098 {
1099
1100 case 'j': /* 32 bits, signed */
1101 case 'k': /* 64 bits, signed */
1102 case 'm': /* 128 bits, signed */
1103 if (xcode == CONST_INT
1104 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1105 {
1106 gcc_assert (logical_immediate_p (x, mode));
1107 constant_to_array (mode, x, arr);
1108 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1109 val = trunc_int_for_mode (val, SImode);
1110 switch (which_logical_immediate (val))
1111 {
1112 case SPU_ORI:
1113 break;
1114 case SPU_ORHI:
1115 fprintf (file, "h");
1116 break;
1117 case SPU_ORBI:
1118 fprintf (file, "b");
1119 break;
1120 default:
1121 gcc_unreachable();
1122 }
1123 }
1124 else
1125 gcc_unreachable();
1126 return;
1127
1128 case 'J': /* 32 bits, signed */
1129 case 'K': /* 64 bits, signed */
1130 case 'L': /* 128 bits, signed */
1131 if (xcode == CONST_INT
1132 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1133 {
1134 gcc_assert (logical_immediate_p (x, mode)
1135 || iohl_immediate_p (x, mode));
1136 constant_to_array (mode, x, arr);
1137 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1138 val = trunc_int_for_mode (val, SImode);
1139 switch (which_logical_immediate (val))
1140 {
1141 case SPU_ORI:
1142 case SPU_IOHL:
1143 break;
1144 case SPU_ORHI:
1145 val = trunc_int_for_mode (val, HImode);
1146 break;
1147 case SPU_ORBI:
1148 val = trunc_int_for_mode (val, QImode);
1149 break;
1150 default:
1151 gcc_unreachable();
1152 }
1153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1154 }
1155 else
1156 gcc_unreachable();
1157 return;
1158
1159 case 't': /* 128 bits, signed */
1160 case 'd': /* 64 bits, signed */
1161 case 's': /* 32 bits, signed */
1162 if (CONSTANT_P (x))
1163 {
1164 enum immediate_class c = classify_immediate (x, mode);
1165 switch (c)
1166 {
1167 case IC_IL1:
1168 constant_to_array (mode, x, arr);
1169 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1170 val = trunc_int_for_mode (val, SImode);
1171 switch (which_immediate_load (val))
1172 {
1173 case SPU_IL:
1174 break;
1175 case SPU_ILA:
1176 fprintf (file, "a");
1177 break;
1178 case SPU_ILH:
1179 fprintf (file, "h");
1180 break;
1181 case SPU_ILHU:
1182 fprintf (file, "hu");
1183 break;
1184 default:
1185 gcc_unreachable ();
1186 }
1187 break;
1188 case IC_CPAT:
1189 constant_to_array (mode, x, arr);
1190 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1191 if (info == 1)
1192 fprintf (file, "b");
1193 else if (info == 2)
1194 fprintf (file, "h");
1195 else if (info == 4)
1196 fprintf (file, "w");
1197 else if (info == 8)
1198 fprintf (file, "d");
1199 break;
1200 case IC_IL1s:
1201 if (xcode == CONST_VECTOR)
1202 {
1203 x = CONST_VECTOR_ELT (x, 0);
1204 xcode = GET_CODE (x);
1205 }
1206 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1207 fprintf (file, "a");
1208 else if (xcode == HIGH)
1209 fprintf (file, "hu");
1210 break;
1211 case IC_FSMBI:
1212 case IC_FSMBI2:
1213 case IC_IL2:
1214 case IC_IL2s:
1215 case IC_POOL:
1216 abort ();
1217 }
1218 }
1219 else
1220 gcc_unreachable ();
1221 return;
1222
1223 case 'T': /* 128 bits, signed */
1224 case 'D': /* 64 bits, signed */
1225 case 'S': /* 32 bits, signed */
1226 if (CONSTANT_P (x))
1227 {
1228 enum immediate_class c = classify_immediate (x, mode);
1229 switch (c)
1230 {
1231 case IC_IL1:
1232 constant_to_array (mode, x, arr);
1233 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1234 val = trunc_int_for_mode (val, SImode);
1235 switch (which_immediate_load (val))
1236 {
1237 case SPU_IL:
1238 case SPU_ILA:
1239 break;
1240 case SPU_ILH:
1241 case SPU_ILHU:
1242 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1243 break;
1244 default:
1245 gcc_unreachable ();
1246 }
1247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1248 break;
1249 case IC_FSMBI:
1250 constant_to_array (mode, x, arr);
1251 val = 0;
1252 for (i = 0; i < 16; i++)
1253 {
1254 val <<= 1;
1255 val |= arr[i] & 1;
1256 }
1257 print_operand (file, GEN_INT (val), 0);
1258 break;
1259 case IC_CPAT:
1260 constant_to_array (mode, x, arr);
1261 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1263 break;
1264 case IC_IL1s:
1265 if (xcode == HIGH)
1266 x = XEXP (x, 0);
1267 if (GET_CODE (x) == CONST_VECTOR)
1268 x = CONST_VECTOR_ELT (x, 0);
1269 output_addr_const (file, x);
1270 if (xcode == HIGH)
1271 fprintf (file, "@h");
1272 break;
1273 case IC_IL2:
1274 case IC_IL2s:
1275 case IC_FSMBI2:
1276 case IC_POOL:
1277 abort ();
1278 }
1279 }
1280 else
1281 gcc_unreachable ();
1282 return;
1283
1284 case 'C':
1285 if (xcode == CONST_INT)
1286 {
1287 /* Only 4 least significant bits are relevant for generating
1288 control word instructions. */
1289 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1290 return;
1291 }
1292 break;
1293
1294 case 'M': /* print code for c*d */
1295 if (GET_CODE (x) == CONST_INT)
1296 switch (INTVAL (x))
1297 {
1298 case 1:
1299 fprintf (file, "b");
1300 break;
1301 case 2:
1302 fprintf (file, "h");
1303 break;
1304 case 4:
1305 fprintf (file, "w");
1306 break;
1307 case 8:
1308 fprintf (file, "d");
1309 break;
1310 default:
1311 gcc_unreachable();
1312 }
1313 else
1314 gcc_unreachable();
1315 return;
1316
1317 case 'N': /* Negate the operand */
1318 if (xcode == CONST_INT)
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1320 else if (xcode == CONST_VECTOR)
1321 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1322 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1323 return;
1324
1325 case 'I': /* enable/disable interrupts */
1326 if (xcode == CONST_INT)
1327 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1328 return;
1329
1330 case 'b': /* branch modifiers */
1331 if (xcode == REG)
1332 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1333 else if (COMPARISON_P (x))
1334 fprintf (file, "%s", xcode == NE ? "n" : "");
1335 return;
1336
1337 case 'i': /* indirect call */
1338 if (xcode == MEM)
1339 {
1340 if (GET_CODE (XEXP (x, 0)) == REG)
1341 /* Used in indirect function calls. */
1342 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1343 else
1344 output_address (GET_MODE (x), XEXP (x, 0));
1345 }
1346 return;
1347
1348 case 'p': /* load/store */
1349 if (xcode == MEM)
1350 {
1351 x = XEXP (x, 0);
1352 xcode = GET_CODE (x);
1353 }
1354 if (xcode == AND)
1355 {
1356 x = XEXP (x, 0);
1357 xcode = GET_CODE (x);
1358 }
1359 if (xcode == REG)
1360 fprintf (file, "d");
1361 else if (xcode == CONST_INT)
1362 fprintf (file, "a");
1363 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1364 fprintf (file, "r");
1365 else if (xcode == PLUS || xcode == LO_SUM)
1366 {
1367 if (GET_CODE (XEXP (x, 1)) == REG)
1368 fprintf (file, "x");
1369 else
1370 fprintf (file, "d");
1371 }
1372 return;
1373
1374 case 'e':
1375 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1376 val &= 0x7;
1377 output_addr_const (file, GEN_INT (val));
1378 return;
1379
1380 case 'f':
1381 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1382 val &= 0x1f;
1383 output_addr_const (file, GEN_INT (val));
1384 return;
1385
1386 case 'g':
1387 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1388 val &= 0x3f;
1389 output_addr_const (file, GEN_INT (val));
1390 return;
1391
1392 case 'h':
1393 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1394 val = (val >> 3) & 0x1f;
1395 output_addr_const (file, GEN_INT (val));
1396 return;
1397
1398 case 'E':
1399 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1400 val = -val;
1401 val &= 0x7;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1404
1405 case 'F':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val = -val;
1408 val &= 0x1f;
1409 output_addr_const (file, GEN_INT (val));
1410 return;
1411
1412 case 'G':
1413 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1414 val = -val;
1415 val &= 0x3f;
1416 output_addr_const (file, GEN_INT (val));
1417 return;
1418
1419 case 'H':
1420 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421 val = -(val & -8ll);
1422 val = (val >> 3) & 0x1f;
1423 output_addr_const (file, GEN_INT (val));
1424 return;
1425
1426 case 'v':
1427 case 'w':
1428 constant_to_array (mode, x, arr);
1429 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1430 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1431 return;
1432
1433 case 0:
1434 if (xcode == REG)
1435 fprintf (file, "%s", reg_names[REGNO (x)]);
1436 else if (xcode == MEM)
1437 output_address (GET_MODE (x), XEXP (x, 0));
1438 else if (xcode == CONST_VECTOR)
1439 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1440 else
1441 output_addr_const (file, x);
1442 return;
1443
1444 /* unused letters
1445 o qr u yz
1446 AB OPQR UVWXYZ */
1447 default:
1448 output_operand_lossage ("invalid %%xn code");
1449 }
1450 gcc_unreachable ();
1451 }
1452
1453 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1454 caller saved register. For leaf functions it is more efficient to
1455 use a volatile register because we won't need to save and restore the
1456 pic register. This routine is only valid after register allocation
1457 is completed, so we can pick an unused register. */
1458 static rtx
1459 get_pic_reg (void)
1460 {
1461 if (!reload_completed && !reload_in_progress)
1462 abort ();
1463
1464 /* If we've already made the decision, we need to stick with it. Once we've
1465 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1466 return true since the register is now live; this should not cause us to
1467 "switch back" to using pic_offset_table_rtx. */
1468 if (!cfun->machine->pic_reg)
1469 {
1470 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1471 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1472 else
1473 cfun->machine->pic_reg = pic_offset_table_rtx;
1474 }
1475
1476 return cfun->machine->pic_reg;
1477 }
1478
1479 /* Split constant addresses to handle cases that are too large.
1480 Add in the pic register when in PIC mode.
1481 Split immediates that require more than 1 instruction. */
1482 int
1483 spu_split_immediate (rtx * ops)
1484 {
1485 machine_mode mode = GET_MODE (ops[0]);
1486 enum immediate_class c = classify_immediate (ops[1], mode);
1487
1488 switch (c)
1489 {
1490 case IC_IL2:
1491 {
1492 unsigned char arrhi[16];
1493 unsigned char arrlo[16];
1494 rtx to, temp, hi, lo;
1495 int i;
1496 /* We need to do reals as ints because the constant used in the
1497 IOR might not be a legitimate real constant. */
1498 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1499 constant_to_array (mode, ops[1], arrhi);
1500 if (imode != mode)
1501 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1502 else
1503 to = ops[0];
1504 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1505 for (i = 0; i < 16; i += 4)
1506 {
1507 arrlo[i + 2] = arrhi[i + 2];
1508 arrlo[i + 3] = arrhi[i + 3];
1509 arrlo[i + 0] = arrlo[i + 1] = 0;
1510 arrhi[i + 2] = arrhi[i + 3] = 0;
1511 }
1512 hi = array_to_constant (imode, arrhi);
1513 lo = array_to_constant (imode, arrlo);
1514 emit_move_insn (temp, hi);
1515 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1516 return 1;
1517 }
1518 case IC_FSMBI2:
1519 {
1520 unsigned char arr_fsmbi[16];
1521 unsigned char arr_andbi[16];
1522 rtx to, reg_fsmbi, reg_and;
1523 int i;
1524 /* We need to do reals as ints because the constant used in the
1525 * AND might not be a legitimate real constant. */
1526 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1527 constant_to_array (mode, ops[1], arr_fsmbi);
1528 if (imode != mode)
1529 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1530 else
1531 to = ops[0];
1532 for (i = 0; i < 16; i++)
1533 if (arr_fsmbi[i] != 0)
1534 {
1535 arr_andbi[0] = arr_fsmbi[i];
1536 arr_fsmbi[i] = 0xff;
1537 }
1538 for (i = 1; i < 16; i++)
1539 arr_andbi[i] = arr_andbi[0];
1540 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1541 reg_and = array_to_constant (imode, arr_andbi);
1542 emit_move_insn (to, reg_fsmbi);
1543 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1544 return 1;
1545 }
1546 case IC_POOL:
1547 if (reload_in_progress || reload_completed)
1548 {
1549 rtx mem = force_const_mem (mode, ops[1]);
1550 if (TARGET_LARGE_MEM)
1551 {
1552 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1553 emit_move_insn (addr, XEXP (mem, 0));
1554 mem = replace_equiv_address (mem, addr);
1555 }
1556 emit_move_insn (ops[0], mem);
1557 return 1;
1558 }
1559 break;
1560 case IC_IL1s:
1561 case IC_IL2s:
1562 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1563 {
1564 if (c == IC_IL2s)
1565 {
1566 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1567 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1568 }
1569 else if (flag_pic)
1570 emit_insn (gen_pic (ops[0], ops[1]));
1571 if (flag_pic)
1572 {
1573 rtx pic_reg = get_pic_reg ();
1574 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1575 }
1576 return flag_pic || c == IC_IL2s;
1577 }
1578 break;
1579 case IC_IL1:
1580 case IC_FSMBI:
1581 case IC_CPAT:
1582 break;
1583 }
1584 return 0;
1585 }
1586
1587 /* SAVING is TRUE when we are generating the actual load and store
1588 instructions for REGNO. When determining the size of the stack
1589 needed for saving registers we must allocate enough space for the
1590 worst case, because we don't always have the information early enough
1591 to not allocate it. But we can at least eliminate the actual loads
1592 and stores during the prologue/epilogue. */
1593 static int
1594 need_to_save_reg (int regno, int saving)
1595 {
1596 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1597 return 1;
1598 if (flag_pic
1599 && regno == PIC_OFFSET_TABLE_REGNUM
1600 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1601 return 1;
1602 return 0;
1603 }
1604
1605 /* This function is only correct starting with local register
1606 allocation */
1607 int
1608 spu_saved_regs_size (void)
1609 {
1610 int reg_save_size = 0;
1611 int regno;
1612
1613 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1614 if (need_to_save_reg (regno, 0))
1615 reg_save_size += 0x10;
1616 return reg_save_size;
1617 }
1618
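/* Prologue/epilogue helpers: save or restore register REGNO to or
   from the quadword stack slot at ADDR + OFFSET.  */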
1619 static rtx_insn *
1620 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1621 {
1622 rtx reg = gen_rtx_REG (V4SImode, regno);
1623 rtx mem =
1624 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1625 return emit_insn (gen_movv4si (mem, reg));
1626 }
1627
1628 static rtx_insn *
1629 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1630 {
1631 rtx reg = gen_rtx_REG (V4SImode, regno);
1632 rtx mem =
1633 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1634 return emit_insn (gen_movv4si (reg, mem));
1635 }
1636
1637 /* This happens after reload, so we need to expand it. */
1638 static rtx_insn *
1639 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1640 {
1641 rtx_insn *insn;
1642 if (satisfies_constraint_K (GEN_INT (imm)))
1643 {
1644 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1645 }
1646 else
1647 {
1648 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1649 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1650 if (REGNO (src) == REGNO (scratch))
1651 abort ();
1652 }
1653 return insn;
1654 }
1655
1656 /* Return nonzero if this function is known to have a null epilogue. */
1657
1658 int
1659 direct_return (void)
1660 {
1661 if (reload_completed)
1662 {
1663 if (cfun->static_chain_decl == 0
1664 && (spu_saved_regs_size ()
1665 + get_frame_size ()
1666 + crtl->outgoing_args_size
1667 + crtl->args.pretend_args_size == 0)
1668 && crtl->is_leaf)
1669 return 1;
1670 }
1671 return 0;
1672 }
1673
1674 /*
1675 The stack frame looks like this:
1676 +-------------+
1677 | incoming |
1678 | args |
1679 AP -> +-------------+
1680 | $lr save |
1681 +-------------+
1682 prev SP | back chain |
1683 +-------------+
1684 | var args |
1685 | reg save | crtl->args.pretend_args_size bytes
1686 +-------------+
1687 | ... |
1688 | saved regs | spu_saved_regs_size() bytes
1689 FP -> +-------------+
1690 | ... |
1691 | vars | get_frame_size() bytes
1692 HFP -> +-------------+
1693 | ... |
1694 | outgoing |
1695 | args | crtl->outgoing_args_size bytes
1696 +-------------+
1697 | $lr of next |
1698 | frame |
1699 +-------------+
1700 | back chain |
1701 SP -> +-------------+
1702
1703 */
1704 void
1705 spu_expand_prologue (void)
1706 {
1707 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1708 HOST_WIDE_INT total_size;
1709 HOST_WIDE_INT saved_regs_size;
1710 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1711 rtx scratch_reg_0, scratch_reg_1;
1712 rtx_insn *insn;
1713 rtx real;
1714
1715 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1716 cfun->machine->pic_reg = pic_offset_table_rtx;
1717
1718 if (spu_naked_function_p (current_function_decl))
1719 return;
1720
1721 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1722 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1723
1724 saved_regs_size = spu_saved_regs_size ();
1725 total_size = size + saved_regs_size
1726 + crtl->outgoing_args_size
1727 + crtl->args.pretend_args_size;
1728
1729 if (!crtl->is_leaf
1730 || cfun->calls_alloca || total_size > 0)
1731 total_size += STACK_POINTER_OFFSET;
1732
1733 /* Save this first because code after this might use the link
1734 register as a scratch register. */
1735 if (!crtl->is_leaf)
1736 {
1737 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1738 RTX_FRAME_RELATED_P (insn) = 1;
1739 }
1740
1741 if (total_size > 0)
1742 {
1743 offset = -crtl->args.pretend_args_size;
1744 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1745 if (need_to_save_reg (regno, 1))
1746 {
1747 offset -= 16;
1748 insn = frame_emit_store (regno, sp_reg, offset);
1749 RTX_FRAME_RELATED_P (insn) = 1;
1750 }
1751 }
1752
1753 if (flag_pic && cfun->machine->pic_reg)
1754 {
1755 rtx pic_reg = cfun->machine->pic_reg;
1756 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1757 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1758 }
1759
1760 if (total_size > 0)
1761 {
1762 if (flag_stack_check)
1763 {
1764 /* We compare against total_size-1 because
1765 ($sp >= total_size) <=> ($sp > total_size-1) */
1766 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1767 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1768 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1769 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1770 {
1771 emit_move_insn (scratch_v4si, size_v4si);
1772 size_v4si = scratch_v4si;
1773 }
1774 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1775 emit_insn (gen_vec_extractv4sisi
1776 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1777 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1778 }
1779
1780 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1781 the value of the previous $sp because we save it as the back
1782 chain. */
1783 if (total_size <= 2000)
1784 {
1785 /* In this case we save the back chain first. */
1786 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1787 insn =
1788 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1789 }
1790 else
1791 {
1792 insn = emit_move_insn (scratch_reg_0, sp_reg);
1793 insn =
1794 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1795 }
1796 RTX_FRAME_RELATED_P (insn) = 1;
1797 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1798 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1799
1800 if (total_size > 2000)
1801 {
1802 /* Save the back chain ptr */
1803 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1804 }
1805
1806 if (frame_pointer_needed)
1807 {
1808 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1809 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1810 + crtl->outgoing_args_size;
1811 /* Set the new frame_pointer */
1812 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1813 RTX_FRAME_RELATED_P (insn) = 1;
1814 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1815 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1816 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1817 }
1818 }
1819
1820 if (flag_stack_usage_info)
1821 current_function_static_stack_size = total_size;
1822 }
1823
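/* Emit the function epilogue: restore the saved registers, pop the
   stack frame and, unless SIBCALL_P, emit the return instruction.  */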
1824 void
1825 spu_expand_epilogue (bool sibcall_p)
1826 {
1827 int size = get_frame_size (), offset, regno;
1828 HOST_WIDE_INT saved_regs_size, total_size;
1829 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1830 rtx scratch_reg_0;
1831
1832 if (spu_naked_function_p (current_function_decl))
1833 return;
1834
1835 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1836
1837 saved_regs_size = spu_saved_regs_size ();
1838 total_size = size + saved_regs_size
1839 + crtl->outgoing_args_size
1840 + crtl->args.pretend_args_size;
1841
1842 if (!crtl->is_leaf
1843 || cfun->calls_alloca || total_size > 0)
1844 total_size += STACK_POINTER_OFFSET;
1845
1846 if (total_size > 0)
1847 {
1848 if (cfun->calls_alloca)
1849 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1850 else
1851 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1852
1853
1854 if (saved_regs_size > 0)
1855 {
1856 offset = -crtl->args.pretend_args_size;
1857 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1858 if (need_to_save_reg (regno, 1))
1859 {
1860 offset -= 0x10;
1861 frame_emit_load (regno, sp_reg, offset);
1862 }
1863 }
1864 }
1865
1866 if (!crtl->is_leaf)
1867 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1868
1869 if (!sibcall_p)
1870 {
1871 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1872 emit_jump_insn (gen__return ());
1873 }
1874 }
1875
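/* Return the RTX for the return address of the current function
   (COUNT == 0); other frames are not supported.  */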
1876 rtx
1877 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1878 {
1879 if (count != 0)
1880 return 0;
1881 /* This is inefficient because it ends up copying to a save-register
1882 which then gets saved even though $lr has already been saved. But
1883 it does generate better code for leaf functions and we don't need
1884 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1885 used for __builtin_return_address anyway, so maybe we don't care if
1886 it's inefficient. */
1887 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1888 }
1889 \f
1890
1891 /* Given VAL, generate a constant appropriate for MODE.
1892 If MODE is a vector mode, every element will be VAL.
1893 For TImode, VAL will be zero extended to 128 bits. */
1894 rtx
1895 spu_const (machine_mode mode, HOST_WIDE_INT val)
1896 {
1897 rtx inner;
1898 rtvec v;
1899 int units, i;
1900
1901 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1902 || GET_MODE_CLASS (mode) == MODE_FLOAT
1903 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1904 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1905
1906 if (GET_MODE_CLASS (mode) == MODE_INT)
1907 return immed_double_const (val, 0, mode);
1908
1909 /* val is the bit representation of the float */
1910 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1911 return hwint_to_const_double (mode, val);
1912
1913 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1914 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1915 else
1916 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1917
1918 units = GET_MODE_NUNITS (mode);
1919
1920 v = rtvec_alloc (units);
1921
1922 for (i = 0; i < units; ++i)
1923 RTVEC_ELT (v, i) = inner;
1924
1925 return gen_rtx_CONST_VECTOR (mode, v);
1926 }
1927
1928 /* Create a MODE vector constant from 4 ints. */
1929 rtx
1930 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1931 {
1932 unsigned char arr[16];
1933 arr[0] = (a >> 24) & 0xff;
1934 arr[1] = (a >> 16) & 0xff;
1935 arr[2] = (a >> 8) & 0xff;
1936 arr[3] = (a >> 0) & 0xff;
1937 arr[4] = (b >> 24) & 0xff;
1938 arr[5] = (b >> 16) & 0xff;
1939 arr[6] = (b >> 8) & 0xff;
1940 arr[7] = (b >> 0) & 0xff;
1941 arr[8] = (c >> 24) & 0xff;
1942 arr[9] = (c >> 16) & 0xff;
1943 arr[10] = (c >> 8) & 0xff;
1944 arr[11] = (c >> 0) & 0xff;
1945 arr[12] = (d >> 24) & 0xff;
1946 arr[13] = (d >> 16) & 0xff;
1947 arr[14] = (d >> 8) & 0xff;
1948 arr[15] = (d >> 0) & 0xff;
1949 return array_to_constant(mode, arr);
1950 }
1951 \f
1952 /* branch hint stuff */
1953
1954 /* An array of these is used to propagate hints to predecessor blocks. */
1955 struct spu_bb_info
1956 {
1957 rtx_insn *prop_jump; /* propagated from another block */
1958 int bb_index; /* the original block. */
1959 };
1960 static struct spu_bb_info *spu_bb_info;
1961
1962 #define STOP_HINT_P(INSN) \
1963 (CALL_P(INSN) \
1964 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1965 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1966
1967 /* 1 when RTX is a hinted branch or its target. We keep track of
1968 what has been hinted so the safe-hint code can test it easily. */
1969 #define HINTED_P(RTX) \
1970 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1971
1972 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1973 #define SCHED_ON_EVEN_P(RTX) \
1974 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1975
1976 /* Emit a nop for INSN such that the two will dual issue. This assumes
1977 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1978 We check for TImode to handle a MULTI1 insn which has dual issued its
1979 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1980 static void
1981 emit_nop_for_insn (rtx_insn *insn)
1982 {
1983 int p;
1984 rtx_insn *new_insn;
1985
1986 /* We need to handle JUMP_TABLE_DATA separately. */
1987 if (JUMP_TABLE_DATA_P (insn))
1988 {
1989 new_insn = emit_insn_after (gen_lnop(), insn);
1990 recog_memoized (new_insn);
1991 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1992 return;
1993 }
1994
1995 p = get_pipe (insn);
1996 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1997 new_insn = emit_insn_after (gen_lnop (), insn);
1998 else if (p == 1 && GET_MODE (insn) == TImode)
1999 {
2000 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2001 PUT_MODE (new_insn, TImode);
2002 PUT_MODE (insn, VOIDmode);
2003 }
2004 else
2005 new_insn = emit_insn_after (gen_lnop (), insn);
2006 recog_memoized (new_insn);
2007 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2008 }
2009
2010 /* Insert nops in basic blocks to meet dual issue alignment
2011 requirements. Also make sure hbrp and hint instructions are at least
2012 one cycle apart, possibly inserting a nop. */
2013 static void
2014 pad_bb(void)
2015 {
2016 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2017 int length;
2018 int addr;
2019
2020 /* This sets up INSN_ADDRESSES. */
2021 shorten_branches (get_insns ());
2022
2023 /* Keep track of length added by nops. */
2024 length = 0;
2025
2026 prev_insn = 0;
2027 insn = get_insns ();
2028 if (!active_insn_p (insn))
2029 insn = next_active_insn (insn);
2030 for (; insn; insn = next_insn)
2031 {
2032 next_insn = next_active_insn (insn);
2033 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2034 || INSN_CODE (insn) == CODE_FOR_hbr)
2035 {
2036 if (hbr_insn)
2037 {
2038 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2039 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2040 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2041 || (a1 - a0 == 4))
2042 {
2043 prev_insn = emit_insn_before (gen_lnop (), insn);
2044 PUT_MODE (prev_insn, GET_MODE (insn));
2045 PUT_MODE (insn, TImode);
2046 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2047 length += 4;
2048 }
2049 }
2050 hbr_insn = insn;
2051 }
2052 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2053 {
2054 if (GET_MODE (insn) == TImode)
2055 PUT_MODE (next_insn, TImode);
2056 insn = next_insn;
2057 next_insn = next_active_insn (insn);
2058 }
2059 addr = INSN_ADDRESSES (INSN_UID (insn));
2060 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2061 {
2062 if (((addr + length) & 7) != 0)
2063 {
2064 emit_nop_for_insn (prev_insn);
2065 length += 4;
2066 }
2067 }
2068 else if (GET_MODE (insn) == TImode
2069 && ((next_insn && GET_MODE (next_insn) != TImode)
2070 || get_attr_type (insn) == TYPE_MULTI0)
2071 && ((addr + length) & 7) != 0)
2072 {
2073 /* prev_insn will always be set because the first insn is
2074 always 8-byte aligned. */
2075 emit_nop_for_insn (prev_insn);
2076 length += 4;
2077 }
2078 prev_insn = insn;
2079 }
2080 }
2081
2082 \f
2083 /* Routines for branch hints. */
2084
2085 static void
2086 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2087 int distance, sbitmap blocks)
2088 {
2089 rtx_insn *hint;
2090 rtx_insn *insn;
2091 rtx_jump_table_data *table;
2092
2093 if (before == 0 || branch == 0 || target == 0)
2094 return;
2095
2096 /* While scheduling we require hints to be no further than 600 bytes
2097 from the branch, so we need to enforce that here too. */
2098 if (distance > 600)
2099 return;
2100
2101 /* If BEFORE is a basic block note, emit the hint after the note. */
2102 if (NOTE_INSN_BASIC_BLOCK_P (before))
2103 before = NEXT_INSN (before);
2104
2105 rtx_code_label *branch_label = gen_label_rtx ();
2106 LABEL_NUSES (branch_label)++;
2107 LABEL_PRESERVE_P (branch_label) = 1;
2108 insn = emit_label_before (branch_label, branch);
2109 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2110 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2111
2112 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2113 recog_memoized (hint);
2114 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2115 HINTED_P (branch) = 1;
2116
2117 if (GET_CODE (target) == LABEL_REF)
2118 HINTED_P (XEXP (target, 0)) = 1;
2119 else if (tablejump_p (branch, 0, &table))
2120 {
2121 rtvec vec;
2122 int j;
2123 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2124 vec = XVEC (PATTERN (table), 0);
2125 else
2126 vec = XVEC (PATTERN (table), 1);
2127 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2128 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2129 }
2130
2131 if (distance >= 588)
2132 {
2133 /* Make sure the hint isn't scheduled any earlier than this point,
2134 which could make it too far for the branch offset to fit. */
2135 insn = emit_insn_before (gen_blockage (), hint);
2136 recog_memoized (insn);
2137 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2138 }
2139 else if (distance <= 8 * 4)
2140 {
2141 /* To guarantee at least 8 insns between the hint and branch we
2142 insert nops. */
2143 int d;
2144 for (d = distance; d < 8 * 4; d += 4)
2145 {
2146 insn =
2147 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2148 recog_memoized (insn);
2149 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2150 }
2151
2152 /* Make sure any nops inserted aren't scheduled before the hint. */
2153 insn = emit_insn_after (gen_blockage (), hint);
2154 recog_memoized (insn);
2155 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2156
2157 /* Make sure any nops inserted aren't scheduled after the call. */
2158 if (CALL_P (branch) && distance < 8 * 4)
2159 {
2160 insn = emit_insn_before (gen_blockage (), branch);
2161 recog_memoized (insn);
2162 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2163 }
2164 }
2165 }
2166
2167 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2168 the rtx for the branch target. */
2169 static rtx
2170 get_branch_target (rtx_insn *branch)
2171 {
2172 if (JUMP_P (branch))
2173 {
2174 rtx set, src;
2175
2176 /* Return statements */
2177 if (GET_CODE (PATTERN (branch)) == RETURN)
2178 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2179
2180 /* ASM GOTOs. */
2181 if (extract_asm_operands (PATTERN (branch)) != NULL)
2182 return NULL;
2183
2184 set = single_set (branch);
2185 src = SET_SRC (set);
2186 if (GET_CODE (SET_DEST (set)) != PC)
2187 abort ();
2188
2189 if (GET_CODE (src) == IF_THEN_ELSE)
2190 {
2191 rtx lab = 0;
2192 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2193 if (note)
2194 {
2195 /* If the more probable case is not a fall through, then
2196 try a branch hint. */
2197 int prob = profile_probability::from_reg_br_prob_note
2198 (XINT (note, 0)).to_reg_br_prob_base ();
2199 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2200 && GET_CODE (XEXP (src, 1)) != PC)
2201 lab = XEXP (src, 1);
2202 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2203 && GET_CODE (XEXP (src, 2)) != PC)
2204 lab = XEXP (src, 2);
2205 }
2206 if (lab)
2207 {
2208 if (GET_CODE (lab) == RETURN)
2209 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2210 return lab;
2211 }
2212 return 0;
2213 }
2214
2215 return src;
2216 }
2217 else if (CALL_P (branch))
2218 {
2219 rtx call;
2220 /* All of our call patterns are in a PARALLEL and the CALL is
2221 the first pattern in the PARALLEL. */
2222 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2223 abort ();
2224 call = XVECEXP (PATTERN (branch), 0, 0);
2225 if (GET_CODE (call) == SET)
2226 call = SET_SRC (call);
2227 if (GET_CODE (call) != CALL)
2228 abort ();
2229 return XEXP (XEXP (call, 0), 0);
2230 }
2231 return 0;
2232 }
2233
2234 /* The special $hbr register is used to prevent the insn scheduler from
2235 moving hbr insns across instructions which invalidate them. It
2236 should only be used in a clobber, and this function searches for
2237 insns which clobber it. */
2238 static bool
2239 insn_clobbers_hbr (rtx_insn *insn)
2240 {
2241 if (INSN_P (insn)
2242 && GET_CODE (PATTERN (insn)) == PARALLEL)
2243 {
2244 rtx parallel = PATTERN (insn);
2245 rtx clobber;
2246 int j;
2247 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2248 {
2249 clobber = XVECEXP (parallel, 0, j);
2250 if (GET_CODE (clobber) == CLOBBER
2251 && GET_CODE (XEXP (clobber, 0)) == REG
2252 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2253 return 1;
2254 }
2255 }
2256 return 0;
2257 }
2258
2259 /* Search up to 32 insns starting at FIRST:
2260 - at any kind of hinted branch, just return
2261 - at any unconditional branch in the first 15 insns, just return
2262 - at a call or indirect branch, after the first 15 insns, force it to
2263 an even address and return
2264 - at any unconditional branch, after the first 15 insns, force it to
2265 an even address.
2266 At the end of the search, insert an hbrp within 4 insns of FIRST,
2267 and an hbrp within 16 instructions of FIRST.
2268 */
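/* As a concrete illustration of the distances used below: with 4-byte
   instructions and FIRST at relative address 0, the first hbrp goes
   before the insn whose end crosses byte 16 (before_4), and the second
   before the insn whose end crosses byte 56 (before_16, tested at
   14 * 4 because the first hbrp can add up to 2 instructions). */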
2269 static void
2270 insert_hbrp_for_ilb_runout (rtx_insn *first)
2271 {
2272 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2273 int addr = 0, length, first_addr = -1;
2274 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2275 int insert_lnop_after = 0;
2276 for (insn = first; insn; insn = NEXT_INSN (insn))
2277 if (INSN_P (insn))
2278 {
2279 if (first_addr == -1)
2280 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2281 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2282 length = get_attr_length (insn);
2283
2284 if (before_4 == 0 && addr + length >= 4 * 4)
2285 before_4 = insn;
2286 /* We test for 14 instructions because the first hbrp will add
2287 up to 2 instructions. */
2288 if (before_16 == 0 && addr + length >= 14 * 4)
2289 before_16 = insn;
2290
2291 if (INSN_CODE (insn) == CODE_FOR_hbr)
2292 {
2293 /* Make sure an hbrp is at least 2 cycles away from a hint.
2294 Insert an lnop after the hbrp when necessary. */
2295 if (before_4 == 0 && addr > 0)
2296 {
2297 before_4 = insn;
2298 insert_lnop_after |= 1;
2299 }
2300 else if (before_4 && addr <= 4 * 4)
2301 insert_lnop_after |= 1;
2302 if (before_16 == 0 && addr > 10 * 4)
2303 {
2304 before_16 = insn;
2305 insert_lnop_after |= 2;
2306 }
2307 else if (before_16 && addr <= 14 * 4)
2308 insert_lnop_after |= 2;
2309 }
2310
2311 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2312 {
2313 if (addr < hbrp_addr0)
2314 hbrp_addr0 = addr;
2315 else if (addr < hbrp_addr1)
2316 hbrp_addr1 = addr;
2317 }
2318
2319 if (CALL_P (insn) || JUMP_P (insn))
2320 {
2321 if (HINTED_P (insn))
2322 return;
2323
2324 /* Any branch after the first 15 insns should be on an even
2325 address to avoid a special case branch. There might be
2326 some nops and/or hbrps inserted, so we test after 10
2327 insns. */
2328 if (addr > 10 * 4)
2329 SCHED_ON_EVEN_P (insn) = 1;
2330 }
2331
2332 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2333 return;
2334
2335
2336 if (addr + length >= 32 * 4)
2337 {
2338 gcc_assert (before_4 && before_16);
2339 if (hbrp_addr0 > 4 * 4)
2340 {
2341 insn =
2342 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2343 recog_memoized (insn);
2344 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2345 INSN_ADDRESSES_NEW (insn,
2346 INSN_ADDRESSES (INSN_UID (before_4)));
2347 PUT_MODE (insn, GET_MODE (before_4));
2348 PUT_MODE (before_4, TImode);
2349 if (insert_lnop_after & 1)
2350 {
2351 insn = emit_insn_before (gen_lnop (), before_4);
2352 recog_memoized (insn);
2353 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2354 INSN_ADDRESSES_NEW (insn,
2355 INSN_ADDRESSES (INSN_UID (before_4)));
2356 PUT_MODE (insn, TImode);
2357 }
2358 }
2359 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2360 && hbrp_addr1 > 16 * 4)
2361 {
2362 insn =
2363 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2364 recog_memoized (insn);
2365 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2366 INSN_ADDRESSES_NEW (insn,
2367 INSN_ADDRESSES (INSN_UID (before_16)));
2368 PUT_MODE (insn, GET_MODE (before_16));
2369 PUT_MODE (before_16, TImode);
2370 if (insert_lnop_after & 2)
2371 {
2372 insn = emit_insn_before (gen_lnop (), before_16);
2373 recog_memoized (insn);
2374 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2375 INSN_ADDRESSES_NEW (insn,
2376 INSN_ADDRESSES (INSN_UID
2377 (before_16)));
2378 PUT_MODE (insn, TImode);
2379 }
2380 }
2381 return;
2382 }
2383 }
2384 else if (BARRIER_P (insn))
2385 return;
2386
2387 }
2388
2389 /* The SPU might hang when it executes 48 inline instructions after a
2390 hinted branch jumps to its hinted target. The beginning of a
2391 function and the return from a call might have been hinted, and
2392 must be handled as well. To prevent a hang we insert 2 hbrps. The
2393 first should be within 6 insns of the branch target. The second
2394 should be within 22 insns of the branch target. When determining
2395 if hbrps are necessary, we look for only 32 inline instructions,
2396 because up to 12 nops and 4 hbrps could be inserted (32 + 16 = 48). Similarly,
2397 when inserting new hbrps, we insert them within 4 and 16 insns of
2398 the target. */
2399 static void
2400 insert_hbrp (void)
2401 {
2402 rtx_insn *insn;
2403 if (TARGET_SAFE_HINTS)
2404 {
2405 shorten_branches (get_insns ());
2406 /* Insert an hbrp at the beginning of the function. */
2407 insn = next_active_insn (get_insns ());
2408 if (insn)
2409 insert_hbrp_for_ilb_runout (insn);
2410 /* Insert hbrp after hinted targets. */
2411 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2412 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2413 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2414 }
2415 }
2416
2417 static int in_spu_reorg;
2418
2419 static void
2420 spu_var_tracking (void)
2421 {
2422 if (flag_var_tracking)
2423 {
2424 df_analyze ();
2425 timevar_push (TV_VAR_TRACKING);
2426 variable_tracking_main ();
2427 timevar_pop (TV_VAR_TRACKING);
2428 df_finish_pass (false);
2429 }
2430 }
2431
2432 /* Insert branch hints. There are no branch optimizations after this
2433 pass, so it's safe to set our branch hints now. */
2434 static void
2435 spu_machine_dependent_reorg (void)
2436 {
2437 sbitmap blocks;
2438 basic_block bb;
2439 rtx_insn *branch, *insn;
2440 rtx branch_target = 0;
2441 int branch_addr = 0, insn_addr, required_dist = 0;
2442 int i;
2443 unsigned int j;
2444
2445 if (!TARGET_BRANCH_HINTS || optimize == 0)
2446 {
2447 /* We still do it for unoptimized code because an external
2448 function might have hinted a call or return. */
2449 compute_bb_for_insn ();
2450 insert_hbrp ();
2451 pad_bb ();
2452 spu_var_tracking ();
2453 free_bb_for_insn ();
2454 return;
2455 }
2456
2457 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2458 bitmap_clear (blocks);
2459
2460 in_spu_reorg = 1;
2461 compute_bb_for_insn ();
2462
2463 /* (Re-)discover loops so that bb->loop_father can be used
2464 in the analysis below. */
2465 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2466
2467 compact_blocks ();
2468
2469 spu_bb_info =
2470 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2471 sizeof (struct spu_bb_info));
2472
2473 /* We need exact insn addresses and lengths. */
2474 shorten_branches (get_insns ());
2475
2476 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2477 {
2478 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2479 branch = 0;
2480 if (spu_bb_info[i].prop_jump)
2481 {
2482 branch = spu_bb_info[i].prop_jump;
2483 branch_target = get_branch_target (branch);
2484 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2485 required_dist = spu_hint_dist;
2486 }
2487 /* Search from the end of a block to its beginning. In this loop, find
2488 branches which need a hint and emit the hint only when:
2489 - it's an indirect branch and we're at the insn which sets
2490 the register
2491 - we're at an insn that will invalidate the hint. e.g., a
2492 call, another hint insn, inline asm that clobbers $hbr, and
2493 some inlined operations (divmodsi4). Don't consider jumps
2494 because they are only at the end of a block and are
2495 considered when we are deciding whether to propagate
2496 - we're getting too far away from the branch. The hbr insns
2497 only have a signed 10-bit offset.
2498 We go back as far as possible so the branch will be considered
2499 for propagation when we get to the beginning of the block. */
2500 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2501 {
2502 if (INSN_P (insn))
2503 {
2504 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2505 if (branch
2506 && ((GET_CODE (branch_target) == REG
2507 && set_of (branch_target, insn) != NULL_RTX)
2508 || insn_clobbers_hbr (insn)
2509 || branch_addr - insn_addr > 600))
2510 {
2511 rtx_insn *next = NEXT_INSN (insn);
2512 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2513 if (insn != BB_END (bb)
2514 && branch_addr - next_addr >= required_dist)
2515 {
2516 if (dump_file)
2517 fprintf (dump_file,
2518 "hint for %i in block %i before %i\n",
2519 INSN_UID (branch), bb->index,
2520 INSN_UID (next));
2521 spu_emit_branch_hint (next, branch, branch_target,
2522 branch_addr - next_addr, blocks);
2523 }
2524 branch = 0;
2525 }
2526
2527 /* JUMP_P will only be true at the end of a block. When
2528 branch is already set it means we've previously decided
2529 to propagate a hint for that branch into this block. */
2530 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2531 {
2532 branch = 0;
2533 if ((branch_target = get_branch_target (insn)))
2534 {
2535 branch = insn;
2536 branch_addr = insn_addr;
2537 required_dist = spu_hint_dist;
2538 }
2539 }
2540 }
2541 if (insn == BB_HEAD (bb))
2542 break;
2543 }
2544
2545 if (branch)
2546 {
2547 /* If we haven't emitted a hint for this branch yet, it might
2548 be profitable to emit it in one of the predecessor blocks,
2549 especially for loops. */
2550 rtx_insn *bbend;
2551 basic_block prev = 0, prop = 0, prev2 = 0;
2552 int loop_exit = 0, simple_loop = 0;
2553 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2554
2555 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2556 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2557 prev = EDGE_PRED (bb, j)->src;
2558 else
2559 prev2 = EDGE_PRED (bb, j)->src;
2560
2561 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2562 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2563 loop_exit = 1;
2564 else if (EDGE_SUCC (bb, j)->dest == bb)
2565 simple_loop = 1;
2566
2567 /* If this branch is a loop exit then propagate to previous
2568 fallthru block. This catches the cases when it is a simple
2569 loop or when there is an initial branch into the loop. */
2570 if (prev && (loop_exit || simple_loop)
2571 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2572 prop = prev;
2573
2574 /* If there is only one adjacent predecessor, don't propagate
2575 outside this loop. */
2576 else if (prev && single_pred_p (bb)
2577 && prev->loop_father == bb->loop_father)
2578 prop = prev;
2579
2580 /* If this is the JOIN block of a simple IF-THEN then
2581 propagate the hint to the HEADER block. */
2582 else if (prev && prev2
2583 && EDGE_COUNT (bb->preds) == 2
2584 && EDGE_COUNT (prev->preds) == 1
2585 && EDGE_PRED (prev, 0)->src == prev2
2586 && prev2->loop_father == bb->loop_father
2587 && GET_CODE (branch_target) != REG)
2588 prop = prev;
2589
2590 /* Don't propagate when:
2591 - this is a simple loop and the hint would be too far
2592 - this is not a simple loop and there are 16 insns in
2593 this block already
2594 - the predecessor block ends in a branch that will be
2595 hinted
2596 - the predecessor block ends in an insn that invalidates
2597 the hint */
2598 if (prop
2599 && prop->index >= 0
2600 && (bbend = BB_END (prop))
2601 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2602 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2603 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2604 {
2605 if (dump_file)
2606 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2607 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2608 bb->index, prop->index, bb_loop_depth (bb),
2609 INSN_UID (branch), loop_exit, simple_loop,
2610 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2611
2612 spu_bb_info[prop->index].prop_jump = branch;
2613 spu_bb_info[prop->index].bb_index = i;
2614 }
2615 else if (branch_addr - next_addr >= required_dist)
2616 {
2617 if (dump_file)
2618 fprintf (dump_file, "hint for %i in block %i before %i\n",
2619 INSN_UID (branch), bb->index,
2620 INSN_UID (NEXT_INSN (insn)));
2621 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2622 branch_addr - next_addr, blocks);
2623 }
2624 branch = 0;
2625 }
2626 }
2627 free (spu_bb_info);
2628
2629 if (!bitmap_empty_p (blocks))
2630 find_many_sub_basic_blocks (blocks);
2631
2632 /* We have to schedule to make sure alignment is ok. */
2633 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2634
2635 /* The hints need to be scheduled, so call it again. */
2636 schedule_insns ();
2637 df_finish_pass (true);
2638
2639 insert_hbrp ();
2640
2641 pad_bb ();
2642
2643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2644 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2645 {
2646 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2647 between its branch label and the branch. We don't move the
2648 label because GCC expects it at the beginning of the block. */
2649 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2650 rtx label_ref = XVECEXP (unspec, 0, 0);
2651 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2652 rtx_insn *branch;
2653 int offset = 0;
2654 for (branch = NEXT_INSN (label);
2655 !JUMP_P (branch) && !CALL_P (branch);
2656 branch = NEXT_INSN (branch))
2657 if (NONJUMP_INSN_P (branch))
2658 offset += get_attr_length (branch);
2659 if (offset > 0)
2660 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2661 }
2662
2663 spu_var_tracking ();
2664
2665 loop_optimizer_finalize ();
2666
2667 free_bb_for_insn ();
2668
2669 in_spu_reorg = 0;
2670 }
2671 \f
2672
2673 /* Insn scheduling routines, primarily for dual issue. */
2674 static int
2675 spu_sched_issue_rate (void)
2676 {
2677 return 2;
2678 }
2679
2680 static int
2681 uses_ls_unit(rtx_insn *insn)
2682 {
2683 rtx set = single_set (insn);
2684 if (set != 0
2685 && (GET_CODE (SET_DEST (set)) == MEM
2686 || GET_CODE (SET_SRC (set)) == MEM))
2687 return 1;
2688 return 0;
2689 }
2690
2691 static int
2692 get_pipe (rtx_insn *insn)
2693 {
2694 enum attr_type t;
2695 /* Handle inline asm */
2696 if (INSN_CODE (insn) == -1)
2697 return -1;
2698 t = get_attr_type (insn);
2699 switch (t)
2700 {
2701 case TYPE_CONVERT:
2702 return -2;
2703 case TYPE_MULTI0:
2704 return -1;
2705
2706 case TYPE_FX2:
2707 case TYPE_FX3:
2708 case TYPE_SPR:
2709 case TYPE_NOP:
2710 case TYPE_FXB:
2711 case TYPE_FPD:
2712 case TYPE_FP6:
2713 case TYPE_FP7:
2714 return 0;
2715
2716 case TYPE_LNOP:
2717 case TYPE_SHUF:
2718 case TYPE_LOAD:
2719 case TYPE_STORE:
2720 case TYPE_BR:
2721 case TYPE_MULTI1:
2722 case TYPE_HBR:
2723 case TYPE_IPREFETCH:
2724 return 1;
2725 default:
2726 abort ();
2727 }
2728 }
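/* In hardware terms, pipe 0 above corresponds to the SPU's even execution
   pipeline (fixed point, floating point, compares) and pipe 1 to the odd
   pipeline (loads, stores, shuffles, branches and hints); the -1/-2
   values mark insns the scheduler treats specially (MULTI0, inline asm,
   converts). */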
2729
2730
2731 /* haifa-sched.c has a static variable that keeps track of the current
2732 cycle. It is passed to spu_sched_reorder, and we record it here for
2733 use by spu_sched_variable_issue. It won't be accurate if the
2734 scheduler updates its clock_var between the two calls. */
2735 static int clock_var;
2736
2737 /* This is used to keep track of insn alignment. Set to 0 at the
2738 beginning of each block and increased by the "length" attr of each
2739 insn scheduled. */
2740 static int spu_sched_length;
2741
2742 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2743 ready list appropriately in spu_sched_reorder(). */
2744 static int pipe0_clock;
2745 static int pipe1_clock;
2746
2747 static int prev_clock_var;
2748
2749 static int prev_priority;
2750
2751 /* The SPU needs to load the next ilb sometime during the execution of
2752 the previous ilb. There is a potential conflict if every cycle has a
2753 load or store. To avoid the conflict we make sure the load/store
2754 unit is free for at least one cycle during the execution of insns in
2755 the previous ilb. */
2756 static int spu_ls_first;
2757 static int prev_ls_clock;
2758
2759 static void
2760 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2761 int max_ready ATTRIBUTE_UNUSED)
2762 {
2763 spu_sched_length = 0;
2764 }
2765
2766 static void
2767 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2768 int max_ready ATTRIBUTE_UNUSED)
2769 {
2770 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2771 {
2772 /* When any block might be at least 8-byte aligned, assume they
2773 will all be at least 8-byte aligned to make sure dual issue
2774 works out correctly. */
2775 spu_sched_length = 0;
2776 }
2777 spu_ls_first = INT_MAX;
2778 clock_var = -1;
2779 prev_ls_clock = -1;
2780 pipe0_clock = -1;
2781 pipe1_clock = -1;
2782 prev_clock_var = -1;
2783 prev_priority = -1;
2784 }
2785
2786 static int
2787 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2788 int verbose ATTRIBUTE_UNUSED,
2789 rtx_insn *insn, int more)
2790 {
2791 int len;
2792 int p;
2793 if (GET_CODE (PATTERN (insn)) == USE
2794 || GET_CODE (PATTERN (insn)) == CLOBBER
2795 || (len = get_attr_length (insn)) == 0)
2796 return more;
2797
2798 spu_sched_length += len;
2799
2800 /* Reset on inline asm */
2801 if (INSN_CODE (insn) == -1)
2802 {
2803 spu_ls_first = INT_MAX;
2804 pipe0_clock = -1;
2805 pipe1_clock = -1;
2806 return 0;
2807 }
2808 p = get_pipe (insn);
2809 if (p == 0)
2810 pipe0_clock = clock_var;
2811 else
2812 pipe1_clock = clock_var;
2813
2814 if (in_spu_reorg)
2815 {
2816 if (clock_var - prev_ls_clock > 1
2817 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2818 spu_ls_first = INT_MAX;
2819 if (uses_ls_unit (insn))
2820 {
2821 if (spu_ls_first == INT_MAX)
2822 spu_ls_first = spu_sched_length;
2823 prev_ls_clock = clock_var;
2824 }
2825
2826 /* The scheduler hasn't inserted the nop, but we will later on.
2827 Include those nops in spu_sched_length. */
2828 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2829 spu_sched_length += 4;
2830 prev_clock_var = clock_var;
2831
2832 /* more is -1 when called from spu_sched_reorder for new insns
2833 that don't have INSN_PRIORITY */
2834 if (more >= 0)
2835 prev_priority = INSN_PRIORITY (insn);
2836 }
2837
2838 /* Always try issuing more insns. spu_sched_reorder will decide
2839 when the cycle should be advanced. */
2840 return 1;
2841 }
2842
2843 /* This function is called for both TARGET_SCHED_REORDER and
2844 TARGET_SCHED_REORDER2. */
2845 static int
2846 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2847 rtx_insn **ready, int *nreadyp, int clock)
2848 {
2849 int i, nready = *nreadyp;
2850 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2851 rtx_insn *insn;
2852
2853 clock_var = clock;
2854
2855 if (nready <= 0 || pipe1_clock >= clock)
2856 return 0;
2857
2858 /* Find any rtl insns that don't generate assembly insns and schedule
2859 them first. */
2860 for (i = nready - 1; i >= 0; i--)
2861 {
2862 insn = ready[i];
2863 if (INSN_CODE (insn) == -1
2864 || INSN_CODE (insn) == CODE_FOR_blockage
2865 || (INSN_P (insn) && get_attr_length (insn) == 0))
2866 {
2867 ready[i] = ready[nready - 1];
2868 ready[nready - 1] = insn;
2869 return 1;
2870 }
2871 }
2872
2873 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2874 for (i = 0; i < nready; i++)
2875 if (INSN_CODE (ready[i]) != -1)
2876 {
2877 insn = ready[i];
2878 switch (get_attr_type (insn))
2879 {
2880 default:
2881 case TYPE_MULTI0:
2882 case TYPE_CONVERT:
2883 case TYPE_FX2:
2884 case TYPE_FX3:
2885 case TYPE_SPR:
2886 case TYPE_NOP:
2887 case TYPE_FXB:
2888 case TYPE_FPD:
2889 case TYPE_FP6:
2890 case TYPE_FP7:
2891 pipe_0 = i;
2892 break;
2893 case TYPE_LOAD:
2894 case TYPE_STORE:
2895 pipe_ls = i;
2896 /* FALLTHRU */
2897 case TYPE_LNOP:
2898 case TYPE_SHUF:
2899 case TYPE_BR:
2900 case TYPE_MULTI1:
2901 case TYPE_HBR:
2902 pipe_1 = i;
2903 break;
2904 case TYPE_IPREFETCH:
2905 pipe_hbrp = i;
2906 break;
2907 }
2908 }
2909
2910 /* In the first scheduling phase, schedule loads and stores together
2911 to increase the chance they will get merged during postreload CSE. */
2912 if (!reload_completed && pipe_ls >= 0)
2913 {
2914 insn = ready[pipe_ls];
2915 ready[pipe_ls] = ready[nready - 1];
2916 ready[nready - 1] = insn;
2917 return 1;
2918 }
2919
2920 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2921 if (pipe_hbrp >= 0)
2922 pipe_1 = pipe_hbrp;
2923
2924 /* When we have loads/stores in every cycle of the last 15 insns and
2925 we are about to schedule another load/store, emit an hbrp insn
2926 instead. */
2927 if (in_spu_reorg
2928 && spu_sched_length - spu_ls_first >= 4 * 15
2929 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2930 {
2931 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2932 recog_memoized (insn);
2933 if (pipe0_clock < clock)
2934 PUT_MODE (insn, TImode);
2935 spu_sched_variable_issue (file, verbose, insn, -1);
2936 return 0;
2937 }
2938
2939 /* In general, we want to emit nops to increase dual issue, but dual
2940 issue isn't faster when one of the insns could be scheduled later
2941 without affecting the critical path. We look at INSN_PRIORITY to
2942 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2943 used to influence it. */
2944 if (in_spu_reorg && spu_dual_nops < 10)
2945 {
2946 /* When we are at an even address and we are not issuing nops to
2947 improve scheduling then we need to advance the cycle. */
2948 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2949 && (spu_dual_nops == 0
2950 || (pipe_1 != -1
2951 && prev_priority >
2952 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2953 return 0;
2954
2955 /* When at an odd address, schedule the highest priority insn
2956 without considering pipeline. */
2957 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2958 && (spu_dual_nops == 0
2959 || (prev_priority >
2960 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2961 return 1;
2962 }
2963
2964
2965 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2966 pipe0 insn in the ready list, schedule it. */
2967 if (pipe0_clock < clock && pipe_0 >= 0)
2968 schedule_i = pipe_0;
2969
2970 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2971 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2972 else
2973 schedule_i = pipe_1;
2974
2975 if (schedule_i > -1)
2976 {
2977 insn = ready[schedule_i];
2978 ready[schedule_i] = ready[nready - 1];
2979 ready[nready - 1] = insn;
2980 return 1;
2981 }
2982 return 0;
2983 }
2984
2985 /* INSN is dependent on DEP_INSN. */
2986 static int
2987 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2988 int cost, unsigned int)
2989 {
2990 rtx set;
2991
2992 /* The blockage pattern is used to prevent instructions from being
2993 moved across it and has no cost. */
2994 if (INSN_CODE (insn) == CODE_FOR_blockage
2995 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2996 return 0;
2997
2998 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2999 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3000 return 0;
3001
3002 /* Make sure hbrps are spread out. */
3003 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3004 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3005 return 8;
3006
3007 /* Make sure hints and hbrps are 2 cycles apart. */
3008 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3009 || INSN_CODE (insn) == CODE_FOR_hbr)
3010 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3012 return 2;
3013
3014 /* An hbrp has no real dependency on other insns. */
3015 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3017 return 0;
3018
3019 /* Assuming that it is unlikely an argument register will be used in
3020 the first cycle of the called function, we reduce the cost for
3021 slightly better scheduling of dep_insn. When not hinted, the
3022 mispredicted branch would hide the cost as well. */
3023 if (CALL_P (insn))
3024 {
3025 rtx target = get_branch_target (insn);
3026 if (GET_CODE (target) != REG || !set_of (target, insn))
3027 return cost - 2;
3028 return cost;
3029 }
3030
3031 /* And when returning from a function, let's assume the return values
3032 are completed sooner too. */
3033 if (CALL_P (dep_insn))
3034 return cost - 2;
3035
3036 /* Make sure an instruction that loads from the back chain is scheduled
3037 away from the return instruction so a hint is more likely to get
3038 issued. */
3039 if (INSN_CODE (insn) == CODE_FOR__return
3040 && (set = single_set (dep_insn))
3041 && GET_CODE (SET_DEST (set)) == REG
3042 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3043 return 20;
3044
3045 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3046 scheduler makes every insn in a block anti-dependent on the final
3047 jump_insn. We adjust here so higher cost insns will get scheduled
3048 earlier. */
3049 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3050 return insn_cost (dep_insn) - 3;
3051
3052 return cost;
3053 }
3054 \f
3055 /* Create a CONST_DOUBLE from a string. */
3056 rtx
3057 spu_float_const (const char *string, machine_mode mode)
3058 {
3059 REAL_VALUE_TYPE value;
3060 value = REAL_VALUE_ATOF (string, mode);
3061 return const_double_from_real_value (value, mode);
3062 }
3063
3064 int
3065 spu_constant_address_p (rtx x)
3066 {
3067 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3068 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3069 || GET_CODE (x) == HIGH);
3070 }
3071
3072 static enum spu_immediate
3073 which_immediate_load (HOST_WIDE_INT val)
3074 {
3075 gcc_assert (val == trunc_int_for_mode (val, SImode));
3076
3077 if (val >= -0x8000 && val <= 0x7fff)
3078 return SPU_IL;
3079 if (val >= 0 && val <= 0x3ffff)
3080 return SPU_ILA;
3081 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3082 return SPU_ILH;
3083 if ((val & 0xffff) == 0)
3084 return SPU_ILHU;
3085
3086 return SPU_NONE;
3087 }
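/* Worked examples for the checks above (values taken as SImode):
   -5         -> SPU_IL   (fits the signed 16-bit il immediate)
   0x0002ffff -> SPU_ILA  (non-negative and fits in 18 bits)
   0x12341234 -> SPU_ILH  (both halfwords identical)
   0x56780000 -> SPU_ILHU (low halfword zero, load upper halfword)
   0x12345678 -> SPU_NONE (needs ilhu followed by iohl). */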
3088
3089 /* Return true when OP can be loaded by one of the il instructions, or
3090 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3091 int
3092 immediate_load_p (rtx op, machine_mode mode)
3093 {
3094 if (CONSTANT_P (op))
3095 {
3096 enum immediate_class c = classify_immediate (op, mode);
3097 return c == IC_IL1 || c == IC_IL1s
3098 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3099 }
3100 return 0;
3101 }
3102
3103 /* Return true if the first SIZE bytes of ARR form a constant that can be
3104 generated with cbd, chd, cwd or cdd. When non-NULL, *PRUN and *PSTART
3105 are set to the size and offset of the instruction to use. */
3106 static int
3107 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3108 {
3109 int cpat, run, i, start;
3110 cpat = 1;
3111 run = 0;
3112 start = -1;
3113 for (i = 0; i < size && cpat; i++)
3114 if (arr[i] != i+16)
3115 {
3116 if (!run)
3117 {
3118 start = i;
3119 if (arr[i] == 3)
3120 run = 1;
3121 else if (arr[i] == 2 && arr[i+1] == 3)
3122 run = 2;
3123 else if (arr[i] == 0)
3124 {
3125 while (arr[i+run] == run && i+run < 16)
3126 run++;
3127 if (run != 4 && run != 8)
3128 cpat = 0;
3129 }
3130 else
3131 cpat = 0;
3132 if ((i & (run-1)) != 0)
3133 cpat = 0;
3134 i += run;
3135 }
3136 else
3137 cpat = 0;
3138 }
3139 if (cpat && (run || size < 16))
3140 {
3141 if (run == 0)
3142 run = 1;
3143 if (prun)
3144 *prun = run;
3145 if (pstart)
3146 *pstart = start == -1 ? 16-run : start;
3147 return 1;
3148 }
3149 return 0;
3150 }
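/* Worked example: the insert-word-at-offset-4 pattern that cwd produces is
     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18, ..., 0x1f }
   i.e. arr[i] == i+16 everywhere except a 4-byte run of 0..3 starting at
   byte 4, so cpat_info returns 1 with *prun == 4 and *pstart == 4. */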
3151
3152 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3153 it into a register. MODE is only valid when OP is a CONST_INT. */
3154 static enum immediate_class
3155 classify_immediate (rtx op, machine_mode mode)
3156 {
3157 HOST_WIDE_INT val;
3158 unsigned char arr[16];
3159 int i, j, repeated, fsmbi, repeat;
3160
3161 gcc_assert (CONSTANT_P (op));
3162
3163 if (GET_MODE (op) != VOIDmode)
3164 mode = GET_MODE (op);
3165
3166 /* A V4SI const_vector with all identical symbols is ok. */
3167 if (!flag_pic
3168 && mode == V4SImode
3169 && GET_CODE (op) == CONST_VECTOR
3170 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3171 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3172 op = unwrap_const_vec_duplicate (op);
3173
3174 switch (GET_CODE (op))
3175 {
3176 case SYMBOL_REF:
3177 case LABEL_REF:
3178 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3179
3180 case CONST:
3181 /* We can never know if the resulting address fits in 18 bits and can be
3182 loaded with ila. For now, assume the address will not overflow if
3183 the displacement is "small" (fits 'K' constraint). */
3184 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3185 {
3186 rtx sym = XEXP (XEXP (op, 0), 0);
3187 rtx cst = XEXP (XEXP (op, 0), 1);
3188
3189 if (GET_CODE (sym) == SYMBOL_REF
3190 && GET_CODE (cst) == CONST_INT
3191 && satisfies_constraint_K (cst))
3192 return IC_IL1s;
3193 }
3194 return IC_IL2s;
3195
3196 case HIGH:
3197 return IC_IL1s;
3198
3199 case CONST_VECTOR:
3200 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3201 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3202 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3203 return IC_POOL;
3204 /* Fall through. */
3205
3206 case CONST_INT:
3207 case CONST_DOUBLE:
3208 constant_to_array (mode, op, arr);
3209
3210 /* Check that each 4-byte slot is identical. */
3211 repeated = 1;
3212 for (i = 4; i < 16; i += 4)
3213 for (j = 0; j < 4; j++)
3214 if (arr[j] != arr[i + j])
3215 repeated = 0;
3216
3217 if (repeated)
3218 {
3219 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3220 val = trunc_int_for_mode (val, SImode);
3221
3222 if (which_immediate_load (val) != SPU_NONE)
3223 return IC_IL1;
3224 }
3225
3226 /* Any mode of 2 bytes or smaller can be loaded with an il
3227 instruction. */
3228 gcc_assert (GET_MODE_SIZE (mode) > 2);
3229
3230 fsmbi = 1;
3231 repeat = 0;
3232 for (i = 0; i < 16 && fsmbi; i++)
3233 if (arr[i] != 0 && repeat == 0)
3234 repeat = arr[i];
3235 else if (arr[i] != 0 && arr[i] != repeat)
3236 fsmbi = 0;
3237 if (fsmbi)
3238 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3239
3240 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3241 return IC_CPAT;
3242
3243 if (repeated)
3244 return IC_IL2;
3245
3246 return IC_POOL;
3247 default:
3248 break;
3249 }
3250 gcc_unreachable ();
3251 }
3252
3253 static enum spu_immediate
3254 which_logical_immediate (HOST_WIDE_INT val)
3255 {
3256 gcc_assert (val == trunc_int_for_mode (val, SImode));
3257
3258 if (val >= -0x200 && val <= 0x1ff)
3259 return SPU_ORI;
3260 if (val >= 0 && val <= 0xffff)
3261 return SPU_IOHL;
3262 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3263 {
3264 val = trunc_int_for_mode (val, HImode);
3265 if (val >= -0x200 && val <= 0x1ff)
3266 return SPU_ORHI;
3267 if ((val & 0xff) == ((val >> 8) & 0xff))
3268 {
3269 val = trunc_int_for_mode (val, QImode);
3270 if (val >= -0x200 && val <= 0x1ff)
3271 return SPU_ORBI;
3272 }
3273 }
3274 return SPU_NONE;
3275 }
3276
3277 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3278 CONST_DOUBLEs. */
3279 static int
3280 const_vector_immediate_p (rtx x)
3281 {
3282 int i;
3283 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3284 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3285 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3286 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3287 return 0;
3288 return 1;
3289 }
3290
3291 int
3292 logical_immediate_p (rtx op, machine_mode mode)
3293 {
3294 HOST_WIDE_INT val;
3295 unsigned char arr[16];
3296 int i, j;
3297
3298 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3299 || GET_CODE (op) == CONST_VECTOR);
3300
3301 if (GET_CODE (op) == CONST_VECTOR
3302 && !const_vector_immediate_p (op))
3303 return 0;
3304
3305 if (GET_MODE (op) != VOIDmode)
3306 mode = GET_MODE (op);
3307
3308 constant_to_array (mode, op, arr);
3309
3310 /* Check that bytes are repeated. */
3311 for (i = 4; i < 16; i += 4)
3312 for (j = 0; j < 4; j++)
3313 if (arr[j] != arr[i + j])
3314 return 0;
3315
3316 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3317 val = trunc_int_for_mode (val, SImode);
3318
3319 i = which_logical_immediate (val);
3320 return i != SPU_NONE && i != SPU_IOHL;
3321 }
3322
3323 int
3324 iohl_immediate_p (rtx op, machine_mode mode)
3325 {
3326 HOST_WIDE_INT val;
3327 unsigned char arr[16];
3328 int i, j;
3329
3330 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3331 || GET_CODE (op) == CONST_VECTOR);
3332
3333 if (GET_CODE (op) == CONST_VECTOR
3334 && !const_vector_immediate_p (op))
3335 return 0;
3336
3337 if (GET_MODE (op) != VOIDmode)
3338 mode = GET_MODE (op);
3339
3340 constant_to_array (mode, op, arr);
3341
3342 /* Check that bytes are repeated. */
3343 for (i = 4; i < 16; i += 4)
3344 for (j = 0; j < 4; j++)
3345 if (arr[j] != arr[i + j])
3346 return 0;
3347
3348 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3349 val = trunc_int_for_mode (val, SImode);
3350
3351 return val >= 0 && val <= 0xffff;
3352 }
3353
3354 int
3355 arith_immediate_p (rtx op, machine_mode mode,
3356 HOST_WIDE_INT low, HOST_WIDE_INT high)
3357 {
3358 HOST_WIDE_INT val;
3359 unsigned char arr[16];
3360 int bytes, i, j;
3361
3362 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3363 || GET_CODE (op) == CONST_VECTOR);
3364
3365 if (GET_CODE (op) == CONST_VECTOR
3366 && !const_vector_immediate_p (op))
3367 return 0;
3368
3369 if (GET_MODE (op) != VOIDmode)
3370 mode = GET_MODE (op);
3371
3372 constant_to_array (mode, op, arr);
3373
3374 bytes = GET_MODE_UNIT_SIZE (mode);
3375 mode = mode_for_size (GET_MODE_UNIT_BITSIZE (mode), MODE_INT, 0);
3376
3377 /* Check that bytes are repeated. */
3378 for (i = bytes; i < 16; i += bytes)
3379 for (j = 0; j < bytes; j++)
3380 if (arr[j] != arr[i + j])
3381 return 0;
3382
3383 val = arr[0];
3384 for (j = 1; j < bytes; j++)
3385 val = (val << 8) | arr[j];
3386
3387 val = trunc_int_for_mode (val, mode);
3388
3389 return val >= low && val <= high;
3390 }
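/* For example, a V8HImode constant with every element equal to 5 has a
   byte pattern that repeats every 2 bytes, so VAL becomes 5 (in HImode)
   and the constant is accepted whenever LOW <= 5 <= HIGH. */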
3391
3392 /* TRUE when OP is an immediate and an exact power of 2, i.e. OP is
3393 2^scale with scale >= LOW && scale <= HIGH. When OP is a vector,
3394 all entries must be the same. */
3395 bool
3396 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3397 {
3398 machine_mode int_mode;
3399 HOST_WIDE_INT val;
3400 unsigned char arr[16];
3401 int bytes, i, j;
3402
3403 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3404 || GET_CODE (op) == CONST_VECTOR);
3405
3406 if (GET_CODE (op) == CONST_VECTOR
3407 && !const_vector_immediate_p (op))
3408 return 0;
3409
3410 if (GET_MODE (op) != VOIDmode)
3411 mode = GET_MODE (op);
3412
3413 constant_to_array (mode, op, arr);
3414
3415 mode = GET_MODE_INNER (mode);
3416
3417 bytes = GET_MODE_SIZE (mode);
3418 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3419
3420 /* Check that bytes are repeated. */
3421 for (i = bytes; i < 16; i += bytes)
3422 for (j = 0; j < bytes; j++)
3423 if (arr[j] != arr[i + j])
3424 return 0;
3425
3426 val = arr[0];
3427 for (j = 1; j < bytes; j++)
3428 val = (val << 8) | arr[j];
3429
3430 val = trunc_int_for_mode (val, int_mode);
3431
3432 /* Currently, we only handle SFmode. */
3433 gcc_assert (mode == SFmode);
3434 if (mode == SFmode)
3435 {
3436 int exp = (val >> 23) - 127;
3437 return val > 0 && (val & 0x007fffff) == 0
3438 && exp >= low && exp <= high;
3439 }
3440 return FALSE;
3441 }
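/* For example, the SFmode constant 2.0f has the IEEE-754 bit pattern
   0x40000000: the mantissa bits (0x007fffff) are zero and the biased
   exponent is 128, so exp == 1 and the test succeeds when LOW <= 1 <= HIGH.
   A splat vector of 0.5f works the same way with exp == -1. */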
3442
3443 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3444
3445 static bool
3446 ea_symbol_ref_p (const_rtx x)
3447 {
3448 tree decl;
3449
3450 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3451 {
3452 rtx plus = XEXP (x, 0);
3453 rtx op0 = XEXP (plus, 0);
3454 rtx op1 = XEXP (plus, 1);
3455 if (GET_CODE (op1) == CONST_INT)
3456 x = op0;
3457 }
3458
3459 return (GET_CODE (x) == SYMBOL_REF
3460 && (decl = SYMBOL_REF_DECL (x)) != 0
3461 && TREE_CODE (decl) == VAR_DECL
3462 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3463 }
3464
3465 /* We accept:
3466 - any 32-bit constant (SImode, SFmode)
3467 - any constant that can be generated with fsmbi (any mode)
3468 - a 64-bit constant where the high and low bits are identical
3469 (DImode, DFmode)
3470 - a 128-bit constant where the four 32-bit words match. */
3471 bool
3472 spu_legitimate_constant_p (machine_mode mode, rtx x)
3473 {
3474 subrtx_iterator::array_type array;
3475 if (GET_CODE (x) == HIGH)
3476 x = XEXP (x, 0);
3477
3478 /* Reject any __ea qualified reference. These can't appear in
3479 instructions but must be forced to the constant pool. */
3480 FOR_EACH_SUBRTX (iter, array, x, ALL)
3481 if (ea_symbol_ref_p (*iter))
3482 return 0;
3483
3484 /* V4SI with all identical symbols is valid. */
3485 if (!flag_pic
3486 && mode == V4SImode
3487 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3488 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3489 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3490 return const_vec_duplicate_p (x);
3491
3492 if (GET_CODE (x) == CONST_VECTOR
3493 && !const_vector_immediate_p (x))
3494 return 0;
3495 return 1;
3496 }
3497
3498 /* Valid addresses are:
3499 - symbol_ref, label_ref, const
3500 - reg
3501 - reg + const_int, where const_int is 16 byte aligned
3502 - reg + reg, alignment doesn't matter
3503 The alignment matters in the reg+const case because lqd and stqd
3504 ignore the 4 least significant bits of the const. We only care about
3505 16 byte modes because the expand phase will change all smaller MEM
3506 references to TImode. */
3507 static bool
3508 spu_legitimate_address_p (machine_mode mode,
3509 rtx x, bool reg_ok_strict)
3510 {
3511 int aligned = GET_MODE_SIZE (mode) >= 16;
3512 if (aligned
3513 && GET_CODE (x) == AND
3514 && GET_CODE (XEXP (x, 1)) == CONST_INT
3515 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3516 x = XEXP (x, 0);
3517 switch (GET_CODE (x))
3518 {
3519 case LABEL_REF:
3520 return !TARGET_LARGE_MEM;
3521
3522 case SYMBOL_REF:
3523 case CONST:
3524 /* Keep __ea references until reload so that spu_expand_mov can see them
3525 in MEMs. */
3526 if (ea_symbol_ref_p (x))
3527 return !reload_in_progress && !reload_completed;
3528 return !TARGET_LARGE_MEM;
3529
3530 case CONST_INT:
3531 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3532
3533 case SUBREG:
3534 x = XEXP (x, 0);
3535 if (!REG_P (x))
3536 return 0;
3537 /* FALLTHRU */
3538
3539 case REG:
3540 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3541
3542 case PLUS:
3543 case LO_SUM:
3544 {
3545 rtx op0 = XEXP (x, 0);
3546 rtx op1 = XEXP (x, 1);
3547 if (GET_CODE (op0) == SUBREG)
3548 op0 = XEXP (op0, 0);
3549 if (GET_CODE (op1) == SUBREG)
3550 op1 = XEXP (op1, 0);
3551 if (GET_CODE (op0) == REG
3552 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3553 && GET_CODE (op1) == CONST_INT
3554 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3555 /* If virtual registers are involved, the displacement will
3556 change later on anyway, so checking would be premature.
3557 Reload will make sure the final displacement after
3558 register elimination is OK. */
3559 || op0 == arg_pointer_rtx
3560 || op0 == frame_pointer_rtx
3561 || op0 == virtual_stack_vars_rtx)
3562 && (!aligned || (INTVAL (op1) & 15) == 0))
3563 return TRUE;
3564 if (GET_CODE (op0) == REG
3565 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3566 && GET_CODE (op1) == REG
3567 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3568 return TRUE;
3569 }
3570 break;
3571
3572 default:
3573 break;
3574 }
3575 return FALSE;
3576 }
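/* Concrete examples for a 16-byte (TImode/V4SImode) access: (plus r3 16)
   is accepted because the displacement is 16-byte aligned, while
   (plus r3 12) is rejected, since lqd/stqd would ignore the low 4 bits of
   the displacement and access r3+0 instead. (plus r3 r4) is always
   acceptable because alignment of reg+reg does not matter here. */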
3577
3578 /* Like spu_legitimate_address_p, except with named addresses. */
3579 static bool
3580 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3581 bool reg_ok_strict, addr_space_t as)
3582 {
3583 if (as == ADDR_SPACE_EA)
3584 return (REG_P (x) && (GET_MODE (x) == EAmode));
3585
3586 else if (as != ADDR_SPACE_GENERIC)
3587 gcc_unreachable ();
3588
3589 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3590 }
3591
3592 /* When the address is reg + const_int, force the const_int into a
3593 register, so that both operands of the PLUS end up in registers. */
3594 static rtx
3595 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3596 machine_mode mode ATTRIBUTE_UNUSED)
3597 {
3598 rtx op0, op1;
3599 /* Make sure both operands are registers. */
3600 if (GET_CODE (x) == PLUS)
3601 {
3602 op0 = XEXP (x, 0);
3603 op1 = XEXP (x, 1);
3604 if (ALIGNED_SYMBOL_REF_P (op0))
3605 {
3606 op0 = force_reg (Pmode, op0);
3607 mark_reg_pointer (op0, 128);
3608 }
3609 else if (GET_CODE (op0) != REG)
3610 op0 = force_reg (Pmode, op0);
3611 if (ALIGNED_SYMBOL_REF_P (op1))
3612 {
3613 op1 = force_reg (Pmode, op1);
3614 mark_reg_pointer (op1, 128);
3615 }
3616 else if (GET_CODE (op1) != REG)
3617 op1 = force_reg (Pmode, op1);
3618 x = gen_rtx_PLUS (Pmode, op0, op1);
3619 }
3620 return x;
3621 }
3622
3623 /* Like spu_legitimize_address, except with named address support. */
3624 static rtx
3625 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3626 addr_space_t as)
3627 {
3628 if (as != ADDR_SPACE_GENERIC)
3629 return x;
3630
3631 return spu_legitimize_address (x, oldx, mode);
3632 }
3633
3634 /* Reload reg + const_int for out-of-range displacements. */
3635 rtx
3636 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3637 int opnum, int type)
3638 {
3639 bool removed_and = false;
3640
3641 if (GET_CODE (ad) == AND
3642 && CONST_INT_P (XEXP (ad, 1))
3643 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3644 {
3645 ad = XEXP (ad, 0);
3646 removed_and = true;
3647 }
3648
3649 if (GET_CODE (ad) == PLUS
3650 && REG_P (XEXP (ad, 0))
3651 && CONST_INT_P (XEXP (ad, 1))
3652 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3653 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3654 {
3655 /* Unshare the sum. */
3656 ad = copy_rtx (ad);
3657
3658 /* Reload the displacement. */
3659 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3660 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3661 opnum, (enum reload_type) type);
3662
3663 /* Add back AND for alignment if we stripped it. */
3664 if (removed_and)
3665 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3666
3667 return ad;
3668 }
3669
3670 return NULL_RTX;
3671 }
3672
3673 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3674 struct attribute_spec.handler. */
3675 static tree
3676 spu_handle_fndecl_attribute (tree * node,
3677 tree name,
3678 tree args ATTRIBUTE_UNUSED,
3679 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3680 {
3681 if (TREE_CODE (*node) != FUNCTION_DECL)
3682 {
3683 warning (0, "%qE attribute only applies to functions",
3684 name);
3685 *no_add_attrs = true;
3686 }
3687
3688 return NULL_TREE;
3689 }
3690
3691 /* Handle the "vector" attribute. */
3692 static tree
3693 spu_handle_vector_attribute (tree * node, tree name,
3694 tree args ATTRIBUTE_UNUSED,
3695 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3696 {
3697 tree type = *node, result = NULL_TREE;
3698 machine_mode mode;
3699 int unsigned_p;
3700
3701 while (POINTER_TYPE_P (type)
3702 || TREE_CODE (type) == FUNCTION_TYPE
3703 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3704 type = TREE_TYPE (type);
3705
3706 mode = TYPE_MODE (type);
3707
3708 unsigned_p = TYPE_UNSIGNED (type);
3709 switch (mode)
3710 {
3711 case E_DImode:
3712 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3713 break;
3714 case E_SImode:
3715 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3716 break;
3717 case E_HImode:
3718 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3719 break;
3720 case E_QImode:
3721 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3722 break;
3723 case E_SFmode:
3724 result = V4SF_type_node;
3725 break;
3726 case E_DFmode:
3727 result = V2DF_type_node;
3728 break;
3729 default:
3730 break;
3731 }
3732
3733 /* Propagate qualifiers attached to the element type
3734 onto the vector type. */
3735 if (result && result != type && TYPE_QUALS (type))
3736 result = build_qualified_type (result, TYPE_QUALS (type));
3737
3738 *no_add_attrs = true; /* No need to hang on to the attribute. */
3739
3740 if (!result)
3741 warning (0, "%qE attribute ignored", name);
3742 else
3743 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3744
3745 return NULL_TREE;
3746 }
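/* For example, applying this attribute to a 'float' element type
   (E_SFmode above) yields V4SF_type_node, the 128-bit vector of four
   floats denoted by the SPU 'vector float' type; qualifiers such as
   'const' on the element type carry over to the resulting vector type. */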
3747
3748 /* Return nonzero if FUNC is a naked function. */
3749 static int
3750 spu_naked_function_p (tree func)
3751 {
3752 tree a;
3753
3754 if (TREE_CODE (func) != FUNCTION_DECL)
3755 abort ();
3756
3757 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3758 return a != NULL_TREE;
3759 }
3760
3761 int
3762 spu_initial_elimination_offset (int from, int to)
3763 {
3764 int saved_regs_size = spu_saved_regs_size ();
3765 int sp_offset = 0;
3766 if (!crtl->is_leaf || crtl->outgoing_args_size
3767 || get_frame_size () || saved_regs_size)
3768 sp_offset = STACK_POINTER_OFFSET;
3769 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3770 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3771 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3772 return get_frame_size ();
3773 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3774 return sp_offset + crtl->outgoing_args_size
3775 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3776 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3777 return get_frame_size () + saved_regs_size + sp_offset;
3778 else
3779 gcc_unreachable ();
3780 }
3781
3782 rtx
3783 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3784 {
3785 machine_mode mode = TYPE_MODE (type);
3786 int byte_size = ((mode == BLKmode)
3787 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3788
3789 /* Make sure small structs are left justified in a register. */
3790 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3791 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3792 {
3793 machine_mode smode;
3794 rtvec v;
3795 int i;
3796 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3797 int n = byte_size / UNITS_PER_WORD;
3798 v = rtvec_alloc (nregs);
3799 for (i = 0; i < n; i++)
3800 {
3801 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3802 gen_rtx_REG (TImode,
3803 FIRST_RETURN_REGNUM
3804 + i),
3805 GEN_INT (UNITS_PER_WORD * i));
3806 byte_size -= UNITS_PER_WORD;
3807 }
3808
3809 if (n < nregs)
3810 {
3811 if (byte_size < 4)
3812 byte_size = 4;
3813 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3814 RTVEC_ELT (v, n) =
3815 gen_rtx_EXPR_LIST (VOIDmode,
3816 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3817 GEN_INT (UNITS_PER_WORD * n));
3818 }
3819 return gen_rtx_PARALLEL (mode, v);
3820 }
3821 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3822 }
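/* Example of the left-justified return layout: a struct one word plus
   4 bytes long is returned as a PARALLEL of two pieces, a TImode register
   at FIRST_RETURN_REGNUM for the first UNITS_PER_WORD bytes and an SImode
   register at FIRST_RETURN_REGNUM + 1 for the trailing 4 bytes, so the
   data stays left-justified in the register file. */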
3823
3824 static rtx
3825 spu_function_arg (cumulative_args_t cum_v,
3826 machine_mode mode,
3827 const_tree type, bool named ATTRIBUTE_UNUSED)
3828 {
3829 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3830 int byte_size;
3831
3832 if (*cum >= MAX_REGISTER_ARGS)
3833 return 0;
3834
3835 byte_size = ((mode == BLKmode)
3836 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3837
3838 /* The ABI does not allow parameters to be passed partially in
3839 a register and partially on the stack. */
3840 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3841 return 0;
3842
3843 /* Make sure small structs are left justified in a register. */
3844 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3845 && byte_size < UNITS_PER_WORD && byte_size > 0)
3846 {
3847 machine_mode smode;
3848 rtx gr_reg;
3849 if (byte_size < 4)
3850 byte_size = 4;
3851 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3852 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3853 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3854 const0_rtx);
3855 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3856 }
3857 else
3858 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3859 }
3860
3861 static void
3862 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3863 const_tree type, bool named ATTRIBUTE_UNUSED)
3864 {
3865 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3866
3867 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3868 ? 1
3869 : mode == BLKmode
3870 ? ((int_size_in_bytes (type) + 15) / 16)
3871 : mode == VOIDmode
3872 ? 1
3873 : HARD_REGNO_NREGS (cum, mode));
3874 }
3875
3876 /* Variable sized types are passed by reference. */
3877 static bool
3878 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3879 machine_mode mode ATTRIBUTE_UNUSED,
3880 const_tree type, bool named ATTRIBUTE_UNUSED)
3881 {
3882 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3883 }
3884 \f
3885
3886 /* Var args. */
3887
3888 /* Create and return the va_list datatype.
3889
3890 On SPU, va_list is an array type equivalent to
3891
3892 typedef struct __va_list_tag
3893 {
3894 void *__args __attribute__((__aligned(16)));
3895 void *__skip __attribute__((__aligned(16)));
3896
3897 } va_list[1];
3898
3899 where __args points to the arg that will be returned by the next
3900 va_arg(), and __skip points to the previous stack frame such that
3901 when __args == __skip we should advance __args by 32 bytes. */
3902 static tree
3903 spu_build_builtin_va_list (void)
3904 {
3905 tree f_args, f_skip, record, type_decl;
3906 bool owp;
3907
3908 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3909
3910 type_decl =
3911 build_decl (BUILTINS_LOCATION,
3912 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3913
3914 f_args = build_decl (BUILTINS_LOCATION,
3915 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3916 f_skip = build_decl (BUILTINS_LOCATION,
3917 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3918
3919 DECL_FIELD_CONTEXT (f_args) = record;
3920 SET_DECL_ALIGN (f_args, 128);
3921 DECL_USER_ALIGN (f_args) = 1;
3922
3923 DECL_FIELD_CONTEXT (f_skip) = record;
3924 SET_DECL_ALIGN (f_skip, 128);
3925 DECL_USER_ALIGN (f_skip) = 1;
3926
3927 TYPE_STUB_DECL (record) = type_decl;
3928 TYPE_NAME (record) = type_decl;
3929 TYPE_FIELDS (record) = f_args;
3930 DECL_CHAIN (f_args) = f_skip;
3931
3932 /* We know this is being padded and that is intentional. It is an
3933 internal type, so hide the warnings from the user. */
3934 owp = warn_padded;
3935 warn_padded = false;
3936
3937 layout_type (record);
3938
3939 warn_padded = owp;
3940
3941 /* The correct type is an array type of one element. */
3942 return build_array_type (record, build_index_type (size_zero_node));
3943 }
3944
3945 /* Implement va_start by filling the va_list structure VALIST.
3946 NEXTARG points to the first anonymous stack argument.
3947
3948 The following global variables are used to initialize
3949 the va_list structure:
3950
3951 crtl->args.info;
3952 the CUMULATIVE_ARGS for this function
3953
3954 crtl->args.arg_offset_rtx:
3955 holds the offset of the first anonymous stack argument
3956 (relative to the virtual arg pointer). */
3957
3958 static void
3959 spu_va_start (tree valist, rtx nextarg)
3960 {
3961 tree f_args, f_skip;
3962 tree args, skip, t;
3963
3964 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3965 f_skip = DECL_CHAIN (f_args);
3966
3967 valist = build_simple_mem_ref (valist);
3968 args =
3969 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3970 skip =
3971 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3972
3973 /* Find the __args area. */
3974 t = make_tree (TREE_TYPE (args), nextarg);
3975 if (crtl->args.pretend_args_size > 0)
3976 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3977 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3978 TREE_SIDE_EFFECTS (t) = 1;
3979 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3980
3981 /* Find the __skip area. */
3982 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3983 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3984 - STACK_POINTER_OFFSET));
3985 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3986 TREE_SIDE_EFFECTS (t) = 1;
3987 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3988 }
3989
3990 /* Gimplify va_arg by updating the va_list structure
3991 VALIST as required to retrieve an argument of type
3992 TYPE, and returning that argument.
3993
3994 ret = va_arg(VALIST, TYPE);
3995
3996 generates code equivalent to:
3997
3998 paddedsize = (sizeof(TYPE) + 15) & -16;
3999 if (VALIST.__args + paddedsize > VALIST.__skip
4000 && VALIST.__args <= VALIST.__skip)
4001 addr = VALIST.__skip + 32;
4002 else
4003 addr = VALIST.__args;
4004 VALIST.__args = addr + paddedsize;
4005 ret = *(TYPE *)addr;
4006 */
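/* Editorial example, derived only from the pseudocode above: for
   va_arg (ap, double), sizeof (TYPE) == 8, so
   paddedsize == (8 + 15) & -16 == 16; for a 20-byte struct passed by
   value, paddedsize == (20 + 15) & -16 == 32.  Each va_arg therefore
   advances __args by a multiple of 16 bytes, and the 32-byte skip is
   applied only when the advance would cross __skip.  */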
4007 static tree
4008 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4009 gimple_seq * post_p ATTRIBUTE_UNUSED)
4010 {
4011 tree f_args, f_skip;
4012 tree args, skip;
4013 HOST_WIDE_INT size, rsize;
4014 tree addr, tmp;
4015 bool pass_by_reference_p;
4016
4017 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4018 f_skip = DECL_CHAIN (f_args);
4019
4020 args =
4021 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4022 skip =
4023 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4024
4025 addr = create_tmp_var (ptr_type_node, "va_arg");
4026
4027 /* If an object is dynamically sized, a pointer to it is passed
4028 instead of the object itself. */
4029 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4030 false);
4031 if (pass_by_reference_p)
4032 type = build_pointer_type (type);
4033 size = int_size_in_bytes (type);
4034 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4035
4036 /* build conditional expression to calculate addr. The expression
4037 will be gimplified later. */
4038 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4039 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4040 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4041 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4042 unshare_expr (skip)));
4043
4044 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4045 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4046 unshare_expr (args));
4047
4048 gimplify_assign (addr, tmp, pre_p);
4049
4050 /* update VALIST.__args */
4051 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4052 gimplify_assign (unshare_expr (args), tmp, pre_p);
4053
4054 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4055 addr);
4056
4057 if (pass_by_reference_p)
4058 addr = build_va_arg_indirect_ref (addr);
4059
4060 return build_va_arg_indirect_ref (addr);
4061 }
4062
4063 /* Save parameter registers starting with the register that corresponds
4064 to the first unnamed parameters. If the first unnamed parameter is
4065 in the stack then save no registers. Set pretend_args_size to the
4066 amount of space needed to save the registers. */
4067 static void
4068 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4069 tree type, int *pretend_size, int no_rtl)
4070 {
4071 if (!no_rtl)
4072 {
4073 rtx tmp;
4074 int regno;
4075 int offset;
4076 int ncum = *get_cumulative_args (cum);
4077
4078 /* cum currently points to the last named argument; we want to
4079 start at the next argument. */
4080 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4081
4082 offset = -STACK_POINTER_OFFSET;
4083 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4084 {
4085 tmp = gen_frame_mem (V4SImode,
4086 plus_constant (Pmode, virtual_incoming_args_rtx,
4087 offset));
4088 emit_move_insn (tmp,
4089 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4090 offset += 16;
4091 }
4092 *pretend_size = offset + STACK_POINTER_OFFSET;
4093 }
4094 }
4095 \f
4096 static void
4097 spu_conditional_register_usage (void)
4098 {
4099 if (flag_pic)
4100 {
4101 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4102 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4103 }
4104 }
4105
4106 /* This is called any time we inspect the alignment of a register for
4107 addresses. */
4108 static int
4109 reg_aligned_for_addr (rtx x)
4110 {
4111 int regno =
4112 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4113 return REGNO_POINTER_ALIGN (regno) >= 128;
4114 }
4115
4116 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4117 into its SYMBOL_REF_FLAGS. */
4118 static void
4119 spu_encode_section_info (tree decl, rtx rtl, int first)
4120 {
4121 default_encode_section_info (decl, rtl, first);
4122
4123 /* If a variable has a forced alignment to < 16 bytes, mark it with
4124 SYMBOL_FLAG_ALIGN1. */
4125 if (TREE_CODE (decl) == VAR_DECL
4126 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4127 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4128 }
4129
4130 /* Return TRUE if we are certain the mem refers to a complete object
4131 which is both 16-byte aligned and padded to a 16-byte boundary. This
4132 would make it safe to store with a single instruction.
4133 We guarantee the alignment and padding for static objects by aligning
4134 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4135 FIXME: We currently cannot guarantee this for objects on the stack
4136 because assign_parm_setup_stack calls assign_stack_local with the
4137 alignment of the parameter mode and in that case the alignment never
4138 gets adjusted by LOCAL_ALIGNMENT. */
4139 static int
4140 store_with_one_insn_p (rtx mem)
4141 {
4142 machine_mode mode = GET_MODE (mem);
4143 rtx addr = XEXP (mem, 0);
4144 if (mode == BLKmode)
4145 return 0;
4146 if (GET_MODE_SIZE (mode) >= 16)
4147 return 1;
4148 /* Only static objects. */
4149 if (GET_CODE (addr) == SYMBOL_REF)
4150 {
4151 /* We use the associated declaration to make sure the access is
4152 referring to the whole object.
4153 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4154 if it is necessary. Will there be cases where one exists, and
4155 the other does not? Will there be cases where both exist, but
4156 have different types? */
4157 tree decl = MEM_EXPR (mem);
4158 if (decl
4159 && TREE_CODE (decl) == VAR_DECL
4160 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4161 return 1;
4162 decl = SYMBOL_REF_DECL (addr);
4163 if (decl
4164 && TREE_CODE (decl) == VAR_DECL
4165 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4166 return 1;
4167 }
4168 return 0;
4169 }
4170
4171 /* Return 1 when the address is not valid for a simple load and store as
4172 required by the '_mov*' patterns. We could make this less strict
4173 for loads, but we prefer MEMs to look the same so they are more
4174 likely to be merged. */
4175 static int
4176 address_needs_split (rtx mem)
4177 {
4178 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4179 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4180 || !(store_with_one_insn_p (mem)
4181 || mem_is_padded_component_ref (mem))))
4182 return 1;
4183
4184 return 0;
4185 }
4186
4187 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4188 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4189 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4190
4191 /* MEM is known to be an __ea qualified memory access. Emit a call to
4192 fetch the PPU memory into local store, and return its address in local
4193 store. */
4194
4195 static void
4196 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4197 {
4198 if (is_store)
4199 {
4200 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4201 if (!cache_fetch_dirty)
4202 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4203 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4204 2, ea_addr, EAmode, ndirty, SImode);
4205 }
4206 else
4207 {
4208 if (!cache_fetch)
4209 cache_fetch = init_one_libfunc ("__cache_fetch");
4210 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4211 1, ea_addr, EAmode);
4212 }
4213 }
4214
4215 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4216 dirty bit marking, inline.
4217
4218 The cache control data structure is an array of
4219
4220 struct __cache_tag_array
4221 {
4222 unsigned int tag_lo[4];
4223 unsigned int tag_hi[4];
4224 void *data_pointer[4];
4225 int reserved[4];
4226 vector unsigned short dirty_bits[4];
4227 } */
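/* Editorial note, derived from the structure above and the code below
   (assuming 4-byte local-store pointers): each struct __cache_tag_array
   entry is 128 bytes (4 * 4-byte tag_lo + 4 * 4-byte tag_hi + 4 * 4-byte
   data_pointer + 4 * 4-byte reserved + 4 * 16-byte dirty_bits), i.e. one
   entry per 128-byte cache line with four ways per entry.  That is why
   ea_addr & (__cache_tag_array_size - 128) serves directly as a byte
   offset into the array, while ea_addr & 127 is the offset within the
   cache line.  */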
4228
4229 static void
4230 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4231 {
4232 rtx ea_addr_si;
4233 HOST_WIDE_INT v;
4234 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4235 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4236 rtx index_mask = gen_reg_rtx (SImode);
4237 rtx tag_arr = gen_reg_rtx (Pmode);
4238 rtx splat_mask = gen_reg_rtx (TImode);
4239 rtx splat = gen_reg_rtx (V4SImode);
4240 rtx splat_hi = NULL_RTX;
4241 rtx tag_index = gen_reg_rtx (Pmode);
4242 rtx block_off = gen_reg_rtx (SImode);
4243 rtx tag_addr = gen_reg_rtx (Pmode);
4244 rtx tag = gen_reg_rtx (V4SImode);
4245 rtx cache_tag = gen_reg_rtx (V4SImode);
4246 rtx cache_tag_hi = NULL_RTX;
4247 rtx cache_ptrs = gen_reg_rtx (TImode);
4248 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4249 rtx tag_equal = gen_reg_rtx (V4SImode);
4250 rtx tag_equal_hi = NULL_RTX;
4251 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4252 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4253 rtx eq_index = gen_reg_rtx (SImode);
4254 rtx bcomp, hit_label, hit_ref, cont_label;
4255 rtx_insn *insn;
4256
4257 if (spu_ea_model != 32)
4258 {
4259 splat_hi = gen_reg_rtx (V4SImode);
4260 cache_tag_hi = gen_reg_rtx (V4SImode);
4261 tag_equal_hi = gen_reg_rtx (V4SImode);
4262 }
4263
4264 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4265 emit_move_insn (tag_arr, tag_arr_sym);
4266 v = 0x0001020300010203LL;
4267 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4268 ea_addr_si = ea_addr;
4269 if (spu_ea_model != 32)
4270 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4271
4272 /* tag_index = ea_addr & (tag_array_size - 128) */
4273 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4274
4275 /* splat ea_addr to all 4 slots. */
4276 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4277 /* Similarly for high 32 bits of ea_addr. */
4278 if (spu_ea_model != 32)
4279 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4280
4281 /* block_off = ea_addr & 127 */
4282 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4283
4284 /* tag_addr = tag_arr + tag_index */
4285 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4286
4287 /* Read cache tags. */
4288 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4289 if (spu_ea_model != 32)
4290 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4291 plus_constant (Pmode,
4292 tag_addr, 16)));
4293
4294 /* tag = ea_addr & -128 */
4295 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4296
4297 /* Read all four cache data pointers. */
4298 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4299 plus_constant (Pmode,
4300 tag_addr, 32)));
4301
4302 /* Compare tags. */
4303 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4304 if (spu_ea_model != 32)
4305 {
4306 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4307 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4308 }
4309
4310 /* At most one of the tags compare equal, so tag_equal has one
4311 32-bit slot set to all 1's, with the other slots all zero.
4312 gbb picks off low bit from each byte in the 128-bit registers,
4313 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4314 we have a hit. */
4315 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4316 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4317
4318 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4319 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4320
4321 /* This allows us to rotate the corresponding cache data pointer into
4322 slot 0 (rotating by eq_index mod 16 bytes). */
4323 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4324 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4325
4326 /* Add block offset to form final data address. */
4327 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4328
4329 /* Check that we did hit. */
4330 hit_label = gen_label_rtx ();
4331 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4332 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4333 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4334 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4335 hit_ref, pc_rtx)));
4336 /* Say that this branch is very likely to happen. */
4337 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4338
4339 ea_load_store (mem, is_store, ea_addr, data_addr);
4340 cont_label = gen_label_rtx ();
4341 emit_jump_insn (gen_jump (cont_label));
4342 emit_barrier ();
4343
4344 emit_label (hit_label);
4345
4346 if (is_store)
4347 {
4348 HOST_WIDE_INT v_hi;
4349 rtx dirty_bits = gen_reg_rtx (TImode);
4350 rtx dirty_off = gen_reg_rtx (SImode);
4351 rtx dirty_128 = gen_reg_rtx (TImode);
4352 rtx neg_block_off = gen_reg_rtx (SImode);
4353
4354 /* Set up mask with one dirty bit per byte of the mem we are
4355 writing, starting from top bit. */
4356 v_hi = v = -1;
4357 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4358 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4359 {
4360 v_hi = v;
4361 v = 0;
4362 }
4363 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4364
4365 /* Form index into cache dirty_bits. eq_index is one of
4366 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4367 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4368 offset to each of the four dirty_bits elements. */
4369 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4370
4371 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4372
4373 /* Rotate bit mask to proper bit. */
4374 emit_insn (gen_negsi2 (neg_block_off, block_off));
4375 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4376 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4377
4378 /* Or in the new dirty bits. */
4379 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4380
4381 /* Store. */
4382 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4383 }
4384
4385 emit_label (cont_label);
4386 }
4387
4388 static rtx
4389 expand_ea_mem (rtx mem, bool is_store)
4390 {
4391 rtx ea_addr;
4392 rtx data_addr = gen_reg_rtx (Pmode);
4393 rtx new_mem;
4394
4395 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4396 if (optimize_size || optimize == 0)
4397 ea_load_store (mem, is_store, ea_addr, data_addr);
4398 else
4399 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4400
4401 if (ea_alias_set == -1)
4402 ea_alias_set = new_alias_set ();
4403
4404 /* We generate a new MEM RTX to refer to the copy of the data
4405 in the cache. We do not copy memory attributes (except the
4406 alignment) from the original MEM, as they may no longer apply
4407 to the cache copy. */
4408 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4409 set_mem_alias_set (new_mem, ea_alias_set);
4410 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4411
4412 return new_mem;
4413 }
4414
4415 int
4416 spu_expand_mov (rtx * ops, machine_mode mode)
4417 {
4418 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4419 {
4420 /* Perform the move in the destination SUBREG's inner mode. */
4421 ops[0] = SUBREG_REG (ops[0]);
4422 mode = GET_MODE (ops[0]);
4423 ops[1] = gen_lowpart_common (mode, ops[1]);
4424 gcc_assert (ops[1]);
4425 }
4426
4427 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4428 {
4429 rtx from = SUBREG_REG (ops[1]);
4430 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4431
4432 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4433 && GET_MODE_CLASS (imode) == MODE_INT
4434 && subreg_lowpart_p (ops[1]));
4435
4436 if (GET_MODE_SIZE (imode) < 4)
4437 imode = SImode;
4438 if (imode != GET_MODE (from))
4439 from = gen_rtx_SUBREG (imode, from, 0);
4440
4441 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4442 {
4443 enum insn_code icode = convert_optab_handler (trunc_optab,
4444 mode, imode);
4445 emit_insn (GEN_FCN (icode) (ops[0], from));
4446 }
4447 else
4448 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4449 return 1;
4450 }
4451
4452 /* At least one of the operands needs to be a register. */
4453 if ((reload_in_progress | reload_completed) == 0
4454 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4455 {
4456 rtx temp = force_reg (mode, ops[1]);
4457 emit_move_insn (ops[0], temp);
4458 return 1;
4459 }
4460 if (reload_in_progress || reload_completed)
4461 {
4462 if (CONSTANT_P (ops[1]))
4463 return spu_split_immediate (ops);
4464 return 0;
4465 }
4466
4467 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4468 extend them. */
4469 if (GET_CODE (ops[1]) == CONST_INT)
4470 {
4471 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4472 if (val != INTVAL (ops[1]))
4473 {
4474 emit_move_insn (ops[0], GEN_INT (val));
4475 return 1;
4476 }
4477 }
4478 if (MEM_P (ops[0]))
4479 {
4480 if (MEM_ADDR_SPACE (ops[0]))
4481 ops[0] = expand_ea_mem (ops[0], true);
4482 return spu_split_store (ops);
4483 }
4484 if (MEM_P (ops[1]))
4485 {
4486 if (MEM_ADDR_SPACE (ops[1]))
4487 ops[1] = expand_ea_mem (ops[1], false);
4488 return spu_split_load (ops);
4489 }
4490
4491 return 0;
4492 }
4493
4494 static void
4495 spu_convert_move (rtx dst, rtx src)
4496 {
4497 machine_mode mode = GET_MODE (dst);
4498 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4499 rtx reg;
4500 gcc_assert (GET_MODE (src) == TImode);
4501 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
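  /* Editorial note: the TImode source keeps the scalar in the preferred
     slot (the most-significant bytes of the quadword), so the logical
     right shift below by 96 bits (or 64 bits for a DImode result) moves
     it into the low-order bits where TRUNCATE can extract the value.  */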
4502 emit_insn (gen_rtx_SET (reg,
4503 gen_rtx_TRUNCATE (int_mode,
4504 gen_rtx_LSHIFTRT (TImode, src,
4505 GEN_INT (int_mode == DImode ? 64 : 96)))));
4506 if (int_mode != mode)
4507 {
4508 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4509 emit_move_insn (dst, reg);
4510 }
4511 }
4512
4513 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4514 the address from SRC and SRC+16. Return a REG or CONST_INT that
4515 specifies how many bytes to rotate the loaded registers, plus any
4516 extra from EXTRA_ROTQBY. The address and rotate amounts are
4517 normalized to improve merging of loads and rotate computations. */
4518 static rtx
4519 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4520 {
4521 rtx addr = XEXP (src, 0);
4522 rtx p0, p1, rot, addr0, addr1;
4523 int rot_amt;
4524
4525 rot = 0;
4526 rot_amt = 0;
4527
4528 if (MEM_ALIGN (src) >= 128)
4529 /* Address is already aligned; simply perform a TImode load. */ ;
4530 else if (GET_CODE (addr) == PLUS)
4531 {
4532 /* 8 cases:
4533 aligned reg + aligned reg => lqx
4534 aligned reg + unaligned reg => lqx, rotqby
4535 aligned reg + aligned const => lqd
4536 aligned reg + unaligned const => lqd, rotqbyi
4537 unaligned reg + aligned reg => lqx, rotqby
4538 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4539 unaligned reg + aligned const => lqd, rotqby
4540 unaligned reg + unaligned const => not allowed by legitimate address
4541 */
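      /* Editorial sketch of the common case (based on the SPU ISA's
	 quadword loads): lqd/lqx ignore the low four address bits and
	 always load the enclosing 16-byte quadword, so an unaligned
	 access becomes "load the quadword, then rotqby left by
	 (address & 15) bytes" so the requested data starts at byte 0 of
	 the register.  The code below only computes that rotate amount
	 (ROT / ROT_AMT); the caller emits the actual rotate.  */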
4542 p0 = XEXP (addr, 0);
4543 p1 = XEXP (addr, 1);
4544 if (!reg_aligned_for_addr (p0))
4545 {
4546 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4547 {
4548 rot = gen_reg_rtx (SImode);
4549 emit_insn (gen_addsi3 (rot, p0, p1));
4550 }
4551 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4552 {
4553 if (INTVAL (p1) > 0
4554 && REG_POINTER (p0)
4555 && INTVAL (p1) * BITS_PER_UNIT
4556 < REGNO_POINTER_ALIGN (REGNO (p0)))
4557 {
4558 rot = gen_reg_rtx (SImode);
4559 emit_insn (gen_addsi3 (rot, p0, p1));
4560 addr = p0;
4561 }
4562 else
4563 {
4564 rtx x = gen_reg_rtx (SImode);
4565 emit_move_insn (x, p1);
4566 if (!spu_arith_operand (p1, SImode))
4567 p1 = x;
4568 rot = gen_reg_rtx (SImode);
4569 emit_insn (gen_addsi3 (rot, p0, p1));
4570 addr = gen_rtx_PLUS (Pmode, p0, x);
4571 }
4572 }
4573 else
4574 rot = p0;
4575 }
4576 else
4577 {
4578 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4579 {
4580 rot_amt = INTVAL (p1) & 15;
4581 if (INTVAL (p1) & -16)
4582 {
4583 p1 = GEN_INT (INTVAL (p1) & -16);
4584 addr = gen_rtx_PLUS (SImode, p0, p1);
4585 }
4586 else
4587 addr = p0;
4588 }
4589 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4590 rot = p1;
4591 }
4592 }
4593 else if (REG_P (addr))
4594 {
4595 if (!reg_aligned_for_addr (addr))
4596 rot = addr;
4597 }
4598 else if (GET_CODE (addr) == CONST)
4599 {
4600 if (GET_CODE (XEXP (addr, 0)) == PLUS
4601 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4602 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4603 {
4604 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4605 if (rot_amt & -16)
4606 addr = gen_rtx_CONST (Pmode,
4607 gen_rtx_PLUS (Pmode,
4608 XEXP (XEXP (addr, 0), 0),
4609 GEN_INT (rot_amt & -16)));
4610 else
4611 addr = XEXP (XEXP (addr, 0), 0);
4612 }
4613 else
4614 {
4615 rot = gen_reg_rtx (Pmode);
4616 emit_move_insn (rot, addr);
4617 }
4618 }
4619 else if (GET_CODE (addr) == CONST_INT)
4620 {
4621 rot_amt = INTVAL (addr);
4622 addr = GEN_INT (rot_amt & -16);
4623 }
4624 else if (!ALIGNED_SYMBOL_REF_P (addr))
4625 {
4626 rot = gen_reg_rtx (Pmode);
4627 emit_move_insn (rot, addr);
4628 }
4629
4630 rot_amt += extra_rotby;
4631
4632 rot_amt &= 15;
4633
4634 if (rot && rot_amt)
4635 {
4636 rtx x = gen_reg_rtx (SImode);
4637 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4638 rot = x;
4639 rot_amt = 0;
4640 }
4641 if (!rot && rot_amt)
4642 rot = GEN_INT (rot_amt);
4643
4644 /* Mask the address down to its 16-byte boundary for the TImode load. */
4645 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4646 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4647
4648 if (dst1)
4649 {
4650 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4651 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4652 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4653 }
4654
4655 return rot;
4656 }
4657
4658 int
4659 spu_split_load (rtx * ops)
4660 {
4661 machine_mode mode = GET_MODE (ops[0]);
4662 rtx addr, load, rot;
4663 int rot_amt;
4664
4665 if (GET_MODE_SIZE (mode) >= 16)
4666 return 0;
4667
4668 addr = XEXP (ops[1], 0);
4669 gcc_assert (GET_CODE (addr) != AND);
4670
4671 if (!address_needs_split (ops[1]))
4672 {
4673 ops[1] = change_address (ops[1], TImode, addr);
4674 load = gen_reg_rtx (TImode);
4675 emit_insn (gen__movti (load, ops[1]));
4676 spu_convert_move (ops[0], load);
4677 return 1;
4678 }
4679
4680 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4681
4682 load = gen_reg_rtx (TImode);
4683 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4684
4685 if (rot)
4686 emit_insn (gen_rotqby_ti (load, load, rot));
4687
4688 spu_convert_move (ops[0], load);
4689 return 1;
4690 }
4691
4692 int
4693 spu_split_store (rtx * ops)
4694 {
4695 machine_mode mode = GET_MODE (ops[0]);
4696 rtx reg;
4697 rtx addr, p0, p1, p1_lo, smem;
4698 int aform;
4699 int scalar;
4700
4701 if (GET_MODE_SIZE (mode) >= 16)
4702 return 0;
4703
4704 addr = XEXP (ops[0], 0);
4705 gcc_assert (GET_CODE (addr) != AND);
4706
4707 if (!address_needs_split (ops[0]))
4708 {
4709 reg = gen_reg_rtx (TImode);
4710 emit_insn (gen_spu_convert (reg, ops[1]));
4711 ops[0] = change_address (ops[0], TImode, addr);
4712 emit_move_insn (ops[0], reg);
4713 return 1;
4714 }
4715
4716 if (GET_CODE (addr) == PLUS)
4717 {
4718 /* 8 cases:
4719 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4720 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4721 aligned reg + aligned const => lqd, c?d, shuf, stqx
4722 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4723 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4724 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4725 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4726 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4727 */
4728 aform = 0;
4729 p0 = XEXP (addr, 0);
4730 p1 = p1_lo = XEXP (addr, 1);
4731 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4732 {
4733 p1_lo = GEN_INT (INTVAL (p1) & 15);
4734 if (reg_aligned_for_addr (p0))
4735 {
4736 p1 = GEN_INT (INTVAL (p1) & -16);
4737 if (p1 == const0_rtx)
4738 addr = p0;
4739 else
4740 addr = gen_rtx_PLUS (SImode, p0, p1);
4741 }
4742 else
4743 {
4744 rtx x = gen_reg_rtx (SImode);
4745 emit_move_insn (x, p1);
4746 addr = gen_rtx_PLUS (SImode, p0, x);
4747 }
4748 }
4749 }
4750 else if (REG_P (addr))
4751 {
4752 aform = 0;
4753 p0 = addr;
4754 p1 = p1_lo = const0_rtx;
4755 }
4756 else
4757 {
4758 aform = 1;
4759 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4760 p1 = 0; /* aform doesn't use p1 */
4761 p1_lo = addr;
4762 if (ALIGNED_SYMBOL_REF_P (addr))
4763 p1_lo = const0_rtx;
4764 else if (GET_CODE (addr) == CONST
4765 && GET_CODE (XEXP (addr, 0)) == PLUS
4766 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4767 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4768 {
4769 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4770 if ((v & -16) != 0)
4771 addr = gen_rtx_CONST (Pmode,
4772 gen_rtx_PLUS (Pmode,
4773 XEXP (XEXP (addr, 0), 0),
4774 GEN_INT (v & -16)));
4775 else
4776 addr = XEXP (XEXP (addr, 0), 0);
4777 p1_lo = GEN_INT (v & 15);
4778 }
4779 else if (GET_CODE (addr) == CONST_INT)
4780 {
4781 p1_lo = GEN_INT (INTVAL (addr) & 15);
4782 addr = GEN_INT (INTVAL (addr) & -16);
4783 }
4784 else
4785 {
4786 p1_lo = gen_reg_rtx (SImode);
4787 emit_move_insn (p1_lo, addr);
4788 }
4789 }
4790
4791 gcc_assert (aform == 0 || aform == 1);
4792 reg = gen_reg_rtx (TImode);
4793
4794 scalar = store_with_one_insn_p (ops[0]);
4795 if (!scalar)
4796 {
4797 /* We could copy the flags from the ops[0] MEM to lmem here.
4798 We don't, because we want this load to be optimized away if
4799 possible, and copying the flags would prevent that in certain
4800 cases, e.g. consider the volatile flag. */
4801
4802 rtx pat = gen_reg_rtx (TImode);
4803 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4804 set_mem_alias_set (lmem, 0);
4805 emit_insn (gen_movti (reg, lmem));
4806
4807 if (!p0 || reg_aligned_for_addr (p0))
4808 p0 = stack_pointer_rtx;
4809 if (!p1_lo)
4810 p1_lo = const0_rtx;
4811
4812 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4813 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4814 }
4815 else
4816 {
4817 if (GET_CODE (ops[1]) == REG)
4818 emit_insn (gen_spu_convert (reg, ops[1]));
4819 else if (GET_CODE (ops[1]) == SUBREG)
4820 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4821 else
4822 abort ();
4823 }
4824
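  /* Editorial note: for a store of a scalar narrower than a word, the
     value sits in the low-order bytes of the preferred slot (e.g. byte 3
     for QImode); the shift below left-justifies it so that storing the
     whole quadword places the value at the start of the 16-byte-aligned,
     padded object.  */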
4825 if (GET_MODE_SIZE (mode) < 4 && scalar)
4826 emit_insn (gen_ashlti3
4827 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4828
4829 smem = change_address (ops[0], TImode, copy_rtx (addr));
4830 /* We can't use the previous alias set because the memory has changed
4831 size and can potentially overlap objects of other types. */
4832 set_mem_alias_set (smem, 0);
4833
4834 emit_insn (gen_movti (smem, reg));
4835 return 1;
4836 }
4837
4838 /* Return TRUE if X is MEM which is a struct member reference
4839 and the member can safely be loaded and stored with a single
4840 instruction because it is padded. */
4841 static int
4842 mem_is_padded_component_ref (rtx x)
4843 {
4844 tree t = MEM_EXPR (x);
4845 tree r;
4846 if (!t || TREE_CODE (t) != COMPONENT_REF)
4847 return 0;
4848 t = TREE_OPERAND (t, 1);
4849 if (!t || TREE_CODE (t) != FIELD_DECL
4850 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4851 return 0;
4852 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4853 r = DECL_FIELD_CONTEXT (t);
4854 if (!r || TREE_CODE (r) != RECORD_TYPE)
4855 return 0;
4856 /* Make sure they have the same mode. */
4857 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4858 return 0;
4859 /* If there are no following fields then the field alignment assures
4860 the structure is padded to the alignment which means this field is
4861 padded too. */
4862 if (TREE_CHAIN (t) == 0)
4863 return 1;
4864 /* If the following field is also aligned then this field will be
4865 padded. */
4866 t = TREE_CHAIN (t);
4867 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4868 return 1;
4869 return 0;
4870 }
4871
4872 /* Parse the -mfixed-range= option string. */
4873 static void
4874 fix_range (const char *const_str)
4875 {
4876 int i, first, last;
4877 char *str, *dash, *comma;
4878
4879 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4880 REG2 are either register names or register numbers. The effect
4881 of this option is to mark the registers in the range from REG1 to
4882 REG2 as ``fixed'' so they won't be used by the compiler. */
4883
4884 i = strlen (const_str);
4885 str = (char *) alloca (i + 1);
4886 memcpy (str, const_str, i + 1);
4887
4888 while (1)
4889 {
4890 dash = strchr (str, '-');
4891 if (!dash)
4892 {
4893 warning (0, "value of -mfixed-range must have form REG1-REG2");
4894 return;
4895 }
4896 *dash = '\0';
4897 comma = strchr (dash + 1, ',');
4898 if (comma)
4899 *comma = '\0';
4900
4901 first = decode_reg_name (str);
4902 if (first < 0)
4903 {
4904 warning (0, "unknown register name: %s", str);
4905 return;
4906 }
4907
4908 last = decode_reg_name (dash + 1);
4909 if (last < 0)
4910 {
4911 warning (0, "unknown register name: %s", dash + 1);
4912 return;
4913 }
4914
4915 *dash = '-';
4916
4917 if (first > last)
4918 {
4919 warning (0, "%s-%s is an empty range", str, dash + 1);
4920 return;
4921 }
4922
4923 for (i = first; i <= last; ++i)
4924 fixed_regs[i] = call_used_regs[i] = 1;
4925
4926 if (!comma)
4927 break;
4928
4929 *comma = ',';
4930 str = comma + 1;
4931 }
4932 }
4933
4934 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4935 can be generated using the fsmbi instruction. */
4936 int
4937 fsmbi_const_p (rtx x)
4938 {
4939 if (CONSTANT_P (x))
4940 {
4941 /* We can always choose TImode for CONST_INT because the high bits
4942 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4943 enum immediate_class c = classify_immediate (x, TImode);
4944 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4945 }
4946 return 0;
4947 }
4948
4949 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4950 can be generated using the cbd, chd, cwd or cdd instruction. */
4951 int
4952 cpat_const_p (rtx x, machine_mode mode)
4953 {
4954 if (CONSTANT_P (x))
4955 {
4956 enum immediate_class c = classify_immediate (x, mode);
4957 return c == IC_CPAT;
4958 }
4959 return 0;
4960 }
4961
4962 rtx
4963 gen_cpat_const (rtx * ops)
4964 {
4965 unsigned char dst[16];
4966 int i, offset, shift, isize;
4967 if (GET_CODE (ops[3]) != CONST_INT
4968 || GET_CODE (ops[2]) != CONST_INT
4969 || (GET_CODE (ops[1]) != CONST_INT
4970 && GET_CODE (ops[1]) != REG))
4971 return 0;
4972 if (GET_CODE (ops[1]) == REG
4973 && (!REG_POINTER (ops[1])
4974 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4975 return 0;
4976
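  /* Editorial note: the pattern built below mirrors what the
     cbd/chd/cwd/cdd instructions produce.  Every byte initially selects
     from the second shufb operand (values 0x10..0x1f), and the ISIZE
     bytes at the insertion offset are overwritten to select the scalar
     from the first operand: byte 3 for a 1-byte insert, bytes 2-3 for a
     2-byte insert, and bytes starting at 0 for word and doubleword
     inserts.  */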
4977 for (i = 0; i < 16; i++)
4978 dst[i] = i + 16;
4979 isize = INTVAL (ops[3]);
4980 if (isize == 1)
4981 shift = 3;
4982 else if (isize == 2)
4983 shift = 2;
4984 else
4985 shift = 0;
4986 offset = (INTVAL (ops[2]) +
4987 (GET_CODE (ops[1]) ==
4988 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4989 for (i = 0; i < isize; i++)
4990 dst[offset + i] = i + shift;
4991 return array_to_constant (TImode, dst);
4992 }
4993
4994 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4995 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4996 than 16 bytes, the value is repeated across the rest of the array. */
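/* Editorial example: for an SImode CONST_INT 0x12345678 the array becomes
   { 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78, ... } -- the value is
   written most-significant byte first and then repeated to fill all 16
   bytes.  */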
4997 void
4998 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
4999 {
5000 HOST_WIDE_INT val;
5001 int i, j, first;
5002
5003 memset (arr, 0, 16);
5004 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5005 if (GET_CODE (x) == CONST_INT
5006 || (GET_CODE (x) == CONST_DOUBLE
5007 && (mode == SFmode || mode == DFmode)))
5008 {
5009 gcc_assert (mode != VOIDmode && mode != BLKmode);
5010
5011 if (GET_CODE (x) == CONST_DOUBLE)
5012 val = const_double_to_hwint (x);
5013 else
5014 val = INTVAL (x);
5015 first = GET_MODE_SIZE (mode) - 1;
5016 for (i = first; i >= 0; i--)
5017 {
5018 arr[i] = val & 0xff;
5019 val >>= 8;
5020 }
5021 /* Splat the constant across the whole array. */
5022 for (j = 0, i = first + 1; i < 16; i++)
5023 {
5024 arr[i] = arr[j];
5025 j = (j == first) ? 0 : j + 1;
5026 }
5027 }
5028 else if (GET_CODE (x) == CONST_DOUBLE)
5029 {
5030 val = CONST_DOUBLE_LOW (x);
5031 for (i = 15; i >= 8; i--)
5032 {
5033 arr[i] = val & 0xff;
5034 val >>= 8;
5035 }
5036 val = CONST_DOUBLE_HIGH (x);
5037 for (i = 7; i >= 0; i--)
5038 {
5039 arr[i] = val & 0xff;
5040 val >>= 8;
5041 }
5042 }
5043 else if (GET_CODE (x) == CONST_VECTOR)
5044 {
5045 int units;
5046 rtx elt;
5047 mode = GET_MODE_INNER (mode);
5048 units = CONST_VECTOR_NUNITS (x);
5049 for (i = 0; i < units; i++)
5050 {
5051 elt = CONST_VECTOR_ELT (x, i);
5052 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5053 {
5054 if (GET_CODE (elt) == CONST_DOUBLE)
5055 val = const_double_to_hwint (elt);
5056 else
5057 val = INTVAL (elt);
5058 first = GET_MODE_SIZE (mode) - 1;
5059 if (first + i * GET_MODE_SIZE (mode) > 16)
5060 abort ();
5061 for (j = first; j >= 0; j--)
5062 {
5063 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5064 val >>= 8;
5065 }
5066 }
5067 }
5068 }
5069 else
5070 gcc_unreachable();
5071 }
5072
5073 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5074 smaller than 16 bytes, use the bytes that would represent that value
5075 in a register, e.g., for QImode return the value of arr[3]. */
5076 rtx
5077 array_to_constant (machine_mode mode, const unsigned char arr[16])
5078 {
5079 machine_mode inner_mode;
5080 rtvec v;
5081 int units, size, i, j, k;
5082 HOST_WIDE_INT val;
5083
5084 if (GET_MODE_CLASS (mode) == MODE_INT
5085 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5086 {
5087 j = GET_MODE_SIZE (mode);
5088 i = j < 4 ? 4 - j : 0;
5089 for (val = 0; i < j; i++)
5090 val = (val << 8) | arr[i];
5091 val = trunc_int_for_mode (val, mode);
5092 return GEN_INT (val);
5093 }
5094
5095 if (mode == TImode)
5096 {
5097 HOST_WIDE_INT high;
5098 for (i = high = 0; i < 8; i++)
5099 high = (high << 8) | arr[i];
5100 for (i = 8, val = 0; i < 16; i++)
5101 val = (val << 8) | arr[i];
5102 return immed_double_const (val, high, TImode);
5103 }
5104 if (mode == SFmode)
5105 {
5106 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5107 val = trunc_int_for_mode (val, SImode);
5108 return hwint_to_const_double (SFmode, val);
5109 }
5110 if (mode == DFmode)
5111 {
5112 for (i = 0, val = 0; i < 8; i++)
5113 val = (val << 8) | arr[i];
5114 return hwint_to_const_double (DFmode, val);
5115 }
5116
5117 if (!VECTOR_MODE_P (mode))
5118 abort ();
5119
5120 units = GET_MODE_NUNITS (mode);
5121 size = GET_MODE_UNIT_SIZE (mode);
5122 inner_mode = GET_MODE_INNER (mode);
5123 v = rtvec_alloc (units);
5124
5125 for (k = i = 0; i < units; ++i)
5126 {
5127 val = 0;
5128 for (j = 0; j < size; j++, k++)
5129 val = (val << 8) | arr[k];
5130
5131 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5132 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5133 else
5134 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5135 }
5136 if (k > 16)
5137 abort ();
5138
5139 return gen_rtx_CONST_VECTOR (mode, v);
5140 }
5141
5142 static void
5143 reloc_diagnostic (rtx x)
5144 {
5145 tree decl = 0;
5146 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5147 return;
5148
5149 if (GET_CODE (x) == SYMBOL_REF)
5150 decl = SYMBOL_REF_DECL (x);
5151 else if (GET_CODE (x) == CONST
5152 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5153 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5154
5155 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5156 if (decl && !DECL_P (decl))
5157 decl = 0;
5158
5159 /* The decl could be a string constant. */
5160 if (decl && DECL_P (decl))
5161 {
5162 location_t loc;
5163 /* We use last_assemble_variable_decl to get line information. It's
5164 not always going to be right and might not even be close, but will
5165 be right for the more common cases. */
5166 if (!last_assemble_variable_decl || in_section == ctors_section)
5167 loc = DECL_SOURCE_LOCATION (decl);
5168 else
5169 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5170
5171 if (TARGET_WARN_RELOC)
5172 warning_at (loc, 0,
5173 "creating run-time relocation for %qD", decl);
5174 else
5175 error_at (loc,
5176 "creating run-time relocation for %qD", decl);
5177 }
5178 else
5179 {
5180 if (TARGET_WARN_RELOC)
5181 warning_at (input_location, 0, "creating run-time relocation");
5182 else
5183 error_at (input_location, "creating run-time relocation");
5184 }
5185 }
5186
5187 /* Hook into assemble_integer so we can generate an error for run-time
5188 relocations. The SPU ABI disallows them. */
5189 static bool
5190 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5191 {
5192 /* By default run-time relocations aren't supported, but we allow them
5193 in case users support it in their own run-time loader. And we provide
5194 a warning for those users that don't. */
5195 if ((GET_CODE (x) == SYMBOL_REF)
5196 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5197 reloc_diagnostic (x);
5198
5199 return default_assemble_integer (x, size, aligned_p);
5200 }
5201
5202 static void
5203 spu_asm_globalize_label (FILE * file, const char *name)
5204 {
5205 fputs ("\t.global\t", file);
5206 assemble_name (file, name);
5207 fputs ("\n", file);
5208 }
5209
5210 static bool
5211 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5212 int opno ATTRIBUTE_UNUSED, int *total,
5213 bool speed ATTRIBUTE_UNUSED)
5214 {
5215 int code = GET_CODE (x);
5216 int cost = COSTS_N_INSNS (2);
5217
5218 /* Folding to a CONST_VECTOR will use extra space but there might
5219 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5220 only if it allows us to fold away multiple insns. Changing the cost
5221 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5222 because this cost will only be compared against a single insn.
5223 if (code == CONST_VECTOR)
5224 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5225 */
5226
5227 /* Use defaults for float operations. Not accurate but good enough. */
5228 if (mode == DFmode)
5229 {
5230 *total = COSTS_N_INSNS (13);
5231 return true;
5232 }
5233 if (mode == SFmode)
5234 {
5235 *total = COSTS_N_INSNS (6);
5236 return true;
5237 }
5238 switch (code)
5239 {
5240 case CONST_INT:
5241 if (satisfies_constraint_K (x))
5242 *total = 0;
5243 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5244 *total = COSTS_N_INSNS (1);
5245 else
5246 *total = COSTS_N_INSNS (3);
5247 return true;
5248
5249 case CONST:
5250 *total = COSTS_N_INSNS (3);
5251 return true;
5252
5253 case LABEL_REF:
5254 case SYMBOL_REF:
5255 *total = COSTS_N_INSNS (0);
5256 return true;
5257
5258 case CONST_DOUBLE:
5259 *total = COSTS_N_INSNS (5);
5260 return true;
5261
5262 case FLOAT_EXTEND:
5263 case FLOAT_TRUNCATE:
5264 case FLOAT:
5265 case UNSIGNED_FLOAT:
5266 case FIX:
5267 case UNSIGNED_FIX:
5268 *total = COSTS_N_INSNS (7);
5269 return true;
5270
5271 case PLUS:
5272 if (mode == TImode)
5273 {
5274 *total = COSTS_N_INSNS (9);
5275 return true;
5276 }
5277 break;
5278
5279 case MULT:
5280 cost =
5281 GET_CODE (XEXP (x, 0)) ==
5282 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5283 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5284 {
5285 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5286 {
5287 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5288 cost = COSTS_N_INSNS (14);
5289 if ((val & 0xffff) == 0)
5290 cost = COSTS_N_INSNS (9);
5291 else if (val > 0 && val < 0x10000)
5292 cost = COSTS_N_INSNS (11);
5293 }
5294 }
5295 *total = cost;
5296 return true;
5297 case DIV:
5298 case UDIV:
5299 case MOD:
5300 case UMOD:
5301 *total = COSTS_N_INSNS (20);
5302 return true;
5303 case ROTATE:
5304 case ROTATERT:
5305 case ASHIFT:
5306 case ASHIFTRT:
5307 case LSHIFTRT:
5308 *total = COSTS_N_INSNS (4);
5309 return true;
5310 case UNSPEC:
5311 if (XINT (x, 1) == UNSPEC_CONVERT)
5312 *total = COSTS_N_INSNS (0);
5313 else
5314 *total = COSTS_N_INSNS (4);
5315 return true;
5316 }
5317 /* Scale cost by mode size, except when initializing (cfun->decl == 0). */
5318 if (GET_MODE_CLASS (mode) == MODE_INT
5319 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5320 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5321 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5322 *total = cost;
5323 return true;
5324 }
5325
5326 static machine_mode
5327 spu_unwind_word_mode (void)
5328 {
5329 return SImode;
5330 }
5331
5332 /* Decide whether we can make a sibling call to a function. DECL is the
5333 declaration of the function being targeted by the call and EXP is the
5334 CALL_EXPR representing the call. */
5335 static bool
5336 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5337 {
5338 return decl && !TARGET_LARGE_MEM;
5339 }
5340
5341 /* We need to correctly update the back chain pointer and the Available
5342 Stack Size (which is in the second slot of the sp register). */
5343 void
5344 spu_allocate_stack (rtx op0, rtx op1)
5345 {
5346 HOST_WIDE_INT v;
5347 rtx chain = gen_reg_rtx (V4SImode);
5348 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5349 rtx sp = gen_reg_rtx (V4SImode);
5350 rtx splatted = gen_reg_rtx (V4SImode);
5351 rtx pat = gen_reg_rtx (TImode);
5352
5353 /* copy the back chain so we can save it back again. */
5354 emit_move_insn (chain, stack_bot);
5355
5356 op1 = force_reg (SImode, op1);
5357
5358 v = 0x1020300010203ll;
5359 emit_move_insn (pat, immed_double_const (v, v, TImode));
5360 emit_insn (gen_shufb (splatted, op1, op1, pat));
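  /* Editorial note: the shuffle pattern above copies word 0 of OP1 into
     all four word slots, so the vector subtract below decrements both the
     stack pointer (slot 0) and the Available Stack Size (slot 1) by the
     allocation size in a single instruction.  */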
5361
5362 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5363 emit_insn (gen_subv4si3 (sp, sp, splatted));
5364
5365 if (flag_stack_check)
5366 {
5367 rtx avail = gen_reg_rtx(SImode);
5368 rtx result = gen_reg_rtx(SImode);
5369 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5370 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5371 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5372 }
5373
5374 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5375
5376 emit_move_insn (stack_bot, chain);
5377
5378 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5379 }
5380
5381 void
5382 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5383 {
5384 static unsigned char arr[16] =
5385 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5386 rtx temp = gen_reg_rtx (SImode);
5387 rtx temp2 = gen_reg_rtx (SImode);
5388 rtx temp3 = gen_reg_rtx (V4SImode);
5389 rtx temp4 = gen_reg_rtx (V4SImode);
5390 rtx pat = gen_reg_rtx (TImode);
5391 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5392
5393 /* Restore the backchain from the first word, sp from the second. */
5394 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5395 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5396
5397 emit_move_insn (pat, array_to_constant (TImode, arr));
5398
5399 /* Compute Available Stack Size for sp */
5400 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5401 emit_insn (gen_shufb (temp3, temp, temp, pat));
5402
5403 /* Compute Available Stack Size for back chain */
5404 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5405 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5406 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5407
5408 emit_insn (gen_addv4si3 (sp, sp, temp3));
5409 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5410 }
5411
5412 static void
5413 spu_init_libfuncs (void)
5414 {
5415 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5416 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5417 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5418 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5419 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5420 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5421 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5422 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5423 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5424 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5425 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5426 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5427
5428 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5429 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5430
5431 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5432 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5433 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5434 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5435 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5436 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5437 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5438 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5439 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5440 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5441 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5442 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5443
5444 set_optab_libfunc (smul_optab, TImode, "__multi3");
5445 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5446 set_optab_libfunc (smod_optab, TImode, "__modti3");
5447 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5448 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5449 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5450 }
5451
5452 /* Make a subreg, stripping any existing subreg. We could possibly just
5453 call simplify_subreg, but in this case we know what we want. */
5454 rtx
5455 spu_gen_subreg (machine_mode mode, rtx x)
5456 {
5457 if (GET_CODE (x) == SUBREG)
5458 x = SUBREG_REG (x);
5459 if (GET_MODE (x) == mode)
5460 return x;
5461 return gen_rtx_SUBREG (mode, x, 0);
5462 }
5463
5464 static bool
5465 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5466 {
5467 return (TYPE_MODE (type) == BLKmode
5468 && ((type) == 0
5469 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5470 || int_size_in_bytes (type) >
5471 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5472 }
5473 \f
5474 /* Create the built-in types and functions */
5475
5476 enum spu_function_code
5477 {
5478 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5479 #include "spu-builtins.def"
5480 #undef DEF_BUILTIN
5481 NUM_SPU_BUILTINS
5482 };
5483
5484 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5485
5486 struct spu_builtin_description spu_builtins[] = {
5487 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5488 {fcode, icode, name, type, params},
5489 #include "spu-builtins.def"
5490 #undef DEF_BUILTIN
5491 };
5492
5493 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5494
5495 /* Returns the spu builtin decl for CODE. */
5496
5497 static tree
5498 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5499 {
5500 if (code >= NUM_SPU_BUILTINS)
5501 return error_mark_node;
5502
5503 return spu_builtin_decls[code];
5504 }
5505
5506
5507 static void
5508 spu_init_builtins (void)
5509 {
5510 struct spu_builtin_description *d;
5511 unsigned int i;
5512
5513 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5514 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5515 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5516 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5517 V4SF_type_node = build_vector_type (float_type_node, 4);
5518 V2DF_type_node = build_vector_type (double_type_node, 2);
5519
5520 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5521 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5522 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5523 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5524
5525 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5526
5527 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5528 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5529 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5530 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5531 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5532 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5533 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5534 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5535 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5536 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5538 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5539
5540 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5541 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5542 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5543 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5544 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5545 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5546 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5548
5549 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5550 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5551
5552 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5553
5554 spu_builtin_types[SPU_BTI_PTR] =
5555 build_pointer_type (build_qualified_type
5556 (void_type_node,
5557 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5558
5559 /* For each builtin we build a new prototype. The tree code will make
5560 sure nodes are shared. */
5561 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5562 {
5563 tree p;
5564 char name[64]; /* add_builtin_function will make a copy. */
5565 int parm;
5566
5567 if (d->name == 0)
5568 continue;
5569
5570 /* Find last parm. */
5571 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5572 ;
5573
5574 p = void_list_node;
5575 while (parm > 1)
5576 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5577
5578 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5579
5580 sprintf (name, "__builtin_%s", d->name);
5581 spu_builtin_decls[i] =
5582 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5583 if (d->fcode == SPU_MASK_FOR_LOAD)
5584 TREE_READONLY (spu_builtin_decls[i]) = 1;
5585
5586 /* These builtins don't throw. */
5587 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5588 }
5589 }
5590
5591 void
5592 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5593 {
5594 static unsigned char arr[16] =
5595 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5596
5597 rtx temp = gen_reg_rtx (Pmode);
5598 rtx temp2 = gen_reg_rtx (V4SImode);
5599 rtx temp3 = gen_reg_rtx (V4SImode);
5600 rtx pat = gen_reg_rtx (TImode);
5601 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5602
5603 emit_move_insn (pat, array_to_constant (TImode, arr));
5604
5605 /* Restore the sp. */
5606 emit_move_insn (temp, op1);
5607 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5608
5609 /* Compute available stack size for sp. */
5610 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5611 emit_insn (gen_shufb (temp3, temp, temp, pat));
5612
5613 emit_insn (gen_addv4si3 (sp, sp, temp3));
5614 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5615 }
5616
5617 int
5618 spu_safe_dma (HOST_WIDE_INT channel)
5619 {
5620 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5621 }
5622
5623 void
5624 spu_builtin_splats (rtx ops[])
5625 {
5626 machine_mode mode = GET_MODE (ops[0]);
5627 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5628 {
5629 unsigned char arr[16];
5630 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5631 emit_move_insn (ops[0], array_to_constant (mode, arr));
5632 }
5633 else
5634 {
5635 rtx reg = gen_reg_rtx (TImode);
5636 rtx shuf;
5637 if (GET_CODE (ops[1]) != REG
5638 && GET_CODE (ops[1]) != SUBREG)
5639 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
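      /* Editorial note: each constant below is a shufb pattern that
	 replicates the preferred-slot element of OPS[1] across the whole
	 vector: bytes 0-7 for the two-element modes, bytes 0-3 for the
	 four-element modes, bytes 2-3 for halfwords and byte 3 for
	 chars.  */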
5640 switch (mode)
5641 {
5642 case E_V2DImode:
5643 case E_V2DFmode:
5644 shuf =
5645 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5646 TImode);
5647 break;
5648 case E_V4SImode:
5649 case E_V4SFmode:
5650 shuf =
5651 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5652 TImode);
5653 break;
5654 case E_V8HImode:
5655 shuf =
5656 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5657 TImode);
5658 break;
5659 case E_V16QImode:
5660 shuf =
5661 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5662 TImode);
5663 break;
5664 default:
5665 abort ();
5666 }
5667 emit_move_insn (reg, shuf);
5668 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5669 }
5670 }
5671
5672 void
5673 spu_builtin_extract (rtx ops[])
5674 {
5675 machine_mode mode;
5676 rtx rot, from, tmp;
5677
5678 mode = GET_MODE (ops[1]);
5679
5680 if (GET_CODE (ops[2]) == CONST_INT)
5681 {
5682 switch (mode)
5683 {
5684 case E_V16QImode:
5685 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5686 break;
5687 case E_V8HImode:
5688 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5689 break;
5690 case E_V4SFmode:
5691 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5692 break;
5693 case E_V4SImode:
5694 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5695 break;
5696 case E_V2DImode:
5697 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5698 break;
5699 case E_V2DFmode:
5700 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5701 break;
5702 default:
5703 abort ();
5704 }
5705 return;
5706 }
5707
5708 from = spu_gen_subreg (TImode, ops[1]);
5709 rot = gen_reg_rtx (TImode);
5710 tmp = gen_reg_rtx (SImode);
5711
5712 switch (mode)
5713 {
5714 case E_V16QImode:
5715 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5716 break;
5717 case E_V8HImode:
5718 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5719 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5720 break;
5721 case E_V4SFmode:
5722 case E_V4SImode:
5723 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5724 break;
5725 case E_V2DImode:
5726 case E_V2DFmode:
5727 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5728 break;
5729 default:
5730 abort ();
5731 }
5732 emit_insn (gen_rotqby_ti (rot, from, tmp));
5733
5734 emit_insn (gen_spu_convert (ops[0], rot));
5735 }
5736
5737 void
5738 spu_builtin_insert (rtx ops[])
5739 {
5740 machine_mode mode = GET_MODE (ops[0]);
5741 machine_mode imode = GET_MODE_INNER (mode);
5742 rtx mask = gen_reg_rtx (TImode);
5743 rtx offset;
5744
5745 if (GET_CODE (ops[3]) == CONST_INT)
5746 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5747 else
5748 {
5749 offset = gen_reg_rtx (SImode);
5750 emit_insn (gen_mulsi3
5751 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5752 }
5753 emit_insn (gen_cpat
5754 (mask, stack_pointer_rtx, offset,
5755 GEN_INT (GET_MODE_SIZE (imode))));
5756 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5757 }
5758
5759 void
5760 spu_builtin_promote (rtx ops[])
5761 {
5762 machine_mode mode, imode;
5763 rtx rot, from, offset;
5764 HOST_WIDE_INT pos;
5765
5766 mode = GET_MODE (ops[0]);
5767 imode = GET_MODE_INNER (mode);
5768
5769 from = gen_reg_rtx (TImode);
5770 rot = spu_gen_subreg (TImode, ops[0]);
5771
5772 emit_insn (gen_spu_convert (from, ops[1]));
5773
5774 if (GET_CODE (ops[2]) == CONST_INT)
5775 {
5776 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5777 if (GET_MODE_SIZE (imode) < 4)
5778 pos += 4 - GET_MODE_SIZE (imode);
5779 offset = GEN_INT (pos & 15);
5780 }
5781 else
5782 {
5783 offset = gen_reg_rtx (SImode);
5784 switch (mode)
5785 {
5786 case E_V16QImode:
5787 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5788 break;
5789 case E_V8HImode:
5790 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5791 emit_insn (gen_addsi3 (offset, offset, offset));
5792 break;
5793 case E_V4SFmode:
5794 case E_V4SImode:
5795 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5796 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5797 break;
5798 case E_V2DImode:
5799 case E_V2DFmode:
5800 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5801 break;
5802 default:
5803 abort ();
5804 }
5805 }
5806 emit_insn (gen_rotqby_ti (rot, from, offset));
5807 }
5808
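/* Implement TARGET_TRAMPOLINE_INIT.  Initialize the trampoline at
   M_TRAMP by merging the address of FNDECL and the static chain value
   CXT into a small instruction template (the insna arrays below),
   storing the resulting instructions into the trampoline, and ending
   with a sync, presumably so the newly written code is visible to
   instruction fetch.  */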
5809 static void
5810 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5811 {
5812 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5813 rtx shuf = gen_reg_rtx (V4SImode);
5814 rtx insn = gen_reg_rtx (V4SImode);
5815 rtx shufc;
5816 rtx insnc;
5817 rtx mem;
5818
5819 fnaddr = force_reg (SImode, fnaddr);
5820 cxt = force_reg (SImode, cxt);
5821
5822 if (TARGET_LARGE_MEM)
5823 {
5824 rtx rotl = gen_reg_rtx (V4SImode);
5825 rtx mask = gen_reg_rtx (V4SImode);
5826 rtx bi = gen_reg_rtx (SImode);
5827 static unsigned char const shufa[16] = {
5828 2, 3, 0, 1, 18, 19, 16, 17,
5829 0, 1, 2, 3, 16, 17, 18, 19
5830 };
5831 static unsigned char const insna[16] = {
5832 0x41, 0, 0, 79,
5833 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5834 0x60, 0x80, 0, 79,
5835 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5836 };
5837
5838 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5839 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5840
5841 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5842 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5843 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5844 emit_insn (gen_selb (insn, insnc, rotl, mask));
5845
5846 mem = adjust_address (m_tramp, V4SImode, 0);
5847 emit_move_insn (mem, insn);
5848
5849 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5850 mem = adjust_address (m_tramp, Pmode, 16);
5851 emit_move_insn (mem, bi);
5852 }
5853 else
5854 {
5855 rtx scxt = gen_reg_rtx (SImode);
5856 rtx sfnaddr = gen_reg_rtx (SImode);
5857 static unsigned char const insna[16] = {
5858 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5859 0x30, 0, 0, 0,
5860 0, 0, 0, 0,
5861 0, 0, 0, 0
5862 };
5863
5864 shufc = gen_reg_rtx (TImode);
5865 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5866
5867 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5868 fits in 18 bits and that its last 4 bits are zero. This will be true
5869 if the stack pointer is initialized to 0x3fff0 at program start;
5870 otherwise the ila instruction will be garbage. */
5871
5872 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5873 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5874 emit_insn (gen_cpat
5875 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5876 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5877 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5878
5879 mem = adjust_address (m_tramp, V4SImode, 0);
5880 emit_move_insn (mem, insn);
5881 }
5882 emit_insn (gen_sync ());
5883 }
5884
5885 static bool
5886 spu_warn_func_return (tree decl)
5887 {
5888 /* Naked functions are implemented entirely in assembly, including the
5889 return sequence, so suppress warnings about this. */
5890 return !spu_naked_function_p (decl);
5891 }
5892
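/* Expand a sign extension of ops[1] into the wider ops[0] (DImode or
   TImode) with a single shufb.  The control word built in ARR places
   the original value's bytes in the least significant byte positions
   of the result and fills the remaining positions with a sign byte
   taken from SIGN (0x00 or 0xff according to the sign of ops[1]).  */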
5893 void
5894 spu_expand_sign_extend (rtx ops[])
5895 {
5896 unsigned char arr[16];
5897 rtx pat = gen_reg_rtx (TImode);
5898 rtx sign, c;
5899 int i, last;
5900 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5901 if (GET_MODE (ops[1]) == QImode)
5902 {
5903 sign = gen_reg_rtx (HImode);
5904 emit_insn (gen_extendqihi2 (sign, ops[1]));
5905 for (i = 0; i < 16; i++)
5906 arr[i] = 0x12;
5907 arr[last] = 0x13;
5908 }
5909 else
5910 {
5911 for (i = 0; i < 16; i++)
5912 arr[i] = 0x10;
5913 switch (GET_MODE (ops[1]))
5914 {
5915 case E_HImode:
5916 sign = gen_reg_rtx (SImode);
5917 emit_insn (gen_extendhisi2 (sign, ops[1]));
5918 arr[last] = 0x03;
5919 arr[last - 1] = 0x02;
5920 break;
5921 case E_SImode:
5922 sign = gen_reg_rtx (SImode);
5923 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5924 for (i = 0; i < 4; i++)
5925 arr[last - i] = 3 - i;
5926 break;
5927 case E_DImode:
5928 sign = gen_reg_rtx (SImode);
5929 c = gen_reg_rtx (SImode);
5930 emit_insn (gen_spu_convert (c, ops[1]));
5931 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5932 for (i = 0; i < 8; i++)
5933 arr[last - i] = 7 - i;
5934 break;
5935 default:
5936 abort ();
5937 }
5938 }
5939 emit_move_insn (pat, array_to_constant (TImode, arr));
5940 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5941 }
5942
5943 /* Expand vector initialization. If there are any constant parts, load the
5944 constant parts first, then load any non-constant parts. */
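/* Illustrative example: initializing a V4SI vector with { x, 1, 2, y }
   first recurses on the constant image { 1, 1, 2, 1 } (empty slots
   filled with the first constant so splats can be used when possible)
   and then inserts the variable elements x and y at positions 0 and 3
   with spu_builtin_insert.  */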
5945 void
5946 spu_expand_vector_init (rtx target, rtx vals)
5947 {
5948 machine_mode mode = GET_MODE (target);
5949 int n_elts = GET_MODE_NUNITS (mode);
5950 int n_var = 0;
5951 bool all_same = true;
5952 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5953 int i;
5954
5955 first = XVECEXP (vals, 0, 0);
5956 for (i = 0; i < n_elts; ++i)
5957 {
5958 x = XVECEXP (vals, 0, i);
5959 if (!(CONST_INT_P (x)
5960 || GET_CODE (x) == CONST_DOUBLE
5961 || GET_CODE (x) == CONST_FIXED))
5962 ++n_var;
5963 else
5964 {
5965 if (first_constant == NULL_RTX)
5966 first_constant = x;
5967 }
5968 if (i > 0 && !rtx_equal_p (x, first))
5969 all_same = false;
5970 }
5971
5972 /* if all elements are the same, use splats to repeat elements */
5973 if (all_same)
5974 {
5975 if (!CONSTANT_P (first)
5976 && !register_operand (first, GET_MODE (x)))
5977 first = force_reg (GET_MODE (first), first);
5978 emit_insn (gen_spu_splats (target, first));
5979 return;
5980 }
5981
5982 /* load constant parts */
5983 if (n_var != n_elts)
5984 {
5985 if (n_var == 0)
5986 {
5987 emit_move_insn (target,
5988 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5989 }
5990 else
5991 {
5992 rtx constant_parts_rtx = copy_rtx (vals);
5993
5994 gcc_assert (first_constant != NULL_RTX);
5995 /* Fill empty slots with the first constant; this increases
5996 our chance of using splats in the recursive call below. */
5997 for (i = 0; i < n_elts; ++i)
5998 {
5999 x = XVECEXP (constant_parts_rtx, 0, i);
6000 if (!(CONST_INT_P (x)
6001 || GET_CODE (x) == CONST_DOUBLE
6002 || GET_CODE (x) == CONST_FIXED))
6003 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6004 }
6005
6006 spu_expand_vector_init (target, constant_parts_rtx);
6007 }
6008 }
6009
6010 /* load variable parts */
6011 if (n_var != 0)
6012 {
6013 rtx insert_operands[4];
6014
6015 insert_operands[0] = target;
6016 insert_operands[2] = target;
6017 for (i = 0; i < n_elts; ++i)
6018 {
6019 x = XVECEXP (vals, 0, i);
6020 if (!(CONST_INT_P (x)
6021 || GET_CODE (x) == CONST_DOUBLE
6022 || GET_CODE (x) == CONST_FIXED))
6023 {
6024 if (!register_operand (x, GET_MODE (x)))
6025 x = force_reg (GET_MODE (x), x);
6026 insert_operands[1] = x;
6027 insert_operands[3] = GEN_INT (i);
6028 spu_builtin_insert (insert_operands);
6029 }
6030 }
6031 }
6032 }
6033
6034 /* Return the insn code of the vector compare instruction for the given CODE,
6035 DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6036
6037 static int
6038 get_vec_cmp_insn (enum rtx_code code,
6039 machine_mode dest_mode,
6040 machine_mode op_mode)
6041
6042 {
6043 switch (code)
6044 {
6045 case EQ:
6046 if (dest_mode == V16QImode && op_mode == V16QImode)
6047 return CODE_FOR_ceq_v16qi;
6048 if (dest_mode == V8HImode && op_mode == V8HImode)
6049 return CODE_FOR_ceq_v8hi;
6050 if (dest_mode == V4SImode && op_mode == V4SImode)
6051 return CODE_FOR_ceq_v4si;
6052 if (dest_mode == V4SImode && op_mode == V4SFmode)
6053 return CODE_FOR_ceq_v4sf;
6054 if (dest_mode == V2DImode && op_mode == V2DFmode)
6055 return CODE_FOR_ceq_v2df;
6056 break;
6057 case GT:
6058 if (dest_mode == V16QImode && op_mode == V16QImode)
6059 return CODE_FOR_cgt_v16qi;
6060 if (dest_mode == V8HImode && op_mode == V8HImode)
6061 return CODE_FOR_cgt_v8hi;
6062 if (dest_mode == V4SImode && op_mode == V4SImode)
6063 return CODE_FOR_cgt_v4si;
6064 if (dest_mode == V4SImode && op_mode == V4SFmode)
6065 return CODE_FOR_cgt_v4sf;
6066 if (dest_mode == V2DImode && op_mode == V2DFmode)
6067 return CODE_FOR_cgt_v2df;
6068 break;
6069 case GTU:
6070 if (dest_mode == V16QImode && op_mode == V16QImode)
6071 return CODE_FOR_clgt_v16qi;
6072 if (dest_mode == V8HImode && op_mode == V8HImode)
6073 return CODE_FOR_clgt_v8hi;
6074 if (dest_mode == V4SImode && op_mode == V4SImode)
6075 return CODE_FOR_clgt_v4si;
6076 break;
6077 default:
6078 break;
6079 }
6080 return -1;
6081 }
6082
6083 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6084 DMODE is the expected destination mode. This is a recursive function. */
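/* Note that the hardware only provides EQ, GT and GTU vector compares
   (see get_vec_cmp_insn above); the remaining predicates are
   synthesized below.  For example, LE becomes LT OR EQ, and LT in turn
   becomes GT with the operands swapped.  */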
6085
6086 static rtx
6087 spu_emit_vector_compare (enum rtx_code rcode,
6088 rtx op0, rtx op1,
6089 machine_mode dmode)
6090 {
6091 int vec_cmp_insn;
6092 rtx mask;
6093 machine_mode dest_mode;
6094 machine_mode op_mode = GET_MODE (op1);
6095
6096 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6097
6098 /* Floating point vector compare instructions use destination V4SImode.
6099 Double floating point vector compare instructions use destination V2DImode.
6100 The result is moved to the appropriate mode later. */
6101 if (dmode == V4SFmode)
6102 dest_mode = V4SImode;
6103 else if (dmode == V2DFmode)
6104 dest_mode = V2DImode;
6105 else
6106 dest_mode = dmode;
6107
6108 mask = gen_reg_rtx (dest_mode);
6109 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6110
6111 if (vec_cmp_insn == -1)
6112 {
6113 bool swap_operands = false;
6114 bool try_again = false;
6115 switch (rcode)
6116 {
6117 case LT:
6118 rcode = GT;
6119 swap_operands = true;
6120 try_again = true;
6121 break;
6122 case LTU:
6123 rcode = GTU;
6124 swap_operands = true;
6125 try_again = true;
6126 break;
6127 case NE:
6128 case UNEQ:
6129 case UNLE:
6130 case UNLT:
6131 case UNGE:
6132 case UNGT:
6133 case UNORDERED:
6134 /* Treat A != B as ~(A==B). */
6135 {
6136 enum rtx_code rev_code;
6137 enum insn_code nor_code;
6138 rtx rev_mask;
6139
6140 rev_code = reverse_condition_maybe_unordered (rcode);
6141 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6142
6143 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6144 gcc_assert (nor_code != CODE_FOR_nothing);
6145 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6146 if (dmode != dest_mode)
6147 {
6148 rtx temp = gen_reg_rtx (dest_mode);
6149 convert_move (temp, mask, 0);
6150 return temp;
6151 }
6152 return mask;
6153 }
6154 break;
6155 case GE:
6156 case GEU:
6157 case LE:
6158 case LEU:
6159 /* Try GT/GTU/LT/LTU OR EQ */
6160 {
6161 rtx c_rtx, eq_rtx;
6162 enum insn_code ior_code;
6163 enum rtx_code new_code;
6164
6165 switch (rcode)
6166 {
6167 case GE: new_code = GT; break;
6168 case GEU: new_code = GTU; break;
6169 case LE: new_code = LT; break;
6170 case LEU: new_code = LTU; break;
6171 default:
6172 gcc_unreachable ();
6173 }
6174
6175 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6176 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6177
6178 ior_code = optab_handler (ior_optab, dest_mode);
6179 gcc_assert (ior_code != CODE_FOR_nothing);
6180 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6181 if (dmode != dest_mode)
6182 {
6183 rtx temp = gen_reg_rtx (dest_mode);
6184 convert_move (temp, mask, 0);
6185 return temp;
6186 }
6187 return mask;
6188 }
6189 break;
6190 case LTGT:
6191 /* Try LT OR GT */
6192 {
6193 rtx lt_rtx, gt_rtx;
6194 enum insn_code ior_code;
6195
6196 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6197 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6198
6199 ior_code = optab_handler (ior_optab, dest_mode);
6200 gcc_assert (ior_code != CODE_FOR_nothing);
6201 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6202 if (dmode != dest_mode)
6203 {
6204 rtx temp = gen_reg_rtx (dest_mode);
6205 convert_move (temp, mask, 0);
6206 return temp;
6207 }
6208 return mask;
6209 }
6210 break;
6211 case ORDERED:
6212 /* Implement as (A==A) & (B==B) */
6213 {
6214 rtx a_rtx, b_rtx;
6215 enum insn_code and_code;
6216
6217 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6218 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6219
6220 and_code = optab_handler (and_optab, dest_mode);
6221 gcc_assert (and_code != CODE_FOR_nothing);
6222 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6223 if (dmode != dest_mode)
6224 {
6225 rtx temp = gen_reg_rtx (dest_mode);
6226 convert_move (temp, mask, 0);
6227 return temp;
6228 }
6229 return mask;
6230 }
6231 break;
6232 default:
6233 gcc_unreachable ();
6234 }
6235
6236 /* You only get two chances. */
6237 if (try_again)
6238 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6239
6240 gcc_assert (vec_cmp_insn != -1);
6241
6242 if (swap_operands)
6243 {
6244 rtx tmp;
6245 tmp = op0;
6246 op0 = op1;
6247 op1 = tmp;
6248 }
6249 }
6250
6251 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6252 if (dmode != dest_mode)
6253 {
6254 rtx temp = gen_reg_rtx (dest_mode);
6255 convert_move (temp, mask, 0);
6256 return temp;
6257 }
6258 return mask;
6259 }
6260
6261
6262 /* Emit a vector conditional expression.
6263 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
6264 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
6265
6266 int
6267 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6268 rtx cond, rtx cc_op0, rtx cc_op1)
6269 {
6270 machine_mode dest_mode = GET_MODE (dest);
6271 enum rtx_code rcode = GET_CODE (cond);
6272 rtx mask;
6273
6274 /* Get the vector mask for the given relational operation. */
6275 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6276
6277 emit_insn (gen_selb (dest, op2, op1, mask));
6278
6279 return 1;
6280 }
6281
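/* Copy OP into a register of mode MODE.  A VOIDmode constant (or a
   BLKmode operand) is converted to MODE first; an operand whose mode
   has the same size as MODE is simply reinterpreted with a subreg;
   anything else goes through a spu_convert.  */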
6282 static rtx
6283 spu_force_reg (machine_mode mode, rtx op)
6284 {
6285 rtx x, r;
6286 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6287 {
6288 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6289 || GET_MODE (op) == BLKmode)
6290 return force_reg (mode, convert_to_mode (mode, op, 0));
6291 abort ();
6292 }
6293
6294 r = force_reg (GET_MODE (op), op);
6295 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6296 {
6297 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6298 if (x)
6299 return x;
6300 }
6301
6302 x = gen_reg_rtx (mode);
6303 emit_insn (gen_spu_convert (x, r));
6304 return x;
6305 }
6306
6307 static void
6308 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6309 {
6310 HOST_WIDE_INT v = 0;
6311 int lsbits;
6312 /* Check the range of immediate operands. */
6313 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6314 {
6315 int range = p - SPU_BTI_7;
6316
6317 if (!CONSTANT_P (op))
6318 error ("%s expects an integer literal in the range [%d, %d]",
6319 d->name,
6320 spu_builtin_range[range].low, spu_builtin_range[range].high);
6321
6322 if (GET_CODE (op) == CONST
6323 && (GET_CODE (XEXP (op, 0)) == PLUS
6324 || GET_CODE (XEXP (op, 0)) == MINUS))
6325 {
6326 v = INTVAL (XEXP (XEXP (op, 0), 1));
6327 op = XEXP (XEXP (op, 0), 0);
6328 }
6329 else if (GET_CODE (op) == CONST_INT)
6330 v = INTVAL (op);
6331 else if (GET_CODE (op) == CONST_VECTOR
6332 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6333 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6334
6335 /* The default for v is 0 which is valid in every range. */
6336 if (v < spu_builtin_range[range].low
6337 || v > spu_builtin_range[range].high)
6338 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6339 d->name,
6340 spu_builtin_range[range].low, spu_builtin_range[range].high,
6341 v);
6342
6343 switch (p)
6344 {
6345 case SPU_BTI_S10_4:
6346 lsbits = 4;
6347 break;
6348 case SPU_BTI_U16_2:
6349 /* This is only used in lqa and stqa. Even though the insns
6350 encode 16 bits of the address (all but the 2 least
6351 significant), only 14 bits are used because the address is
6352 masked to be 16-byte aligned. */
6353 lsbits = 4;
6354 break;
6355 case SPU_BTI_S16_2:
6356 /* This is used for lqr and stqr. */
6357 lsbits = 2;
6358 break;
6359 default:
6360 lsbits = 0;
6361 }
6362
6363 if (GET_CODE (op) == LABEL_REF
6364 || (GET_CODE (op) == SYMBOL_REF
6365 && SYMBOL_REF_FUNCTION_P (op))
6366 || (v & ((1 << lsbits) - 1)) != 0)
6367 warning (0, "%d least significant bits of %s are ignored", lsbits,
6368 d->name);
6369 }
6370 }
6371
6372
6373 static int
6374 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6375 rtx target, rtx ops[])
6376 {
6377 enum insn_code icode = (enum insn_code) d->icode;
6378 int i = 0, a;
6379
6380 /* Expand the arguments into rtl. */
6381
6382 if (d->parm[0] != SPU_BTI_VOID)
6383 ops[i++] = target;
6384
6385 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6386 {
6387 tree arg = CALL_EXPR_ARG (exp, a);
6388 if (arg == 0)
6389 abort ();
6390 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6391 }
6392
6393 gcc_assert (i == insn_data[icode].n_generator_args);
6394 return i;
6395 }
6396
6397 static rtx
6398 spu_expand_builtin_1 (struct spu_builtin_description *d,
6399 tree exp, rtx target)
6400 {
6401 rtx pat;
6402 rtx ops[8];
6403 enum insn_code icode = (enum insn_code) d->icode;
6404 machine_mode mode, tmode;
6405 int i, p;
6406 int n_operands;
6407 tree return_type;
6408
6409 /* Set up ops[] with values from arglist. */
6410 n_operands = expand_builtin_args (d, exp, target, ops);
6411
6412 /* Handle the target operand which must be operand 0. */
6413 i = 0;
6414 if (d->parm[0] != SPU_BTI_VOID)
6415 {
6416
6417 /* We prefer the mode specified for the match_operand; otherwise
6418 use the mode from the builtin function prototype. */
6419 tmode = insn_data[d->icode].operand[0].mode;
6420 if (tmode == VOIDmode)
6421 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6422
6423 /* Try to use target, because not using it can lead to extra copies,
6424 and when all of the registers are in use, extra copies lead
6425 to extra spills. */
6426 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6427 ops[0] = target;
6428 else
6429 target = ops[0] = gen_reg_rtx (tmode);
6430
6431 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6432 abort ();
6433
6434 i++;
6435 }
6436
6437 if (d->fcode == SPU_MASK_FOR_LOAD)
6438 {
6439 machine_mode mode = insn_data[icode].operand[1].mode;
6440 tree arg;
6441 rtx addr, op, pat;
6442
6443 /* get addr */
6444 arg = CALL_EXPR_ARG (exp, 0);
6445 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6446 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6447 addr = memory_address (mode, op);
6448
6449 /* negate addr */
6450 op = gen_reg_rtx (GET_MODE (addr));
6451 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6452 op = gen_rtx_MEM (mode, op);
6453
6454 pat = GEN_FCN (icode) (target, op);
6455 if (!pat)
6456 return 0;
6457 emit_insn (pat);
6458 return target;
6459 }
6460
6461 /* Ignore align_hint, but still expand its args in case they have
6462 side effects. */
6463 if (icode == CODE_FOR_spu_align_hint)
6464 return 0;
6465
6466 /* Handle the rest of the operands. */
6467 for (p = 1; i < n_operands; i++, p++)
6468 {
6469 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6470 mode = insn_data[d->icode].operand[i].mode;
6471 else
6472 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6473
6474 /* mode can be VOIDmode here for labels */
6475
6476 /* For specific intrinsics with an immediate operand, e.g.,
6477 si_ai(), we sometimes need to convert the scalar argument to a
6478 vector argument by splatting the scalar. */
6479 if (VECTOR_MODE_P (mode)
6480 && (GET_CODE (ops[i]) == CONST_INT
6481 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6482 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6483 {
6484 if (GET_CODE (ops[i]) == CONST_INT)
6485 ops[i] = spu_const (mode, INTVAL (ops[i]));
6486 else
6487 {
6488 rtx reg = gen_reg_rtx (mode);
6489 machine_mode imode = GET_MODE_INNER (mode);
6490 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6491 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6492 if (imode != GET_MODE (ops[i]))
6493 ops[i] = convert_to_mode (imode, ops[i],
6494 TYPE_UNSIGNED (spu_builtin_types
6495 [d->parm[i]]));
6496 emit_insn (gen_spu_splats (reg, ops[i]));
6497 ops[i] = reg;
6498 }
6499 }
6500
6501 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6502
6503 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6504 ops[i] = spu_force_reg (mode, ops[i]);
6505 }
6506
6507 switch (n_operands)
6508 {
6509 case 0:
6510 pat = GEN_FCN (icode) (0);
6511 break;
6512 case 1:
6513 pat = GEN_FCN (icode) (ops[0]);
6514 break;
6515 case 2:
6516 pat = GEN_FCN (icode) (ops[0], ops[1]);
6517 break;
6518 case 3:
6519 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6520 break;
6521 case 4:
6522 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6523 break;
6524 case 5:
6525 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6526 break;
6527 case 6:
6528 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6529 break;
6530 default:
6531 abort ();
6532 }
6533
6534 if (!pat)
6535 abort ();
6536
6537 if (d->type == B_CALL || d->type == B_BISLED)
6538 emit_call_insn (pat);
6539 else if (d->type == B_JUMP)
6540 {
6541 emit_jump_insn (pat);
6542 emit_barrier ();
6543 }
6544 else
6545 emit_insn (pat);
6546
6547 return_type = spu_builtin_types[d->parm[0]];
6548 if (d->parm[0] != SPU_BTI_VOID
6549 && GET_MODE (target) != TYPE_MODE (return_type))
6550 {
6551 /* target is the return value. It should always have the mode of
6552 the builtin function prototype. */
6553 target = spu_force_reg (TYPE_MODE (return_type), target);
6554 }
6555
6556 return target;
6557 }
6558
6559 rtx
6560 spu_expand_builtin (tree exp,
6561 rtx target,
6562 rtx subtarget ATTRIBUTE_UNUSED,
6563 machine_mode mode ATTRIBUTE_UNUSED,
6564 int ignore ATTRIBUTE_UNUSED)
6565 {
6566 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6567 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6568 struct spu_builtin_description *d;
6569
6570 if (fcode < NUM_SPU_BUILTINS)
6571 {
6572 d = &spu_builtins[fcode];
6573
6574 return spu_expand_builtin_1 (d, exp, target);
6575 }
6576 abort ();
6577 }
6578
6579 /* Implement targetm.vectorize.builtin_mask_for_load. */
6580 static tree
6581 spu_builtin_mask_for_load (void)
6582 {
6583 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6584 }
6585
6586 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6587 static int
6588 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6589 tree vectype,
6590 int misalign ATTRIBUTE_UNUSED)
6591 {
6592 unsigned elements;
6593
6594 switch (type_of_cost)
6595 {
6596 case scalar_stmt:
6597 case vector_stmt:
6598 case vector_load:
6599 case vector_store:
6600 case vec_to_scalar:
6601 case scalar_to_vec:
6602 case cond_branch_not_taken:
6603 case vec_perm:
6604 case vec_promote_demote:
6605 return 1;
6606
6607 case scalar_store:
6608 return 10;
6609
6610 case scalar_load:
6611 /* Load + rotate. */
6612 return 2;
6613
6614 case unaligned_load:
6615 return 2;
6616
6617 case cond_branch_taken:
6618 return 6;
6619
6620 case vec_construct:
6621 elements = TYPE_VECTOR_SUBPARTS (vectype);
6622 return elements / 2 + 1;
6623
6624 default:
6625 gcc_unreachable ();
6626 }
6627 }
6628
6629 /* Implement targetm.vectorize.init_cost. */
6630
6631 static void *
6632 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6633 {
6634 unsigned *cost = XNEWVEC (unsigned, 3);
6635 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6636 return cost;
6637 }
6638
6639 /* Implement targetm.vectorize.add_stmt_cost. */
6640
6641 static unsigned
6642 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6643 struct _stmt_vec_info *stmt_info, int misalign,
6644 enum vect_cost_model_location where)
6645 {
6646 unsigned *cost = (unsigned *) data;
6647 unsigned retval = 0;
6648
6649 if (flag_vect_cost_model)
6650 {
6651 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6652 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6653
6654 /* Statements in an inner loop relative to the loop being
6655 vectorized are weighted more heavily. The value here is
6656 arbitrary and could potentially be improved with analysis. */
6657 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6658 count *= 50; /* FIXME. */
6659
6660 retval = (unsigned) (count * stmt_cost);
6661 cost[where] += retval;
6662 }
6663
6664 return retval;
6665 }
6666
6667 /* Implement targetm.vectorize.finish_cost. */
6668
6669 static void
6670 spu_finish_cost (void *data, unsigned *prologue_cost,
6671 unsigned *body_cost, unsigned *epilogue_cost)
6672 {
6673 unsigned *cost = (unsigned *) data;
6674 *prologue_cost = cost[vect_prologue];
6675 *body_cost = cost[vect_body];
6676 *epilogue_cost = cost[vect_epilogue];
6677 }
6678
6679 /* Implement targetm.vectorize.destroy_cost_data. */
6680
6681 static void
6682 spu_destroy_cost_data (void *data)
6683 {
6684 free (data);
6685 }
6686
6687 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6688 after applying N iterations. This routine does not determine
6689 how many iterations are required to reach the desired alignment. */
6690
6691 static bool
6692 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6693 {
6694 if (is_packed)
6695 return false;
6696
6697 /* All other types are naturally aligned. */
6698 return true;
6699 }
6700
6701 /* Return the appropriate mode for a named address pointer. */
6702 static machine_mode
6703 spu_addr_space_pointer_mode (addr_space_t addrspace)
6704 {
6705 switch (addrspace)
6706 {
6707 case ADDR_SPACE_GENERIC:
6708 return ptr_mode;
6709 case ADDR_SPACE_EA:
6710 return EAmode;
6711 default:
6712 gcc_unreachable ();
6713 }
6714 }
6715
6716 /* Return the appropriate mode for an address in a named address space. */
6717 static machine_mode
6718 spu_addr_space_address_mode (addr_space_t addrspace)
6719 {
6720 switch (addrspace)
6721 {
6722 case ADDR_SPACE_GENERIC:
6723 return Pmode;
6724 case ADDR_SPACE_EA:
6725 return EAmode;
6726 default:
6727 gcc_unreachable ();
6728 }
6729 }
6730
6731 /* Determine if one named address space is a subset of another. */
6732
6733 static bool
6734 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6735 {
6736 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6737 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6738
6739 if (subset == superset)
6740 return true;
6741
6742 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6743 being subsets but instead as disjoint address spaces. */
6744 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6745 return false;
6746
6747 else
6748 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6749 }
6750
6751 /* Convert from one address space to another. */
6752 static rtx
6753 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6754 {
6755 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6756 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6757
6758 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6759 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6760
6761 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6762 {
6763 rtx result, ls;
6764
6765 ls = gen_const_mem (DImode,
6766 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6767 set_mem_align (ls, 128);
6768
6769 result = gen_reg_rtx (Pmode);
6770 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6771 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6772 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6773 ls, const0_rtx, Pmode, 1);
6774
6775 emit_insn (gen_subsi3 (result, op, ls));
6776
6777 return result;
6778 }
6779
6780 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6781 {
6782 rtx result, ls;
6783
6784 ls = gen_const_mem (DImode,
6785 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6786 set_mem_align (ls, 128);
6787
6788 result = gen_reg_rtx (EAmode);
6789 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6790 op = force_reg (Pmode, op);
6791 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6792 ls, const0_rtx, EAmode, 1);
6793 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6794
6795 if (EAmode == SImode)
6796 emit_insn (gen_addsi3 (result, op, ls));
6797 else
6798 emit_insn (gen_adddi3 (result, op, ls));
6799
6800 return result;
6801 }
6802
6803 else
6804 gcc_unreachable ();
6805 }
6806
6807
6808 /* Count the total number of instructions in each pipe and return the
6809 maximum, which is used as the Minimum Iteration Interval (MII)
6810 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6811 -2 means the instruction can go in either pipe0 or pipe1. */
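/* A note on the bound computed below: t[] counts instructions by
   get_pipe () + 2, so t[0] holds the dual-pipe-capable instructions,
   t[2] the pipe0-only and t[3] the pipe1-only ones.  The MII is the
   larger of each pipe's own count and the total number of issue slots
   needed, (t[0] + t[2] + t[3] + 1) / 2, since at most two instructions
   issue per cycle.  */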
6812 static int
6813 spu_sms_res_mii (struct ddg *g)
6814 {
6815 int i;
6816 unsigned t[4] = {0, 0, 0, 0};
6817
6818 for (i = 0; i < g->num_nodes; i++)
6819 {
6820 rtx_insn *insn = g->nodes[i].insn;
6821 int p = get_pipe (insn) + 2;
6822
6823 gcc_assert (p >= 0);
6824 gcc_assert (p < 4);
6825
6826 t[p]++;
6827 if (dump_file && INSN_P (insn))
6828 fprintf (dump_file, "i%d %s %d %d\n",
6829 INSN_UID (insn),
6830 insn_data[INSN_CODE(insn)].name,
6831 p, t[p]);
6832 }
6833 if (dump_file)
6834 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6835
6836 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6837 }
6838
6839
6840 void
6841 spu_init_expanders (void)
6842 {
6843 if (cfun)
6844 {
6845 rtx r0, r1;
6846 /* The hard frame pointer register is only 128-bit aligned when
6847 frame_pointer_needed is true. We don't know that until we're
6848 expanding the prologue. */
6849 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6850
6851 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6852 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6853 to be treated as aligned, so generate them here. */
6854 r0 = gen_reg_rtx (SImode);
6855 r1 = gen_reg_rtx (SImode);
6856 mark_reg_pointer (r0, 128);
6857 mark_reg_pointer (r1, 128);
6858 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6859 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6860 }
6861 }
6862
6863 static machine_mode
6864 spu_libgcc_cmp_return_mode (void)
6865 {
6866
6867 /* For SPU, word mode is TImode, so it is better to use SImode
6868 for compare returns. */
6869 return SImode;
6870 }
6871
6872 static machine_mode
6873 spu_libgcc_shift_count_mode (void)
6874 {
6875 /* For SPU, word mode is TImode, so it is better to use SImode
6876 for shift counts. */
6877 return SImode;
6878 }
6879
6880 /* Implement targetm.section_type_flags. */
6881 static unsigned int
6882 spu_section_type_flags (tree decl, const char *name, int reloc)
6883 {
6884 /* .toe needs to have type @nobits. */
6885 if (strcmp (name, ".toe") == 0)
6886 return SECTION_BSS;
6887 /* Don't load _ea into the current address space. */
6888 if (strcmp (name, "._ea") == 0)
6889 return SECTION_WRITE | SECTION_DEBUG;
6890 return default_section_type_flags (decl, name, reloc);
6891 }
6892
6893 /* Implement targetm.select_section. */
6894 static section *
6895 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6896 {
6897 /* Variables and constants defined in the __ea address space
6898 go into a special section named "._ea". */
6899 if (TREE_TYPE (decl) != error_mark_node
6900 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6901 {
6902 /* We might get called with string constants, but get_named_section
6903 doesn't like them as they are not DECLs. Also, we need to set
6904 flags in that case. */
6905 if (!DECL_P (decl))
6906 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6907
6908 return get_named_section (decl, "._ea", reloc);
6909 }
6910
6911 return default_elf_select_section (decl, reloc, align);
6912 }
6913
6914 /* Implement targetm.unique_section. */
6915 static void
6916 spu_unique_section (tree decl, int reloc)
6917 {
6918 /* We don't support unique section names in the __ea address
6919 space for now. */
6920 if (TREE_TYPE (decl) != error_mark_node
6921 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6922 return;
6923
6924 default_unique_section (decl, reloc);
6925 }
6926
6927 /* Generate a constant or register which contains 2^SCALE. We assume
6928 the result is valid for MODE. Currently, MODE must be V4SFmode and
6929 SCALE must be SImode. */
6930 rtx
6931 spu_gen_exp2 (machine_mode mode, rtx scale)
6932 {
6933 gcc_assert (mode == V4SFmode);
6934 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6935 if (GET_CODE (scale) != CONST_INT)
6936 {
6937 /* unsigned int exp = (127 + scale) << 23;
6938 __vector float m = (__vector float) spu_splats (exp); */
6939 rtx reg = force_reg (SImode, scale);
6940 rtx exp = gen_reg_rtx (SImode);
6941 rtx mul = gen_reg_rtx (mode);
6942 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6943 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6944 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6945 return mul;
6946 }
6947 else
6948 {
6949 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6950 unsigned char arr[16];
6951 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6952 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6953 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6954 arr[3] = arr[7] = arr[11] = arr[15] = 0;
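/* Each 4-byte lane above is the IEEE single-precision encoding of
   2^SCALE: the biased exponent EXP goes in bits 30..23 and the
   mantissa is zero.  For example, SCALE == 3 gives EXP == 130 == 0x82,
   so every word becomes 0x41000000, i.e. 8.0f.  */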
6955 return array_to_constant (mode, arr);
6956 }
6957 }
6958
6959 /* After reload, just change the convert into a move instruction
6960 or a dead instruction. */
6961 void
6962 spu_split_convert (rtx ops[])
6963 {
6964 if (REGNO (ops[0]) == REGNO (ops[1]))
6965 emit_note (NOTE_INSN_DELETED);
6966 else
6967 {
6968 /* Use TImode always as this might help hard reg copyprop. */
6969 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6970 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6971 emit_insn (gen_move_insn (op0, op1));
6972 }
6973 }
6974
6975 void
6976 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6977 {
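/* brsl branches to _mcount and stores the address of the following
   instruction in $75; presumably $75 is used instead of the normal
   link register $0 so that the caller's own return address is not
   clobbered.  */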
6978 fprintf (file, "# profile\n");
6979 fprintf (file, "brsl $75, _mcount\n");
6980 }
6981
6982 /* Implement targetm.ref_may_alias_errno. */
6983 static bool
6984 spu_ref_may_alias_errno (ao_ref *ref)
6985 {
6986 tree base = ao_ref_base (ref);
6987
6988 /* With SPU newlib, errno is defined as something like
6989 _impure_data._errno.
6990 The default implementation of this target macro does not
6991 recognize such expressions, so we special-case them here. */
6992
6993 if (TREE_CODE (base) == VAR_DECL
6994 && !TREE_STATIC (base)
6995 && DECL_EXTERNAL (base)
6996 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
6997 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
6998 "_impure_data") == 0
6999 /* _errno is the first member of _impure_data. */
7000 && ref->offset == 0)
7001 return true;
7002
7003 return default_ref_may_alias_errno (ref);
7004 }
7005
7006 /* Output thunk to FILE that implements a C++ virtual function call (with
7007 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7008 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7009 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7010 relative to the resulting this pointer. */
7011
7012 static void
7013 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7014 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7015 tree function)
7016 {
7017 rtx op[8];
7018
7019 /* Make sure unwind info is emitted for the thunk if needed. */
7020 final_start_function (emit_barrier (), file, 1);
7021
7022 /* Operand 0 is the target function. */
7023 op[0] = XEXP (DECL_RTL (function), 0);
7024
7025 /* Operand 1 is the 'this' pointer. */
7026 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7027 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7028 else
7029 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7030
7031 /* Operands 2/3 are the low/high halfwords of delta. */
7032 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7033 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7034
7035 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7036 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7037 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7038
7039 /* Operands 6/7 are temporary registers. */
7040 op[6] = gen_rtx_REG (Pmode, 79);
7041 op[7] = gen_rtx_REG (Pmode, 78);
7042
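/* A note on the immediate ranges used below: ai takes a 10-bit signed
   immediate (hence +/-0x200), il a 16-bit signed immediate
   (+/-0x8000), and anything larger is composed with ilhu (load the
   upper 16 bits) followed by iohl (or in the lower 16 bits), using the
   halfword operands 2-5 prepared above.  */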
7043 /* Add DELTA to this pointer. */
7044 if (delta)
7045 {
7046 if (delta >= -0x200 && delta < 0x200)
7047 output_asm_insn ("ai\t%1,%1,%2", op);
7048 else if (delta >= -0x8000 && delta < 0x8000)
7049 {
7050 output_asm_insn ("il\t%6,%2", op);
7051 output_asm_insn ("a\t%1,%1,%6", op);
7052 }
7053 else
7054 {
7055 output_asm_insn ("ilhu\t%6,%3", op);
7056 output_asm_insn ("iohl\t%6,%2", op);
7057 output_asm_insn ("a\t%1,%1,%6", op);
7058 }
7059 }
7060
7061 /* Perform vcall adjustment. */
7062 if (vcall_offset)
7063 {
7064 output_asm_insn ("lqd\t%7,0(%1)", op);
7065 output_asm_insn ("rotqby\t%7,%7,%1", op);
7066
7067 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7068 output_asm_insn ("ai\t%7,%7,%4", op);
7069 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7070 {
7071 output_asm_insn ("il\t%6,%4", op);
7072 output_asm_insn ("a\t%7,%7,%6", op);
7073 }
7074 else
7075 {
7076 output_asm_insn ("ilhu\t%6,%5", op);
7077 output_asm_insn ("iohl\t%6,%4", op);
7078 output_asm_insn ("a\t%7,%7,%6", op);
7079 }
7080
7081 output_asm_insn ("lqd\t%6,0(%7)", op);
7082 output_asm_insn ("rotqby\t%6,%6,%7", op);
7083 output_asm_insn ("a\t%1,%1,%6", op);
7084 }
7085
7086 /* Jump to target. */
7087 output_asm_insn ("br\t%0", op);
7088
7089 final_end_function ();
7090 }
7091
7092 /* Canonicalize a comparison from one we don't have to one we do have. */
7093 static void
7094 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7095 bool op0_preserve_value)
7096 {
7097 if (!op0_preserve_value
7098 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7099 {
7100 rtx tem = *op0;
7101 *op0 = *op1;
7102 *op1 = tem;
7103 *code = (int)swap_condition ((enum rtx_code)*code);
7104 }
7105 }
7106
7107 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7108 to perform. MEM is the memory on which to operate. VAL is the second
7109 operand of the binary operator. BEFORE and AFTER are optional locations to
7110 return the value of MEM either before or after the operation. */
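/* A small usage sketch: for CODE == PLUS this expands to the
   read-modify-write sequence "before = *mem; after = before + val;
   *mem = after", while CODE == MULT is specially interpreted as NAND,
   i.e. after = ~(before & val), as handled below.  */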
7111 void
7112 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7113 rtx orig_before, rtx orig_after)
7114 {
7115 machine_mode mode = GET_MODE (mem);
7116 rtx before = orig_before, after = orig_after;
7117
7118 if (before == NULL_RTX)
7119 before = gen_reg_rtx (mode);
7120
7121 emit_move_insn (before, mem);
7122
7123 if (code == MULT) /* NAND operation */
7124 {
7125 rtx x = expand_simple_binop (mode, AND, before, val,
7126 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7127 after = expand_simple_unop (mode, NOT, x, after, 1);
7128 }
7129 else
7130 {
7131 after = expand_simple_binop (mode, code, before, val,
7132 after, 1, OPTAB_LIB_WIDEN);
7133 }
7134
7135 emit_move_insn (mem, after);
7136
7137 if (orig_after && after != orig_after)
7138 emit_move_insn (orig_after, after);
7139 }
7140
7141 \f
7142 /* Table of machine attributes. */
7143 static const struct attribute_spec spu_attribute_table[] =
7144 {
7145 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7146 affects_type_identity } */
7147 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7148 false },
7149 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7150 false },
7151 { NULL, 0, 0, false, false, false, NULL, false }
7152 };
7153
7154 /* TARGET overrides. */
7155
7156 #undef TARGET_LRA_P
7157 #define TARGET_LRA_P hook_bool_void_false
7158
7159 #undef TARGET_ADDR_SPACE_POINTER_MODE
7160 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7161
7162 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7163 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7164
7165 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7166 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7167 spu_addr_space_legitimate_address_p
7168
7169 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7170 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7171
7172 #undef TARGET_ADDR_SPACE_SUBSET_P
7173 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7174
7175 #undef TARGET_ADDR_SPACE_CONVERT
7176 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7177
7178 #undef TARGET_INIT_BUILTINS
7179 #define TARGET_INIT_BUILTINS spu_init_builtins
7180 #undef TARGET_BUILTIN_DECL
7181 #define TARGET_BUILTIN_DECL spu_builtin_decl
7182
7183 #undef TARGET_EXPAND_BUILTIN
7184 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7185
7186 #undef TARGET_UNWIND_WORD_MODE
7187 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7188
7189 #undef TARGET_LEGITIMIZE_ADDRESS
7190 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7191
7192 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7193 and .quad for the debugger. When it is known that the assembler is fixed,
7194 these can be removed. */
7195 #undef TARGET_ASM_UNALIGNED_SI_OP
7196 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7197
7198 #undef TARGET_ASM_ALIGNED_DI_OP
7199 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7200
7201 /* The .8byte directive doesn't seem to work well for a 32 bit
7202 architecture. */
7203 #undef TARGET_ASM_UNALIGNED_DI_OP
7204 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7205
7206 #undef TARGET_RTX_COSTS
7207 #define TARGET_RTX_COSTS spu_rtx_costs
7208
7209 #undef TARGET_ADDRESS_COST
7210 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7211
7212 #undef TARGET_SCHED_ISSUE_RATE
7213 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7214
7215 #undef TARGET_SCHED_INIT_GLOBAL
7216 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7217
7218 #undef TARGET_SCHED_INIT
7219 #define TARGET_SCHED_INIT spu_sched_init
7220
7221 #undef TARGET_SCHED_VARIABLE_ISSUE
7222 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7223
7224 #undef TARGET_SCHED_REORDER
7225 #define TARGET_SCHED_REORDER spu_sched_reorder
7226
7227 #undef TARGET_SCHED_REORDER2
7228 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7229
7230 #undef TARGET_SCHED_ADJUST_COST
7231 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7232
7233 #undef TARGET_ATTRIBUTE_TABLE
7234 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7235
7236 #undef TARGET_ASM_INTEGER
7237 #define TARGET_ASM_INTEGER spu_assemble_integer
7238
7239 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7240 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7241
7242 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7243 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7244
7245 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7246 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7247
7248 #undef TARGET_ASM_GLOBALIZE_LABEL
7249 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7250
7251 #undef TARGET_PASS_BY_REFERENCE
7252 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7253
7254 #undef TARGET_FUNCTION_ARG
7255 #define TARGET_FUNCTION_ARG spu_function_arg
7256
7257 #undef TARGET_FUNCTION_ARG_ADVANCE
7258 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7259
7260 #undef TARGET_MUST_PASS_IN_STACK
7261 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7262
7263 #undef TARGET_BUILD_BUILTIN_VA_LIST
7264 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7265
7266 #undef TARGET_EXPAND_BUILTIN_VA_START
7267 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7268
7269 #undef TARGET_SETUP_INCOMING_VARARGS
7270 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7271
7272 #undef TARGET_MACHINE_DEPENDENT_REORG
7273 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7274
7275 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7276 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7277
7278 #undef TARGET_INIT_LIBFUNCS
7279 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7280
7281 #undef TARGET_RETURN_IN_MEMORY
7282 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7283
7284 #undef TARGET_ENCODE_SECTION_INFO
7285 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7286
7287 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7288 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7289
7290 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7291 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7292
7293 #undef TARGET_VECTORIZE_INIT_COST
7294 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7295
7296 #undef TARGET_VECTORIZE_ADD_STMT_COST
7297 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7298
7299 #undef TARGET_VECTORIZE_FINISH_COST
7300 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7301
7302 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7303 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7304
7305 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7306 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7307
7308 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7309 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7310
7311 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7312 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7313
7314 #undef TARGET_SCHED_SMS_RES_MII
7315 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7316
7317 #undef TARGET_SECTION_TYPE_FLAGS
7318 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7319
7320 #undef TARGET_ASM_SELECT_SECTION
7321 #define TARGET_ASM_SELECT_SECTION spu_select_section
7322
7323 #undef TARGET_ASM_UNIQUE_SECTION
7324 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7325
7326 #undef TARGET_LEGITIMATE_ADDRESS_P
7327 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7328
7329 #undef TARGET_LEGITIMATE_CONSTANT_P
7330 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7331
7332 #undef TARGET_TRAMPOLINE_INIT
7333 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7334
7335 #undef TARGET_WARN_FUNC_RETURN
7336 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7337
7338 #undef TARGET_OPTION_OVERRIDE
7339 #define TARGET_OPTION_OVERRIDE spu_option_override
7340
7341 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7342 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7343
7344 #undef TARGET_REF_MAY_ALIAS_ERRNO
7345 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7346
7347 #undef TARGET_ASM_OUTPUT_MI_THUNK
7348 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7349 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7350 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7351
7352 /* Variable tracking should be run after all optimizations which
7353 change order of insns. It also needs a valid CFG. */
7354 #undef TARGET_DELAY_VARTRACK
7355 #define TARGET_DELAY_VARTRACK true
7356
7357 #undef TARGET_CANONICALIZE_COMPARISON
7358 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7359
7360 #undef TARGET_CAN_USE_DOLOOP_P
7361 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7362
7363 struct gcc_target targetm = TARGET_INITIALIZER;
7364
7365 #include "gt-spu.h"